Skip to content

Commit e9d98d9

Browse files
authored
Merge branch 'OpenXiangShan:master' into chi-coupledl2-dsu
2 parents cc37877 + d9b702d commit e9d98d9

File tree

11 files changed

+97
-46
lines changed

11 files changed

+97
-46
lines changed

.github/workflows/main.yml

+6-2
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,10 @@ jobs:
6969
rm -rf ./dut/CoupledL2 && ln -s ../.. ./dut/CoupledL2
7070
make coupledL2-test-l2l3l2 run THREADS_BUILD=4 CXX_COMPILER=clang++-17
7171
72-
- name: Compile CHI QuadCore
72+
- name: Unit test for CHI version
7373
run: |
74-
make test-top-chi-quadcore-2ul
74+
cd tl-test-new/dut
75+
git clone https://github.com/OpenXiangShan/OpenLLC
76+
cd OpenLLC && make init && cd ..
77+
rm -rf ./CoupledL2 && ln -s ./OpenLLC ./CoupledL2 && cd ..
78+
make coupledL2-test-l2l3l2 run THREADS_BUILD=4 CXX_COMPILER=clang++-17

src/main/scala/coupledL2/RequestArb.scala

+6-6
Original file line numberDiff line numberDiff line change
@@ -130,19 +130,19 @@ class RequestArb(implicit p: Parameters) extends L2Module {
130130
(if (io.fromTXRSP.isDefined) io.fromTXRSP.get.blockSinkBReqEntrance else false.B)
131131
val block_C = io.fromMSHRCtl.blockC_s1 || io.fromMainPipe.blockC_s1 || io.fromGrantBuffer.blockSinkReqEntrance.blockC_s1
132132

133-
val noFreeWay = Wire(Bool())
133+
// val noFreeWay = Wire(Bool())
134134

135135
val sinkValids = VecInit(Seq(
136136
io.sinkC.valid && !block_C,
137137
io.sinkB.valid && !block_B,
138-
io.sinkA.valid && !block_A && !noFreeWay
138+
io.sinkA.valid && !block_A
139139
)).asUInt
140140

141141
// TODO: A Hint is allowed to enter if !s2_ready for mcp2_stall
142142

143143
val sink_ready_basic = io.dirRead_s1.ready && resetFinish && !mshr_task_s1.valid && s2_ready
144144

145-
io.sinkA.ready := sink_ready_basic && !block_A && !sinkValids(1) && !sinkValids(0) && !noFreeWay // SinkC prior to SinkA & SinkB
145+
io.sinkA.ready := sink_ready_basic && !block_A && !sinkValids(1) && !sinkValids(0) // SinkC prior to SinkA & SinkB
146146
io.sinkB.ready := sink_ready_basic && !block_B && !sinkValids(0) // SinkB prior to SinkA
147147
io.sinkC.ready := sink_ready_basic && !block_C
148148

@@ -195,19 +195,19 @@ class RequestArb(implicit p: Parameters) extends L2Module {
195195
task_s2.valid := s1_fire
196196
when(s1_fire) { task_s2.bits := task_s1.bits }
197197

198-
val sameSet_s2 = task_s2.valid && task_s2.bits.fromA && !task_s2.bits.mshrTask && task_s2.bits.set === A_task.set
198+
/* val sameSet_s2 = task_s2.valid && task_s2.bits.fromA && !task_s2.bits.mshrTask && task_s2.bits.set === A_task.set
199199
val sameSet_s3 = RegNext(task_s2.valid && task_s2.bits.fromA && !task_s2.bits.mshrTask) &&
200200
RegEnable(task_s2.bits.set, task_s2.valid) === A_task.set
201201
val sameSetCnt = PopCount(VecInit(io.msInfo.map(s => s.valid && s.bits.set === A_task.set && s.bits.fromA) :+
202202
sameSet_s2 :+ sameSet_s3).asUInt)
203203
noFreeWay := sameSetCnt >= cacheParams.ways.U
204-
204+
*/
205205
io.taskToPipe_s2 := task_s2
206206

207207
// MSHR task
208208
val mshrTask_s2 = task_s2.valid && task_s2.bits.mshrTask
209209
val mshrTask_s2_a_upwards = task_s2.bits.fromA &&
210-
(task_s2.bits.opcode === GrantData || task_s2.bits.opcode === Grant ||
210+
(task_s2.bits.opcode === GrantData || task_s2.bits.opcode === Grant && task_s2.bits.dsWen ||
211211
task_s2.bits.opcode === AccessAckData || task_s2.bits.opcode === HintAck && task_s2.bits.dsWen)
212212
// For GrantData, read refillBuffer
213213
// Caution: GrantData-alias may read DataStorage or ReleaseBuf instead

src/main/scala/coupledL2/RequestBuffer.scala

+17-3
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete
7575
val mshrInfo = Vec(mshrsAll, Flipped(ValidIO(new MSHRInfo)))
7676
val aMergeTask = ValidIO(new AMergeTask)
7777
val mainPipeBlock = Input(Vec(2, Bool()))
78+
/* Snoop task from arbiter at stage 2 */
79+
val taskFromArb_s2 = Flipped(ValidIO(new TaskBundle()))
7880

7981
val ATag = Output(UInt(tagBits.W))
8082
val ASet = Output(UInt(setBits.W))
@@ -143,8 +145,20 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete
143145
io.aMergeTask.bits.id := mergeAId
144146
io.aMergeTask.bits.task := in
145147

148+
/*
149+
noFreeWay check: s2 + s3 + mshrs >= ways(L2)
150+
*/
151+
val task_s2 = io.taskFromArb_s2
152+
val sameSet_s2 = task_s2.valid && task_s2.bits.fromA && !task_s2.bits.mshrTask && task_s2.bits.set === io.ASet
153+
val sameSet_s3 = RegNext(task_s2.valid && task_s2.bits.fromA && !task_s2.bits.mshrTask) &&
154+
RegEnable(task_s2.bits.set, task_s2.valid) === io.ASet
155+
val sameSetCnt = PopCount(VecInit(io.mshrInfo.map(s => s.valid && s.bits.set === io.ASet && s.bits.fromA) :+
156+
sameSet_s2 :+ sameSet_s3).asUInt)
157+
val noFreeWay = sameSetCnt >= cacheParams.ways.U
158+
dontTouch (noFreeWay)
159+
146160
// flow not allowed when full, or entries might starve
147-
val canFlow = flow.B && !full && !conflict(in) && !chosenQValid && !Cat(io.mainPipeBlock).orR
161+
val canFlow = flow.B && !full && !conflict(in) && !chosenQValid && !Cat(io.mainPipeBlock).orR && !noFreeWay
148162
val doFlow = canFlow && io.out.ready
149163
io.hasLatePF := latePrefetch(in) && io.in.valid && !sameAddr(in, RegNext(in))
150164
io.hasMergeA := mergeA && io.in.valid && !sameAddr(in, RegNext(in))
@@ -177,7 +191,7 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete
177191

178192
entry.valid := true.B
179193
// when Addr-Conflict / Same-Addr-Dependent / MainPipe-Block / noFreeWay-in-Set, entry not ready
180-
entry.rdy := !conflict(in) && !mpBlock && !s1Block // && !Cat(depMask).orR
194+
entry.rdy := !conflict(in) && !mpBlock && !s1Block && !noFreeWay// && !Cat(depMask).orR
181195
entry.task := io.in.bits
182196
entry.waitMP := Cat(
183197
s1Block,
@@ -247,7 +261,7 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 4)(implicit p: Paramete
247261
// update info
248262
e.waitMS := waitMSUpdate
249263
// e.depMask := depMaskUpdate
250-
e.rdy := !waitMSUpdate.orR && !e.waitMP && !s1_Block
264+
e.rdy := !waitMSUpdate.orR && !e.waitMP && !s1_Block && !noFreeWay
251265
}
252266
}
253267

src/main/scala/coupledL2/prefetch/BestOffsetPrefetch.scala

+5-5
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
package coupledL2.prefetch
1919

20-
import utility.{GatedValidRegNext, ChiselDB, Constantin, MemReqSource, ParallelPriorityMux, RRArbiterInit, SRAMTemplate}
20+
import utility.{ChiselDB, Constantin, MemReqSource, ParallelPriorityMux, RRArbiterInit, SRAMTemplate}
2121
import org.chipsalliance.cde.config.Parameters
2222
import chisel3.DontCare.:=
2323
import chisel3._
@@ -177,13 +177,13 @@ class RecentRequestTable(implicit p: Parameters) extends BOPModule {
177177
rrTable.io.r.req.bits.setIdx := idx(rAddr)
178178
rData := rrTable.io.r.resp.data(0)
179179

180-
assert(!GatedValidRegNext(io.w.fire && io.r.req.fire), "single port SRAM should not read and write at the same time")
180+
assert(!RegNext(io.w.fire && io.r.req.fire), "single port SRAM should not read and write at the same time")
181181

182182
io.w.ready := rrTable.io.w.req.ready && !io.r.req.valid
183183
io.r.req.ready := true.B
184-
io.r.resp.valid := GatedValidRegNext(rrTable.io.r.req.fire, false.B)
185-
io.r.resp.bits.ptr := RegEnable(io.r.req.bits.ptr, rrTable.io.r.req.fire)
186-
io.r.resp.bits.hit := rData.valid && rData.tag === RegEnable(tag(rAddr), rrTable.io.r.req.fire)
184+
io.r.resp.valid := RegNext(rrTable.io.r.req.fire, false.B)
185+
io.r.resp.bits.ptr := RegNext(io.r.req.bits.ptr)
186+
io.r.resp.bits.hit := rData.valid && rData.tag === RegNext(tag(rAddr))
187187

188188
}
189189

src/main/scala/coupledL2/tl2chi/MSHR.scala

+2-1
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,8 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module {
491491
* 3. If the snoop is SnpCleanShared
492492
* Otherwise, the dirty bit should stay the same as before.
493493
*/
494-
dirty = !snpToN && !snpToB && req_chiOpcode =/= SnpCleanShared && meta.dirty,
494+
dirty = !snpToN && !snpToB && req_chiOpcode =/= SnpCleanShared && meta.dirty ||
495+
isSnpOnceX(req_chiOpcode) && probeDirty,
495496
state = Mux(
496497
snpToN,
497498
INVALID,

src/main/scala/coupledL2/tl2chi/MSHRCtl.scala

+49-24
Original file line numberDiff line numberDiff line change
@@ -120,46 +120,68 @@ class MSHRCtl(implicit p: Parameters) extends TL2CHIL2Module {
120120
mshrSelector.io.idle := mshrs.map(m => !m.io.status.valid)
121121
io.toMainPipe.mshr_alloc_ptr := OHToUInt(selectedMSHROH)
122122

123+
/*
124+
rxrsp timing for PCredit is quite critical, so the rxrsp signals are registered here to break the path
125+
*/
126+
val rxrspValid = RegNext(io.resps.rxrsp.valid)
127+
val rxrspInfo = RegNext(io.resps.rxrsp.respInfo)
128+
val rxrspMshrId = RegNext( io.resps.rxrsp.mshrId)
129+
123130
/*
124131
when PCrdGrant, give credit to one entry that:
125132
1. got RetryAck and not Reissued
126133
2. match srcID and PCrdType
127134
3. use Round-Robin arbiter if multi-entry match
128135
*/
129136
val isPCrdGrant = io.resps.rxrsp.valid && (io.resps.rxrsp.respInfo.chiOpcode.get === PCrdGrant)
137+
val isPCrdGrantReg = RegNext(isPCrdGrant)
130138
val waitPCrdInfo = Wire(Vec(mshrsAll, new PCrdInfo))
131-
// val pArb = Module(new RRArbiter(UInt(), mshrsAll))
139+
val timeOutPri = VecInit(Seq.fill(16)(false.B))
140+
val timeOutSel = WireInit(false.B)
141+
val pCrdPri = VecInit(Seq.fill(16)(false.B))
142+
val pArb = Module(new RRArbiter(UInt(), mshrsAll))
132143

133144
val matchPCrdGrant = VecInit(waitPCrdInfo.map(p =>
134145
isPCrdGrant && p.valid &&
135146
p.srcID.get === io.resps.rxrsp.respInfo.srcID.get &&
136147
p.pCrdType.get === io.resps.rxrsp.respInfo.pCrdType.get
137148
))
138149

139-
/* pArb.io.in.zipWithIndex.foreach {
140-
case (in, i) =>
141-
in.valid := matchPCrdGrant(i)
150+
val matchPCrdGrantReg = RegNext(matchPCrdGrant)
151+
pArb.io.in.zipWithIndex.foreach {
152+
case (in, i) =>
153+
in.valid := matchPCrdGrantReg(i)
142154
in.bits := 0.U
143155
}
144156
pArb.io.out.ready := true.B
145-
val pCrdRR = VecInit(UIntToOH(pArb.io.chosen))
146-
val pCrdPri = VecInit((matchPCrdGrant.asUInt & pCrdRR.asUInt).asBools)
147-
//val pCrdPri = VecInit(PriorityEncoderOH(matchPCrdGrant))
148-
val pCrdIsWait = OHToUInt(pCrdPri)
149-
*/
150157

151-
/*
152-
Random arbiter if multi-entry match
153-
*/
154-
val lfsr = LFSR(16, true.B)
155-
val idx = Random(16, lfsr)
156-
val idxOH = VecInit(UIntToOH(idx))
158+
val pCrdOH = VecInit(UIntToOH(pArb.io.chosen).asBools)
159+
val pCrdFixPri = VecInit(pCrdOH zip matchPCrdGrantReg map {case(a,b) => a && b})
160+
//val pCrdFixPri = VecInit(PriorityEncoderOH(matchPCrdGrantReg)) //fix priority arbiter
161+
162+
// timeout protect
163+
val counter = RegInit(VecInit(Seq.fill(mshrsAll)(0.U((log2Ceil(mshrsAll)+1).W))))
164+
165+
for(i <- 0 until 16) {
166+
when(matchPCrdGrantReg(i)) {
167+
when(!timeOutSel && pCrdFixPri(i) || timeOutPri(i)) {
168+
counter(i):=0.U
169+
}.otherwise {
170+
counter(i):= counter(i) + 1.U
171+
}
172+
}
173+
}
174+
val timeOutOH = PriorityEncoderOH(counter.map(_>=12.U) zip matchPCrdGrantReg map {case(a,b) => a&&b})
175+
timeOutPri := VecInit(timeOutOH)
176+
177+
timeOutSel := timeOutPri.reduce(_|_)
178+
pCrdPri := Mux(timeOutSel, timeOutPri, pCrdFixPri)
157179

158-
val doubleReq = Fill(2, matchPCrdGrant.asUInt)
159-
val doubleGnt = ~(doubleReq - idxOH.asUInt) & doubleReq
160-
val gnt = doubleGnt(31,16) | doubleGnt(15,0)
161-
val pCrdPri = VecInit(gnt.asBools)
162-
val pCrdIsWait = OHToUInt(pCrdPri)
180+
dontTouch (timeOutPri)
181+
dontTouch (timeOutSel)
182+
dontTouch (pCrdOH)
183+
dontTouch (pCrdFixPri)
184+
dontTouch (pCrdPri)
163185

164186
/* when PCrdGrant comes before RetryAck, a 16-entry CAM is used to:
165187
1. save {srcID, PCrdType}
@@ -171,7 +193,8 @@ class MSHRCtl(implicit p: Parameters) extends TL2CHIL2Module {
171193
val pCamValids = Cat(pCam.map(_.valid))
172194
val enqIdx = PriorityEncoder(~pCamValids.asUInt)
173195

174-
when (isPCrdGrant && !pCrdIsWait.orR){
196+
// when (isPCrdGrant && !pCrdIsWait.orR){
197+
when (isPCrdGrant){
175198
pCam(enqIdx).valid := true.B
176199
pCam(enqIdx).srcID.get := io.resps.rxrsp.respInfo.srcID.get
177200
pCam(enqIdx).pCrdType.get := io.resps.rxrsp.respInfo.pCrdType.get
@@ -214,8 +237,10 @@ class MSHRCtl(implicit p: Parameters) extends TL2CHIL2Module {
214237
m.io.resps.rxdat.valid := m.io.status.valid && io.resps.rxdat.valid && io.resps.rxdat.mshrId === i.U
215238
m.io.resps.rxdat.bits := io.resps.rxdat.respInfo
216239

217-
m.io.resps.rxrsp.valid := (m.io.status.valid && io.resps.rxrsp.valid && !isPCrdGrant && io.resps.rxrsp.mshrId === i.U) || (isPCrdGrant && pCrdPri(i))
218-
m.io.resps.rxrsp.bits := io.resps.rxrsp.respInfo
240+
// m.io.resps.rxrsp.valid := (m.io.status.valid && io.resps.rxrsp.valid && !isPCrdGrant && io.resps.rxrsp.mshrId === i.U) || (isPCrdGrant && pCrdPri(i))
241+
// m.io.resps.rxrsp.bits := io.resps.rxrsp.respInfo
242+
m.io.resps.rxrsp.valid := (m.io.status.valid && rxrspValid && !isPCrdGrantReg && rxrspMshrId === i.U) || (isPCrdGrantReg && pCrdPri(i))
243+
m.io.resps.rxrsp.bits := rxrspInfo
219244

220245
m.io.replResp.valid := io.replResp.valid && io.replResp.bits.mshrId === i.U
221246
m.io.replResp.bits := io.replResp.bits
@@ -226,7 +251,7 @@ class MSHRCtl(implicit p: Parameters) extends TL2CHIL2Module {
226251
m.io.aMergeTask.bits := io.aMergeTask.bits.task
227252

228253
waitPCrdInfo(i) := m.io.waitPCrdInfo
229-
m.io.pCamPri := (pCamPri === i.U) && waitPCrdInfo(i).valid
254+
m.io.pCamPri := 0.U /*(pCamPri === i.U) && waitPCrdInfo(i).valid*/
230255
}
231256
/* Reserve 1 entry for SinkB */
232257
io.waitPCrdInfo <> waitPCrdInfo

src/main/scala/coupledL2/tl2chi/RXRSP.scala

+1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ class RXRSP(implicit p: Parameters) extends TL2CHIL2Module {
4545
io.in.respInfo.txnID.get := io.out.bits.txnID
4646
io.in.respInfo.srcID.get := io.out.bits.srcID
4747
io.in.respInfo.dbID.get := io.out.bits.dbID
48+
io.in.respInfo.resp.get := io.out.bits.resp
4849
io.in.respInfo.pCrdType.get := io.out.bits.pCrdType
4950
io.in.respInfo.last := true.B
5051

src/main/scala/coupledL2/tl2chi/Slice.scala

+1
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ class Slice()(implicit p: Parameters) extends BaseSlice[OuterBundle]
107107
reqBuf.io.mshrInfo := mshrCtl.io.msInfo
108108
reqBuf.io.mainPipeBlock := mainPipe.io.toReqBuf
109109
reqBuf.io.s1Entrance := reqArb.io.s1Entrance
110+
reqBuf.io.taskFromArb_s2 := reqArb.io.taskToPipe_s2
110111

111112
mainPipe.io.taskFromArb_s2 := reqArb.io.taskToPipe_s2
112113
mainPipe.io.taskInfo_s1 := reqArb.io.taskInfo_s1

src/main/scala/coupledL2/tl2chi/TXREQ.scala

+6-3
Original file line numberDiff line numberDiff line change
@@ -45,18 +45,21 @@ class TXREQ(implicit p: Parameters) extends TL2CHIL2Module {
4545
require(chiOpt.isDefined)
4646

4747
// TODO: an mshrsAll-entry queue is too much, evaluate for a proper size later
48-
val queue = Module(new Queue(new CHIREQ, entries = mshrsAll, flow = true))
48+
val queue = Module(new Queue(new CHIREQ, entries = mshrsAll, flow = false))
4949

5050
// Back pressure logic from TXREQ
5151
val queueCnt = queue.io.count
5252
// TODO: this may be imprecise, review this later
5353
val pipeStatus_s1_s5 = io.pipeStatusVec
5454
val pipeStatus_s2_s5 = pipeStatus_s1_s5.tail
5555
val pipeStatus_s1 = pipeStatus_s1_s5.head
56+
val pipeStatus_s2 = pipeStatus_s1_s5(1)
57+
val s2ReturnCredit = pipeStatus_s2.valid && !(pipeStatus_s2.bits.mshrTask && pipeStatus_s2.bits.toTXREQ)
5658
// inflightCnt equals the number of reqs on s2~s5 that may flow into TXREQ soon, plus queueCnt.
5759
// The calculation of inflightCnt might be imprecise and lead to false-positive back pressure.
5860
val inflightCnt = PopCount(Cat(pipeStatus_s2_s5.map(s => s.valid && s.bits.mshrTask && s.bits.toTXREQ))) +
59-
pipeStatus_s1.valid.asUInt +
61+
// pipeStatus_s1.valid.asUInt +
62+
1.U - s2ReturnCredit.asUInt + //Fix Timing: always take credit and s2 return if not take
6063
queueCnt
6164
val noSpace = inflightCnt >= mshrsAll.U
6265

@@ -73,4 +76,4 @@ class TXREQ(implicit p: Parameters) extends TL2CHIL2Module {
7376
io.out.bits.tgtID := SAM(sam).lookup(io.out.bits.addr)
7477
io.out.bits.size := log2Ceil(blockBytes).U(SIZE_WIDTH.W) // TODO
7578
io.out.bits.addr := restoreAddressUInt(queue.io.deq.bits.addr, io.sliceId)
76-
}
79+
}

src/main/scala/coupledL2/tl2chi/TXRSP.scala

+2-2
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ class TXRSP(implicit p: Parameters) extends TL2CHIL2Module {
4545
require(chiOpt.isDefined)
4646

4747
// TODO: an mshrsAll-entry queue is too much, evaluate for a proper size later
48-
val queue = Module(new Queue(new CHIRSP, entries = mshrsAll, flow = true))
48+
val queue = Module(new Queue(new CHIRSP, entries = mshrsAll, flow = false))
4949

5050
// Back pressure logic from TXRSP
5151
val queueCnt = queue.io.count
@@ -88,4 +88,4 @@ class TXRSP(implicit p: Parameters) extends TL2CHIL2Module {
8888
// TODO: Finish this
8989
rsp
9090
}
91-
}
91+
}

src/main/scala/coupledL2/tl2tl/Slice.scala

+2
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ class Slice()(implicit p: Parameters) extends BaseSlice[OuterBundle] {
5959
a_reqBuf.io.mshrInfo := mshrCtl.io.msInfo
6060
a_reqBuf.io.mainPipeBlock := mainPipe.io.toReqBuf
6161
a_reqBuf.io.s1Entrance := reqArb.io.s1Entrance
62+
a_reqBuf.io.taskFromArb_s2 := reqArb.io.taskToPipe_s2
63+
6264
sinkB.io.msInfo := mshrCtl.io.msInfo
6365
sinkC.io.msInfo := mshrCtl.io.msInfo
6466

0 commit comments

Comments
 (0)