From 5ee2491358a16204fd3fd1a4cb26330c473c7c7a Mon Sep 17 00:00:00 2001 From: zhanglinjuan Date: Mon, 6 May 2024 20:50:45 +0800 Subject: [PATCH 1/2] RequestArb: restrict concurrent number of Acquires with same set Consider a deadlock situation: given 4 ways in each slice, MSHRs 0~3 are accessing separate ways, which leaves all the ways occupied. At the same time, MSHR 4 has already received CompData from HN and is waiting for a free way to replace so that it can return the data to L1 through channel D. Therefore MSHR 4 repeatedly tries to select a replacement way and fails. If an incoming snoop has the same address as MSHR 4, the snoop will be blocked because the Grant message of MSHR 4 is blocked. The CompData responses for MSHRs 0~3 then cannot be returned because of the blocked snoop, and the system deadlocks. --- src/main/scala/coupledL2/Common.scala | 2 +- src/main/scala/coupledL2/RequestArb.scala | 14 +++++++++++++- src/main/scala/coupledL2/tl2chi/MSHR.scala | 1 + src/main/scala/coupledL2/tl2chi/Slice.scala | 1 + src/main/scala/coupledL2/tl2tl/MSHR.scala | 1 + src/main/scala/coupledL2/tl2tl/Slice.scala | 1 + 6 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/main/scala/coupledL2/Common.scala b/src/main/scala/coupledL2/Common.scala index a9393dac6..7d5036cb6 100644 --- a/src/main/scala/coupledL2/Common.scala +++ b/src/main/scala/coupledL2/Common.scala @@ -173,7 +173,7 @@ class MSHRRequest(implicit p: Parameters) extends L2Bundle { } // MSHR info to ReqBuf and SinkB -class MSHRInfo(implicit p: Parameters) extends L2Bundle { +class MSHRInfo(implicit p: Parameters) extends L2Bundle with HasTLChannelBits { val set = UInt(setBits.W) val way = UInt(wayBits.W) val reqTag = UInt(tagBits.W) diff --git a/src/main/scala/coupledL2/RequestArb.scala b/src/main/scala/coupledL2/RequestArb.scala index 820c11f4a..68ae4b734 100644 --- a/src/main/scala/coupledL2/RequestArb.scala +++ b/src/main/scala/coupledL2/RequestArb.scala @@ -70,6 +70,9 @@ class RequestArb(implicit p: Parameters) 
extends L2Module { val fromTXDAT = if (enableCHI) Some(Input(new TXDATBlockBundle)) else None val fromTXRSP = if (enableCHI) Some(Input(new TXRSPBlockBundle)) else None val fromTXREQ = if (enableCHI) Some(Input(new TXBlockBundle)) else None + + /* MSHR Status */ + val msInfo = Vec(mshrsAll, Flipped(ValidIO(new MSHRInfo()))) }) /* ======== Reset ======== */ @@ -137,7 +140,9 @@ class RequestArb(implicit p: Parameters) extends L2Module { val sink_ready_basic = io.dirRead_s1.ready && resetFinish && !mshr_task_s1.valid && s2_ready - io.sinkA.ready := sink_ready_basic && !block_A && !sinkValids(1) && !sinkValids(0) // SinkC prior to SinkA & SinkB + val noFreeWay = Wire(Bool()) + + io.sinkA.ready := sink_ready_basic && !block_A && !sinkValids(1) && !sinkValids(0) && !noFreeWay // SinkC prior to SinkA & SinkB io.sinkB.ready := sink_ready_basic && !block_B && !sinkValids(0) // SinkB prior to SinkA io.sinkC.ready := sink_ready_basic && !block_C @@ -189,6 +194,13 @@ class RequestArb(implicit p: Parameters) extends L2Module { task_s2.valid := s1_fire when(s1_fire) { task_s2.bits := task_s1.bits } + val sameSet_s2 = task_s2.valid && task_s2.bits.fromA && !task_s2.bits.mshrTask && task_s2.bits.set === A_task.set + val sameSet_s3 = RegNext(task_s2.valid && task_s2.bits.fromA && !task_s2.bits.mshrTask) && + RegEnable(task_s2.bits.set, task_s2.valid) === A_task.set + val sameSetCnt = PopCount(VecInit(io.msInfo.map(s => s.valid && s.bits.set === A_task.set && s.bits.fromA) :+ + sameSet_s2 :+ sameSet_s3).asUInt) + noFreeWay := sameSetCnt >= cacheParams.ways.U + io.taskToPipe_s2 := task_s2 // MSHR task diff --git a/src/main/scala/coupledL2/tl2chi/MSHR.scala b/src/main/scala/coupledL2/tl2chi/MSHR.scala index 1dd77adf3..0dbc80072 100644 --- a/src/main/scala/coupledL2/tl2chi/MSHR.scala +++ b/src/main/scala/coupledL2/tl2chi/MSHR.scala @@ -958,6 +958,7 @@ class MSHR(implicit p: Parameters) extends TL2CHIL2Module { io.msInfo.bits.w_replResp := state.w_replResp io.msInfo.bits.w_rprobeacklast 
:= state.w_rprobeacklast io.msInfo.bits.replaceData := isT(meta.state) && meta.dirty || probeDirty + io.msInfo.bits.channel := req.channel assert(!(c_resp.valid && !io.status.bits.w_c_resp)) assert(!(rxrsp.valid && !io.status.bits.w_d_resp)) diff --git a/src/main/scala/coupledL2/tl2chi/Slice.scala b/src/main/scala/coupledL2/tl2chi/Slice.scala index 22f55f9f1..c97aa005a 100644 --- a/src/main/scala/coupledL2/tl2chi/Slice.scala +++ b/src/main/scala/coupledL2/tl2chi/Slice.scala @@ -102,6 +102,7 @@ class Slice()(implicit p: Parameters) extends TL2CHIL2Module { reqArb.io.fromTXDAT.foreach(_ := txdat.io.toReqArb) reqArb.io.fromTXRSP.foreach(_ := txrsp.io.toReqArb) reqArb.io.fromTXREQ.foreach(_ := txreq.io.toReqArb) + reqArb.io.msInfo := mshrCtl.io.msInfo reqBuf.io.in <> sinkA.io.task reqBuf.io.mshrInfo := mshrCtl.io.msInfo diff --git a/src/main/scala/coupledL2/tl2tl/MSHR.scala b/src/main/scala/coupledL2/tl2tl/MSHR.scala index 8e42a5b5e..c9aec5cef 100644 --- a/src/main/scala/coupledL2/tl2tl/MSHR.scala +++ b/src/main/scala/coupledL2/tl2tl/MSHR.scala @@ -553,6 +553,7 @@ class MSHR(implicit p: Parameters) extends L2Module { io.msInfo.bits.w_releaseack := state.w_releaseack io.msInfo.bits.w_replResp := state.w_replResp io.msInfo.bits.w_rprobeacklast := state.w_rprobeacklast + io.msInfo.bits.channel := req.channel assert(!(c_resp.valid && !io.status.bits.w_c_resp)) assert(!(d_resp.valid && !io.status.bits.w_d_resp)) diff --git a/src/main/scala/coupledL2/tl2tl/Slice.scala b/src/main/scala/coupledL2/tl2tl/Slice.scala index 70ea2c848..5109f6ddd 100644 --- a/src/main/scala/coupledL2/tl2tl/Slice.scala +++ b/src/main/scala/coupledL2/tl2tl/Slice.scala @@ -81,6 +81,7 @@ class Slice()(implicit p: Parameters) extends L2Module { reqArb.io.fromMainPipe := mainPipe.io.toReqArb reqArb.io.fromGrantBuffer := grantBuf.io.toReqArb reqArb.io.fromSourceC.foreach(_ := sourceC.io.toReqArb) + reqArb.io.msInfo := mshrCtl.io.msInfo mshrCtl.io.fromReqArb.status_s1 := reqArb.io.status_s1 
mshrCtl.io.resps.sinkC := sinkC.io.resp From b2cd68ba8986e534d7039ad05442dfd0037ceb8f Mon Sep 17 00:00:00 2001 From: Kumonda221 Date: Tue, 7 May 2024 19:02:43 +0800 Subject: [PATCH 2/2] RequestArb: apply the way restriction to both sides of inter-stage handshake Co-authored-by: zhanglinjuan --- src/main/scala/coupledL2/RequestArb.scala | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/scala/coupledL2/RequestArb.scala b/src/main/scala/coupledL2/RequestArb.scala index 68ae4b734..ee0fca391 100644 --- a/src/main/scala/coupledL2/RequestArb.scala +++ b/src/main/scala/coupledL2/RequestArb.scala @@ -107,7 +107,7 @@ class RequestArb(implicit p: Parameters) extends L2Module { (if (io.fromTXRSP.isDefined) !io.fromTXRSP.get.blockMSHRReqEntrance else true.B) && (if (io.fromTXREQ.isDefined) !io.fromTXREQ.get.blockMSHRReqEntrance else true.B) - s0_fire := io.mshrTask.valid && io.mshrTask.ready; + s0_fire := io.mshrTask.valid && io.mshrTask.ready /* ======== Stage 1 ======== */ /* latch mshr_task from s0 to s1 */ @@ -130,18 +130,18 @@ class RequestArb(implicit p: Parameters) extends L2Module { (if (io.fromTXRSP.isDefined) io.fromTXRSP.get.blockSinkBReqEntrance else false.B) val block_C = io.fromMSHRCtl.blockC_s1 || io.fromMainPipe.blockC_s1 || io.fromGrantBuffer.blockSinkReqEntrance.blockC_s1 + val noFreeWay = Wire(Bool()) + val sinkValids = VecInit(Seq( io.sinkC.valid && !block_C, io.sinkB.valid && !block_B, - io.sinkA.valid && !block_A + io.sinkA.valid && !block_A && !noFreeWay )).asUInt // TODO: A Hint is allowed to enter if !s2_ready for mcp2_stall val sink_ready_basic = io.dirRead_s1.ready && resetFinish && !mshr_task_s1.valid && s2_ready - val noFreeWay = Wire(Bool()) - io.sinkA.ready := sink_ready_basic && !block_A && !sinkValids(1) && !sinkValids(0) && !noFreeWay // SinkC prior to SinkA & SinkB io.sinkB.ready := sink_ready_basic && !block_B && !sinkValids(0) // SinkB prior to SinkA io.sinkC.ready := sink_ready_basic && !block_C @@ 
-155,8 +155,8 @@ class RequestArb(implicit p: Parameters) extends L2Module { val task_s1 = Mux(mshr_task_s1.valid, mshr_task_s1, chnl_task_s1) val s1_to_s2_valid = task_s1.valid && !mshr_replRead_stall - s1_fire := s1_cango && s2_ready; - s1_cango := task_s1.valid && !mshr_replRead_stall; + s1_cango := task_s1.valid && !mshr_replRead_stall + s1_fire := s1_cango && s2_ready io.taskInfo_s1.valid := s1_to_s2_valid io.taskInfo_s1.bits := task_s1.bits @@ -188,7 +188,7 @@ class RequestArb(implicit p: Parameters) extends L2Module { // any req except AHint might access DS, and continuous DS accesses are prohibited val ds_mcp2_stall = RegNext(s1_fire && !s1_AHint_fire) - s2_ready := !ds_mcp2_stall; + s2_ready := !ds_mcp2_stall val task_s2 = RegInit(0.U.asTypeOf(task_s1)) task_s2.valid := s1_fire