Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge branch 'master' into chi-coupledl2 #144

Merged
merged 11 commits into from
May 7, 2024
22 changes: 16 additions & 6 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ name: CI
on:
# Triggers the workflow on push or pull request events but only for the main branch
push:
branches: [ chi-coupledl2, chi-coupledl2-ci-test ]
branches: [ master, chi-coupledl2 ]
pull_request:
branches: [ chi-coupledl2, chi-coupledl2-ci-test ]
branches: [ master, chi-coupledl2 ]

# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:
Expand Down Expand Up @@ -45,9 +45,19 @@ jobs:
# - name: Check scalafmt
# run: make checkformat

# - name: Compile
# run: make compile

- name: Compile QuadCore
- name: Compile
run: make compile

- name: Unit test for TileLink version
run: |
git clone https://github.com/OpenXiangShan/tl-test -b coupledL2-huancun
make test-top-l2l3l2
cd ./tl-test
mkdir build && cd build
cmake .. -DDUT_DIR=../../build -DCHISELDB=1
make
./tlc_test -s $RANDOM

- name: Compile CHI QuadCore
run: |
make test-top-chi-quadcore-2ul
45 changes: 45 additions & 0 deletions src/main/scala/coupledL2/Common.scala
Original file line number Diff line number Diff line change
Expand Up @@ -316,3 +316,48 @@ class L2ToL1Hint(implicit p: Parameters) extends Bundle {
val sourceId = UInt(32.W) // tilelink sourceID
val isKeyword = Bool() // miss entry keyword
}

// custom l2 - l1 tlb
// FIXME lyq: Tlbcmd and TlbExceptionBundle, how to use L1 corresponding bundles?
/**
  * Command encodings carried by [[L2TlbReq]].
  *
  * A command is a 3-bit value: bits [1:0] select the access kind
  * (read / write / exec) and bit 2 marks the access as atomic.
  */
object TlbCmd {
  // Plain access kinds, told apart by the low two bits.
  def read: UInt  = "b00".U
  def write: UInt = "b01".U
  def exec: UInt  = "b10".U

  // Atomic variants: bit 2 set on top of the read / write encoding.
  def atom_read: UInt  = "b100".U // lr
  def atom_write: UInt = "b101".U // sc / amo

  /** Hardware type used to carry a command: a 3-bit UInt. */
  def apply(): UInt = UInt(3.W)

  // Low two bits of a command, i.e. the access kind with the atomic bit dropped.
  private def accessKind(cmd: UInt): UInt = cmd(1, 0)

  /** True when the access kind is `read`, regardless of the atomic bit. */
  def isRead(a: UInt): Bool = accessKind(a) === read

  /** True when the access kind is `write`, regardless of the atomic bit. */
  def isWrite(a: UInt): Bool = accessKind(a) === write

  /** True when the access kind is `exec`, regardless of the atomic bit. */
  def isExec(a: UInt): Bool = accessKind(a) === exec

  /** True when bit 2 (the atomic flag) is set. */
  def isAtom(a: UInt): Bool = a(2)

  /** True when the full command equals `atom_write`. */
  def isAmo(a: UInt): Bool = a === atom_write // NOTE: sc mixed
}
/**
  * One Bool flag per access type: load, store and instruction fetch.
  *
  * [[L2TlbResp]] instantiates this bundle once per exception class (gpf / pf / af).
  */
class TlbExceptionBundle extends Bundle {
  val ld    = Output(Bool()) // flag for load accesses
  val st    = Output(Bool()) // flag for store accesses
  val instr = Output(Bool()) // flag for instruction accesses
}
/**
  * Translation request payload sent over [[L2ToL1TlbIO]].
  *
  * NOTE(review): HasCoupledL2Parameters defines `fullVAddrBits` as
  * `vaddrBitsOpt.getOrElse(0) + offsetBits`, so the `vaddr` width below adds
  * `offsetBits` a second time -- confirm this extra width is intended.
  */
class L2TlbReq(implicit p: Parameters) extends L2Bundle {
  // Virtual address to translate.
  val vaddr = Output(UInt((fullVAddrBits + offsetBits).W))
  // Access command, encoded as described by TlbCmd.
  val cmd = Output(TlbCmd())
  // Field width is chosen to hold values in the range 0 .. log2Ceil(XLEN/8).
  val size = Output(UInt(log2Ceil(log2Ceil(XLEN / 8) + 1).W))
  // Use for blocked tlb that need sync with other module like icache
  val kill = Output(Bool())
  // do not translate, but still do pmp/pma check
  val no_translate = Output(Bool())
}
/**
  * Translation response payload returned over [[L2ToL1TlbIO]].
  *
  * @param nDups number of duplicated copies of the `paddr` and `excp` fields
  */
class L2TlbResp(nDups: Int = 1)(implicit p: Parameters) extends L2Bundle {
  // Translated physical address, one copy per duplicate.
  val paddr = Vec(nDups, Output(UInt(fullAddressBits.W)))
  // Miss flag for the request.
  val miss = Output(Bool())
  // Exception flags, one group per duplicate; each class carries ld / st / instr bits.
  val excp = Vec(
    nDups,
    new Bundle {
      val gpf = new TlbExceptionBundle()
      val pf  = new TlbExceptionBundle()
      val af  = new TlbExceptionBundle()
    }
  )
}
/**
  * TLB port as seen from the requesting side: a decoupled request going out,
  * a kill signal going out, and a decoupled response coming back (flipped).
  *
  * @param nRespDups number of duplicated fields in the response, forwarded to [[L2TlbResp]]
  */
class L2ToL1TlbIO(nRespDups: Int = 1)(implicit p: Parameters) extends L2Bundle {
  // Outgoing translation request.
  val req = DecoupledIO(new L2TlbReq)
  // Outgoing kill signal, separate from the `kill` bit inside the request payload.
  val req_kill = Output(Bool())
  // Incoming translation response; Flipped reverses the DecoupledIO directions.
  val resp = Flipped(DecoupledIO(new L2TlbResp(nRespDups)))
}
3 changes: 3 additions & 0 deletions src/main/scala/coupledL2/CoupledL2.scala
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ trait HasCoupledL2Parameters {
val enableCHI = p(EnableCHI)
val cacheParams = p(L2ParamKey)

val XLEN = 64
val blocks = cacheParams.sets * cacheParams.ways
val blockBytes = cacheParams.blockBytes
val beatBytes = cacheParams.channelBytes.d.get
Expand All @@ -51,8 +52,10 @@ trait HasCoupledL2Parameters {
val chiOpt = if (enableCHI) Some(true) else None
val aliasBitsOpt = if(cacheParams.clientCaches.isEmpty) None
else cacheParams.clientCaches.head.aliasBitsOpt
// vaddr without offset bits
val vaddrBitsOpt = if(cacheParams.clientCaches.isEmpty) None
else cacheParams.clientCaches.head.vaddrBitsOpt
val fullVAddrBits = vaddrBitsOpt.getOrElse(0) + offsetBits
// from L1 load miss cache require
val isKeywordBitsOpt = if(cacheParams.clientCaches.isEmpty) None
else cacheParams.clientCaches.head.isKeywordBitsOpt
Expand Down
6 changes: 6 additions & 0 deletions src/main/scala/coupledL2/GrantBuffer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,8 @@ class GrantBuffer(implicit p: Parameters) extends L2Module {
val pftRespEntry = new Bundle() {
val tag = UInt(tagBits.W)
val set = UInt(setBits.W)
val vaddr = vaddrBitsOpt.map(_ => UInt(vaddrBitsOpt.get.W))
val pfSource = UInt(MemReqSource.reqSourceBits.W)
}
// TODO: this may not need 10 entries, but this does not take much space
val pftQueueLen = 10
Expand All @@ -225,11 +227,15 @@ class GrantBuffer(implicit p: Parameters) extends L2Module {
io.d_task.bits.task.fromL2pft.getOrElse(false.B)
pftRespQueue.get.io.enq.bits.tag := io.d_task.bits.task.tag
pftRespQueue.get.io.enq.bits.set := io.d_task.bits.task.set
pftRespQueue.get.io.enq.bits.vaddr.foreach(_ := io.d_task.bits.task.vaddr.getOrElse(0.U))
pftRespQueue.get.io.enq.bits.pfSource := io.d_task.bits.task.reqSource

val resp = io.prefetchResp.get
resp.valid := pftRespQueue.get.io.deq.valid
resp.bits.tag := pftRespQueue.get.io.deq.bits.tag
resp.bits.set := pftRespQueue.get.io.deq.bits.set
resp.bits.vaddr.foreach(_ := pftRespQueue.get.io.deq.bits.vaddr.getOrElse(0.U))
resp.bits.pfSource := pftRespQueue.get.io.deq.bits.pfSource
pftRespQueue.get.io.deq.ready := resp.ready

assert(pftRespQueue.get.io.enq.ready, "pftRespQueue should never be full, no back pressure logic")
Expand Down
8 changes: 4 additions & 4 deletions src/main/scala/coupledL2/SinkA.scala
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,8 @@ class SinkA(implicit p: Parameters) extends L2Module {
task.mshrId := 0.U(mshrBits.W)
task.aliasTask.foreach(_ := false.B)
task.useProbeData := false.B
task.fromL2pft.foreach(_ := req.needAck)
task.mshrRetry := false.B
task.fromL2pft.foreach(_ := req.isBOP)
task.needHint.foreach(_ := false.B)
task.dirty := false.B
task.way := 0.U(wayBits.W)
Expand All @@ -105,7 +105,7 @@ class SinkA(implicit p: Parameters) extends L2Module {
task.wayMask := 0.U(cacheParams.ways.W)
task.reqSource := req.pfSource
task.replTask := false.B
task.vaddr.foreach(_ := 0.U)
task.vaddr.foreach(_ := req.vaddr.getOrElse(0.U))
task.isKeyword.foreach(_ := false.B)
task.mergeA := false.B
task.aMergeTask := 0.U.asTypeOf(new MergeTaskBundle)
Expand Down Expand Up @@ -137,8 +137,8 @@ class SinkA(implicit p: Parameters) extends L2Module {
prefetchOpt.foreach {
_ =>
XSPerfAccumulate(cacheParams, "sinkA_prefetch_req", io.prefetchReq.get.fire)
XSPerfAccumulate(cacheParams, "sinkA_prefetch_from_l2", io.prefetchReq.get.bits.isBOP && io.prefetchReq.get.fire)
XSPerfAccumulate(cacheParams, "sinkA_prefetch_from_l1", !io.prefetchReq.get.bits.isBOP && io.prefetchReq.get.fire)
XSPerfAccumulate(cacheParams, "sinkA_prefetch_from_l2", io.prefetchReq.get.bits.fromL2 && io.prefetchReq.get.fire)
XSPerfAccumulate(cacheParams, "sinkA_prefetch_from_l1", !io.prefetchReq.get.bits.fromL2 && io.prefetchReq.get.fire)
}

// cycles stalled by mainpipe
Expand Down
49 changes: 34 additions & 15 deletions src/main/scala/coupledL2/TopDownMonitor.scala
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class TopDownMonitor()(implicit p: Parameters) extends L2Module {
val msStatus = Vec(banks, Vec(mshrsAll, Flipped(ValidIO(new MSHRStatus))))
val latePF = Vec(banks, Input(Bool()))
val debugTopDown = new Bundle {
val robTrueCommit = Input(UInt(64.W))
val robHeadPaddr = Vec(cacheParams.hartIds.length, Flipped(Valid(UInt(36.W))))
val l2MissMatch = Vec(cacheParams.hartIds.length, Output(Bool()))
}
Expand Down Expand Up @@ -114,6 +115,7 @@ class TopDownMonitor()(implicit p: Parameters) extends L2Module {
val l2prefetchSent = dirResultMatchVec(
r => !r.hit &&
(r.replacerInfo.reqSource === MemReqSource.Prefetch2L2BOP.id.U ||
r.replacerInfo.reqSource === MemReqSource.Prefetch2L2PBOP.id.U ||
r.replacerInfo.reqSource === MemReqSource.Prefetch2L2SMS.id.U ||
r.replacerInfo.reqSource === MemReqSource.Prefetch2L2Stride.id.U ||
r.replacerInfo.reqSource === MemReqSource.Prefetch2L2Stream.id.U ||
Expand All @@ -122,6 +124,9 @@ class TopDownMonitor()(implicit p: Parameters) extends L2Module {
val l2prefetchSentBOP = dirResultMatchVec(
r => !r.hit && r.replacerInfo.reqSource === MemReqSource.Prefetch2L2BOP.id.U
)
val l2prefetchSentPBOP = dirResultMatchVec(
r => !r.hit && r.replacerInfo.reqSource === MemReqSource.Prefetch2L2PBOP.id.U
)
val l2prefetchSentSMS = dirResultMatchVec(
r => !r.hit && r.replacerInfo.reqSource === MemReqSource.Prefetch2L2SMS.id.U
)
Expand All @@ -142,6 +147,10 @@ class TopDownMonitor()(implicit p: Parameters) extends L2Module {
r => reqFromCPU(r) && r.hit &&
r.meta.prefetch.getOrElse(false.B) && r.meta.prefetchSrc.getOrElse(PfSource.NoWhere.id.U) === PfSource.BOP.id.U
)
val l2prefetchUsefulPBOP = dirResultMatchVec(
r => reqFromCPU(r) && r.hit &&
r.meta.prefetch.getOrElse(false.B) && r.meta.prefetchSrc.getOrElse(PfSource.NoWhere.id.U) === PfSource.PBOP.id.U
)
val l2prefetchUsefulSMS = dirResultMatchVec(
r => reqFromCPU(r) && r.hit &&
r.meta.prefetch.getOrElse(false.B) && r.meta.prefetchSrc.getOrElse(PfSource.NoWhere.id.U) === PfSource.SMS.id.U
Expand All @@ -168,81 +177,91 @@ class TopDownMonitor()(implicit p: Parameters) extends L2Module {
XSPerfRolling(
cacheParams, "L2PrefetchAccuracy",
PopCount(l2prefetchUseful), PopCount(l2prefetchSent),
1000, clock, reset
1000, io.debugTopDown.robTrueCommit, clock, reset
)
XSPerfRolling(
cacheParams, "L2PrefetchAccuracyBOP",
PopCount(l2prefetchUsefulBOP), PopCount(l2prefetchSentBOP),
1000, clock, reset
1000, io.debugTopDown.robTrueCommit, clock, reset
)
XSPerfRolling(
cacheParams, "L2PrefetchAccuracyPBOP",
PopCount(l2prefetchUsefulPBOP), PopCount(l2prefetchSentPBOP),
1000, io.debugTopDown.robTrueCommit, clock, reset
)
XSPerfRolling(
cacheParams, "L2PrefetchAccuracySMS",
PopCount(l2prefetchUsefulSMS), PopCount(l2prefetchSentSMS),
1000, clock, reset
1000, io.debugTopDown.robTrueCommit, clock, reset
)
XSPerfRolling(
cacheParams, "L2PrefetchAccuracyTP",
PopCount(l2prefetchUsefulTP), PopCount(l2prefetchSentTP),
1000, clock, reset
1000, io.debugTopDown.robTrueCommit, clock, reset
)
XSPerfRolling(
cacheParams, "L2PrefetchAccuracyStride",
PopCount(l2prefetchUsefulStride), PopCount(l2prefetchSentStride),
1000, clock, reset
1000, io.debugTopDown.robTrueCommit, clock, reset
)
XSPerfRolling(
cacheParams, "L2PrefetchAccuracyStream",
PopCount(l2prefetchUsefulStream), PopCount(l2prefetchSentStream),
1000, clock, reset
1000, io.debugTopDown.robTrueCommit, clock, reset
)
XSPerfRolling(
cacheParams, "L2PrefetchAccuracyTP",
PopCount(l2prefetchUsefulTP), PopCount(l2prefetchSentTP),
1000, clock, reset
1000, io.debugTopDown.robTrueCommit, clock, reset
)

// PF Late
XSPerfRolling(
cacheParams, "L2PrefetchLate",
PopCount(l2prefetchLate), PopCount(l2prefetchUseful),
1000, clock, reset
1000, io.debugTopDown.robTrueCommit, clock, reset
)

// PF Coverage
XSPerfRolling(
cacheParams, "L2PrefetchCoverage",
PopCount(l2prefetchUseful), PopCount(l2demandRequest),
1000, clock, reset
1000, io.debugTopDown.robTrueCommit, clock, reset
)
XSPerfRolling(
cacheParams, "L2PrefetchCoverageBOP",
PopCount(l2prefetchUsefulBOP), PopCount(l2demandRequest),
1000, clock, reset
1000, io.debugTopDown.robTrueCommit, clock, reset
)
XSPerfRolling(
cacheParams, "L2PrefetchCoveragePBOP",
PopCount(l2prefetchUsefulPBOP), PopCount(l2demandRequest),
1000, io.debugTopDown.robTrueCommit, clock, reset
)
XSPerfRolling(
cacheParams, "L2PrefetchCoverageSMS",
PopCount(l2prefetchUsefulSMS), PopCount(l2demandRequest),
1000, clock, reset
1000, io.debugTopDown.robTrueCommit, clock, reset
)
XSPerfRolling(
cacheParams, "L2PrefetchCoverageTP",
PopCount(l2prefetchUsefulTP), PopCount(l2demandRequest),
1000, clock, reset
1000, io.debugTopDown.robTrueCommit, clock, reset
)
XSPerfRolling(
cacheParams, "L2PrefetchCoverageStride",
PopCount(l2prefetchUsefulStride), PopCount(l2demandRequest),
1000, clock, reset
1000, io.debugTopDown.robTrueCommit, clock, reset
)
XSPerfRolling(
cacheParams, "L2PrefetchCoverageStream",
PopCount(l2prefetchUsefulStream), PopCount(l2demandRequest),
1000, clock, reset
1000, io.debugTopDown.robTrueCommit, clock, reset
)
XSPerfRolling(
cacheParams, "L2PrefetchCoverageTP",
PopCount(l2prefetchUsefulTP), PopCount(l2demandRequest),
1000, clock, reset
1000, io.debugTopDown.robTrueCommit, clock, reset
)

XSPerfAccumulate(cacheParams, "l2prefetchSent", PopCount(l2prefetchSent))
Expand Down
Loading
Loading