Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(commit): complete rewrite of commit mechanism #4114

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/main/scala/xiangshan/backend/Bundles.scala
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ object Bundles {
val numWB = UInt(log2Up(MaxUopSize).W) // rob need this
val commitType = CommitType() // Todo: remove it
val needFrm = new NeedFrmBundle
val lastInFtqEntry = Bool()

val debug_fuType = OptionWrapper(backendParams.debugEn, FuType())

Expand Down Expand Up @@ -180,6 +181,7 @@ object Bundles {
val crossPageIPFFix = Bool()
val ftqPtr = new FtqPtr
val ftqOffset = UInt(log2Up(PredictWidth).W)
val ftqLastOffset = UInt(log2Up(PredictWidth).W)
// passed from DecodedInst
val srcType = Vec(numSrc, SrcType())
val ldest = UInt(LogicRegsWidth.W)
Expand All @@ -195,6 +197,8 @@ object Bundles {
val blockBackward = Bool()
val flushPipe = Bool() // This inst will flush all the pipe when commit, like exception but can commit
val canRobCompress = Bool()
val crossFtqCommit = Bool()
val crossFtq = Bool()
val selImm = SelImm()
val imm = UInt(32.W)
val fpu = new FPUCtrlSignals
Expand Down
25 changes: 18 additions & 7 deletions src/main/scala/xiangshan/backend/CtrlBlock.scala
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ class CtrlBlockImp(
val intScheWbData = io.fromWB.wbData.filter(_.bits.params.schdType.isInstanceOf[IntScheduler])
val fpScheWbData = io.fromWB.wbData.filter(_.bits.params.schdType.isInstanceOf[FpScheduler])
val vfScheWbData = io.fromWB.wbData.filter(_.bits.params.schdType.isInstanceOf[VfScheduler])
val intCanCompress = intScheWbData.filter(_.bits.params.CanCompress)
val staScheWbData = io.fromWB.wbData.filter(_.bits.params.hasStoreAddrFu)
val i2vWbData = intScheWbData.filter(_.bits.params.writeVecRf)
val f2vWbData = fpScheWbData.filter(_.bits.params.writeVecRf)
val memVloadWbData = io.fromWB.wbData.filter(x => x.bits.params.schdType.isInstanceOf[MemScheduler] && x.bits.params.hasVLoadFu)
Expand All @@ -157,22 +157,23 @@ class CtrlBlockImp(
val killedByOlder = x.bits.robIdx.needFlush(Seq(s1_s3_redirect, s2_s4_redirect, s3_s5_redirect))
val delayed = Wire(Valid(UInt(io.fromWB.wbData.size.U.getWidth.W)))
delayed.valid := GatedValidRegNext(valid && !killedByOlder)
val isIntSche = intCanCompress.contains(x)
val isIntSche = intScheWbData.contains(x)
val isFpSche = fpScheWbData.contains(x)
val isVfSche = vfScheWbData.contains(x)
val isMemVload = memVloadWbData.contains(x)
val isi2v = i2vWbData.contains(x)
val isf2v = f2vWbData.contains(x)
val isStaSche = staScheWbData.contains(x)
val canSameRobidxWbData = if(isVfSche) {
i2vWbData ++ f2vWbData ++ vfScheWbData
} else if(isi2v) {
intCanCompress ++ fpScheWbData ++ vfScheWbData
intScheWbData ++ fpScheWbData ++ vfScheWbData ++ staScheWbData
} else if (isf2v) {
intCanCompress ++ fpScheWbData ++ vfScheWbData
intScheWbData ++ fpScheWbData ++ vfScheWbData ++ staScheWbData
} else if (isIntSche) {
intCanCompress ++ fpScheWbData
intScheWbData ++ fpScheWbData ++ staScheWbData
} else if (isFpSche) {
intCanCompress ++ fpScheWbData
intScheWbData ++ fpScheWbData ++ staScheWbData
} else if (isMemVload) {
memVloadWbData
} else {
Expand Down Expand Up @@ -511,6 +512,9 @@ class CtrlBlockImp(
rename.io.in(i).bits := decodePipeRename(i).bits
dispatch.io.renameIn(i).valid := decodePipeRename(i).valid && !fusionDecoder.io.clear(i) && !decodePipeRename(i).bits.isMove
dispatch.io.renameIn(i).bits := decodePipeRename(i).bits
rename.io.validVec(i) := decodePipeRename(i).valid
rename.io.isFusionVec(i) := false.B
rename.io.fusionCross2FtqVec(i) := false.B
}

for (i <- 0 until RenameWidth - 1) {
Expand All @@ -531,7 +535,14 @@ class CtrlBlockImp(
when (fusionDecoder.io.out(i).valid) {
fusionDecoder.io.out(i).bits.update(rename.io.in(i).bits)
fusionDecoder.io.out(i).bits.update(dispatch.io.renameIn(i).bits)
rename.io.in(i).bits.commitType := Mux(cond1, 4.U, Mux(cond2, 5.U, Mux(cond3, 6.U, 7.U)))
val cross2Ftq = decodePipeRename(i).bits.lastInFtqEntry && decodePipeRename(i + 1).bits.lastInFtqEntry
val cross1Ftq = decodePipeRename(i).bits.lastInFtqEntry || decodePipeRename(i + 1).bits.lastInFtqEntry
rename.io.in(i + 1).bits.lastInFtqEntry := cross1Ftq
rename.io.in(i + 1).bits.canRobCompress := !cross2Ftq
rename.io.in(i).bits.lastInFtqEntry := false.B
rename.io.in(i).bits.canRobCompress := !cross2Ftq
rename.io.isFusionVec(i) := true.B
rename.io.fusionCross2FtqVec(i) := cross2Ftq
}
XSError(fusionDecoder.io.out(i).valid && !cond1 && !cond2 && !cond3 && !cond4, p"new condition $sameFtqPtr $ftqOffset0 $ftqOffset1\n")
}
Expand Down
1 change: 1 addition & 0 deletions src/main/scala/xiangshan/backend/decode/DecodeUnit.scala
Original file line number Diff line number Diff line change
Expand Up @@ -824,6 +824,7 @@ class DecodeUnit(implicit p: Parameters) extends XSModule with DecodeUnitConstan

decodedInst.connectStaticInst(io.enq.ctrlFlow)

decodedInst.lastInFtqEntry := ctrl_flow.isLastInFtqEntry
decodedInst.uopIdx := 0.U
decodedInst.firstUop := true.B
decodedInst.lastUop := true.B
Expand Down
17 changes: 12 additions & 5 deletions src/main/scala/xiangshan/backend/dispatch/Dispatch.scala
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ class Dispatch(implicit p: Parameters) extends XSModule with HasPerfEvents {
}

val updatedUop = Wire(Vec(RenameWidth, new DynInst))
val toDqUop = Wire(Vec(RenameWidth, new DynInst))
val checkpoint_id = RegInit(0.U(64.W))
checkpoint_id := checkpoint_id + PopCount((0 until RenameWidth).map(i =>
io.fromRename(i).fire
Expand Down Expand Up @@ -270,6 +271,12 @@ class Dispatch(implicit p: Parameters) extends XSModule with HasPerfEvents {
}
}
}
// Override the FTQ index sent to the dispatch queues: because branch/store instructions can be ROB-compressed, ftqPtr/ftqOffset must be those of the last instruction in the compressed ROB entry.
for (i <- 0 until RenameWidth) {
toDqUop(i) := updatedUop(i)
toDqUop(i).ftqOffset := updatedUop(i).ftqLastOffset
toDqUop(i).ftqPtr := updatedUop(i).ftqPtr + updatedUop(i).crossFtq
}

// store set perf count
XSPerfAccumulate("waittable_load_wait", PopCount((0 until RenameWidth).map(i =>
Expand Down Expand Up @@ -362,27 +369,27 @@ class Dispatch(implicit p: Parameters) extends XSModule with HasPerfEvents {
io.toIntDq0.needAlloc(i) := io.fromRename(i).valid && isIntDq0(i) && !doesNotNeedExec && toIntDq0Valid(i)
io.toIntDq0.req(i).valid := io.fromRename(i).valid && isIntDq0(i) && !doesNotNeedExec && toIntDq0Valid(i) &&
canEnterDpq && dqCanAccept
io.toIntDq0.req(i).bits := updatedUop(i)
io.toIntDq0.req(i).bits := toDqUop(i)

io.toIntDq1.needAlloc(i) := io.fromRename(i).valid && isIntDq1(i) && !doesNotNeedExec && toIntDq1Valid(i)
io.toIntDq1.req(i).valid := io.fromRename(i).valid && isIntDq1(i) && !doesNotNeedExec && toIntDq1Valid(i) &&
canEnterDpq && dqCanAccept
io.toIntDq1.req(i).bits := updatedUop(i)
io.toIntDq1.req(i).bits := toDqUop(i)

io.toFpDq.needAlloc(i) := io.fromRename(i).valid && isFp(i)
io.toFpDq.req(i).valid := io.fromRename(i).valid && isFp(i) &&
canEnterDpq && dqCanAccept
io.toFpDq.req(i).bits := updatedUop(i)
io.toFpDq.req(i).bits := toDqUop(i)

io.toVecDq.needAlloc(i) := io.fromRename(i).valid && isVec(i)
io.toVecDq.req(i).valid := io.fromRename(i).valid && isVec(i) &&
canEnterDpq && dqCanAccept
io.toVecDq.req(i).bits := updatedUop(i)
io.toVecDq.req(i).bits := toDqUop(i)

io.toLsDq.needAlloc(i) := io.fromRename(i).valid && isMem(i)
io.toLsDq.req(i).valid := io.fromRename(i).valid && isMem(i) &&
canEnterDpq && dqCanAccept
io.toLsDq.req(i).bits := updatedUop(i)
io.toLsDq.req(i).bits := toDqUop(i)

//delete trigger message from frontend
io.toDq.map(dq => { dq.req(i).bits.trigger := TriggerAction.None })
Expand Down
3 changes: 3 additions & 0 deletions src/main/scala/xiangshan/backend/fu/FuType.scala
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ object FuType extends OHEnumeration {
val fpOP = fpArithAll ++ Seq(i2f, i2v)
val scalaNeedFrm = Seq(i2f, fmac, fDivSqrt)
val vectorNeedFrm = Seq(vfalu, vfma, vfdiv, vfcvt)
val blockBackCompress = Seq(brh, jmp, stu)

def X = BitPat.N(num) // Todo: Don't Care

Expand Down Expand Up @@ -211,6 +212,8 @@ object FuType extends OHEnumeration {

def isVectorNeedFrm(fuType: UInt): Bool = FuTypeOrR(fuType, vectorNeedFrm)

// Tests fuType against the blockBackCompress list (brh, jmp, stu) by delegating to FuTypeOrR.
// NOTE(review): presumably true when fuType matches any listed type — confirm against FuTypeOrR.
def isBlockBackCompress(fuType: UInt): Bool = FuTypeOrR(fuType, blockBackCompress)

object FuTypeOrR {
def apply(fuType: UInt, fu0: OHType, fus: OHType*): Bool = {
apply(fuType, fu0 +: fus)
Expand Down
33 changes: 25 additions & 8 deletions src/main/scala/xiangshan/backend/rename/CompressUnit.scala
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,12 @@ import chisel3._
import chisel3.util._
import freechips.rocketchip.rocket.DecodeLogic
import xiangshan._
import xiangshan.backend.fu.FuType

class CompressUnit(implicit p: Parameters) extends XSModule{
val io = IO(new Bundle {
val in = Vec(RenameWidth, Flipped(Valid(new DecodedInst)))
val oddFtqVec = Vec(RenameWidth, Input(Bool()))
val out = new Bundle {
val needRobFlags = Vec(RenameWidth, Output(Bool()))
val instrSizes = Vec(RenameWidth, Output(UInt(log2Ceil(RenameWidth + 1).W)))
Expand All @@ -49,27 +51,42 @@ class CompressUnit(implicit p: Parameters) extends XSModule{
val noExc = io.in.map(in => !in.bits.exceptionVec.asUInt.orR && !TriggerAction.isDmode(in.bits.trigger))
val uopCanCompress = io.in.map(_.bits.canRobCompress)
val canCompress = io.in.zip(noExc).zip(uopCanCompress).map { case ((in, noExc), canComp) =>
in.valid && !CommitType.isFused(in.bits.commitType) && in.bits.lastUop && noExc && canComp
in.valid && in.bits.lastUop && noExc && canComp
}
val extendedCanCompress = canCompress.zip(io.in).zip(io.oddFtqVec).flatMap { case ((canComp, in), oddFtq) =>
Seq(FuType.isBlockBackCompress(in.bits.fuType) || canComp ,canComp && !oddFtq)
}

val compressTable = (0 until 1 << RenameWidth).map { case keyCandidate =>
val compressTable = (0 until 1 << (2 * RenameWidth)).filter { baseCandidate =>
// exclude candidates in which any instruction's 2-bit pair equals 0x2 (low bit 0, high bit 1)
!(0 until RenameWidth).exists { i =>
val bitPair = (baseCandidate >> (2 * i)) & 0x3
bitPair == 0x2
}
}.zipWithIndex.map{ case (keyCandidate, index) =>
// padding 0s at each side for convenience
val key = 0 +: (0 until RenameWidth).map(idx => (keyCandidate >> idx) & 1) :+ 0
val key = 0 +: (0 until RenameWidth * 2).map(idx => (keyCandidate >> idx) & 1) :+ 0
// count 1s on the left side of key (including itself)
def cntL(idx: Int): Int = (if (key(idx - 1) == 1) cntL(idx - 1) else 0) + key(idx)
// count 1s on the right side of key (including itself)
def cntR(idx: Int): Int = (if (key(idx + 1) == 1) cntR(idx + 1) else 0) + key(idx)
// the last instruction among consecutive rob-compressed instructions is marked
val needRobs = (0 until RenameWidth).map(idx => ~(key.tail(idx) & key.tail(idx + 1)) & 1)
val needRobsExpand = (0 until RenameWidth * 2).map( idx => ~(key.tail(idx) & key.tail(idx + 1)) & 1)
val needRobs = needRobsExpand.grouped(2).map(group => group.reduce(_ | _)).toIndexedSeq
// how many instructions are rob-compressed with this instruction (including itself)
val uopSizes = (1 to RenameWidth).map(idx => if (key(idx) == 0) 1 else cntL(idx) + cntR(idx) - 1)
val uopSizes = (1 to RenameWidth).map{ idx =>
val i = idx * 2 - 1
if (key(i) == 0) 1 else (cntL(i) + cntR(i)) / 2
}
// which instructions are rob-compressed with this instruction
val masks = uopSizes.zip(1 to RenameWidth).map { case (size, idx) => // compress masks
if (key(idx) == 0) Seq.fill(RenameWidth)(0).updated(idx - 1, 1)
else Seq.fill(RenameWidth)(0).patch(idx - cntL(idx), Seq.fill(size)(1), size)
val i = idx * 2 - 1
if (key(i) == 0) Seq.fill(RenameWidth)(0).updated(idx - 1, 1)
else Seq.fill(RenameWidth)(0).patch(idx - (cntL(i) + 1)/2, Seq.fill(size)(1), size)
}

println("[Rename.Compress]" +
" index: " + index +
" i: " + keyCandidate +
" key: " + key.tail.dropRight(1) +
" needRobs: " + needRobs +
Expand All @@ -86,7 +103,7 @@ class CompressUnit(implicit p: Parameters) extends XSModule{
}

val default = Seq.fill(3 * RenameWidth)(BitPat.N())
val decoder = DecodeLogic(VecInit(canCompress).asUInt, default, compressTable)
val decoder = DecodeLogic(VecInit(extendedCanCompress).asUInt, default, compressTable)
(io.out.needRobFlags ++ io.out.instrSizes ++ io.out.masks).zip(decoder).foreach {
case (sink, source) => sink := source
}
Expand Down
Loading
Loading