Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rv64v:fix bug of load whole register #2485

Merged
merged 2 commits into from
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions src/main/scala/xiangshan/backend/decode/DecodeUnit.scala
Original file line number Diff line number Diff line change
Expand Up @@ -829,13 +829,11 @@ class DecodeUnit(implicit p: Parameters) extends XSModule with DecodeUnitConstan
val uopInfoGen = Module(new UopInfoGen)
uopInfoGen.io.in.preInfo.typeOfSplit := decodedInst.uopSplitType
uopInfoGen.io.in.preInfo.vsew := decodedInst.vpu.vsew
//------------------------------------------------------
//when unit-stride instruction is load/store whole register, numOfUop should not use vtype.vlmul and should be 0
uopInfoGen.io.in.preInfo.vlmul := Mux(decodedInst.fuOpType === VlduType.vlr || decodedInst.fuOpType === VstuType.vsr,
0.U,decodedInst.vpu.vlmul)
uopInfoGen.io.in.preInfo.vlmul := decodedInst.vpu.vlmul
uopInfoGen.io.in.preInfo.vwidth := inst.RM
uopInfoGen.io.in.preInfo.nf := inst.NF
uopInfoGen.io.in.preInfo.vmvn := inst.IMM5_OPIVI(2, 0)
uopInfoGen.io.in.preInfo.isVlsr := decodedInst.fuOpType === VlduType.vlr || decodedInst.fuOpType === VstuType.vsr
io.deq.isComplex := uopInfoGen.io.out.isComplex
io.deq.uopInfo.numOfUop := uopInfoGen.io.out.uopInfo.numOfUop
io.deq.uopInfo.numOfWB := uopInfoGen.io.out.uopInfo.numOfWB
Expand Down
6 changes: 4 additions & 2 deletions src/main/scala/xiangshan/backend/decode/UopInfoGen.scala
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ class UopInfoGen (implicit p: Parameters) extends XSModule {
val vsew = Cat(0.U(1.W), io.in.preInfo.vsew)
val veew = Cat(0.U(1.W), io.in.preInfo.vwidth(1, 0))
val vmvn = io.in.preInfo.vmvn
val isVlsr = io.in.preInfo.isVlsr
val vlmul = io.in.preInfo.vlmul
val nf = io.in.preInfo.nf
val isComplex = io.out.isComplex
Expand Down Expand Up @@ -206,7 +207,7 @@ class UopInfoGen (implicit p: Parameters) extends XSModule {
UopSplitType.VEC_RGATHER -> numOfUopVrgather,
UopSplitType.VEC_RGATHER_VX -> (numOfUopVrgather +& 1.U),
UopSplitType.VEC_RGATHEREI16 -> numOfUopVrgatherei16,
UopSplitType.VEC_US_LDST -> (numOfUopVLoadStoreStrided +& 1.U), // with one move instruction
UopSplitType.VEC_US_LDST -> Mux(isVlsr, nf +& 2.U, (numOfUopVLoadStoreStrided +& 1.U)), // with one move instruction
UopSplitType.VEC_S_LDST -> (numOfUopVLoadStoreStrided +& 2.U), // with two move instructions
UopSplitType.VEC_I_LDST -> (numOfUopVLoadStoreIndexed +& 1.U),
UopSplitType.VEC_MVNR -> (vmvn +& 1.U),
Expand Down Expand Up @@ -251,7 +252,7 @@ class UopInfoGen (implicit p: Parameters) extends XSModule {
UopSplitType.VEC_RGATHER -> numOfUopVrgather,
UopSplitType.VEC_RGATHER_VX -> (numOfUopVrgather +& 1.U),
UopSplitType.VEC_RGATHEREI16 -> numOfUopVrgatherei16,
UopSplitType.VEC_US_LDST -> (numOfUopVLoadStoreStrided +& 1.U), // with one move instruction
UopSplitType.VEC_US_LDST -> Mux(isVlsr, nf +& 2.U, (numOfUopVLoadStoreStrided +& 1.U)), // with one move instruction
UopSplitType.VEC_S_LDST -> (numOfUopVLoadStoreStrided +& 2.U), // with two move instructions
UopSplitType.VEC_I_LDST -> (numOfWBVLoadStoreIndexed +& 1.U),
UopSplitType.VEC_MVNR -> (vmvn +& 1.U),
Expand Down Expand Up @@ -281,6 +282,7 @@ class PreInfo(implicit p: Parameters) extends XSBundle {
val vwidth = UInt(3.W) //eew
val nf = UInt(3.W)
val vmvn = UInt(3.W) // vmvnr
val isVlsr = Bool() // is vector whole register load/store
}

class UopInfo(implicit p: Parameters) extends XSBundle {
Expand Down
11 changes: 6 additions & 5 deletions src/main/scala/xiangshan/mem/vector/VLUopQueue.scala
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,9 @@ class VlUopQueue(implicit p: Parameters) extends VLSUModule
// when loading a whole register or a unit-stride mask, emul should be 1
val fuOpType = io.loadRegIn.bits.uop.fuOpType
val mop = fuOpType(6, 5)
val nf = io.loadRegIn.bits.uop.vpu.nf
val nf = Mux(us_whole_reg(fuOpType), 0.U, io.loadRegIn.bits.uop.vpu.nf)
val vm = io.loadRegIn.bits.uop.vpu.vm
val emul = Mux(us_whole_reg(fuOpType) || us_mask(fuOpType), 0.U(mulBits.W), EewLog2(eew) - sew + lmul)
val emul = Mux(us_whole_reg(fuOpType) ,GenUSWholeEmul(io.loadRegIn.bits.uop.vpu.nf), Mux(us_mask(fuOpType), 0.U(mulBits.W), EewLog2(eew) - sew + lmul))
val lmulLog2 = Mux(lmul.asSInt >= 0.S, 0.U, lmul)
val emulLog2 = Mux(emul.asSInt >= 0.S, 0.U, emul)
val numEewLog2 = emulLog2 - EewLog2(eew)
Expand Down Expand Up @@ -203,6 +203,7 @@ class VlUopQueue(implicit p: Parameters) extends VLSUModule
UIntToMask(flowsIncludeThisUop, VLEN + 1) &
~UIntToMask(flowsPrevThisUop, VLEN)
) >> flowsPrevThisVd)(VLENB - 1, 0)
val isUsWholeReg = isUnitStride(mop) && us_whole_reg(fuOpType)
dontTouch(flowsPrevThisUop)
dontTouch(flowsPrevThisVd)
dontTouch(flowsIncludeThisUop)
Expand All @@ -214,7 +215,7 @@ class VlUopQueue(implicit p: Parameters) extends VLSUModule
srcMaskVec(id) := srcMask
uopq(id) match { case x =>
x.uop := io.loadRegIn.bits.uop
x.uop.vpu.vl := io.loadRegIn.bits.src_vl.asTypeOf(VConfig()).vl
x.uop.vpu.vl := Mux(isUsWholeReg, GenUSWholeRegVL(io.loadRegIn.bits.uop.vpu.nf +& 1.U,eew), io.loadRegIn.bits.src_vl.asTypeOf(VConfig()).vl)
x.uop.numUops := numUops
x.uop.lastUop := (uopIdx +& 1.U) === numUops
x.flowMask := flowMask
Expand All @@ -226,7 +227,7 @@ class VlUopQueue(implicit p: Parameters) extends VLSUModule
x.flowNum := flows
x.nfields := nf +& 1.U
x.vm := vm
x.usWholeReg := isUnitStride(mop) && us_whole_reg(fuOpType)
x.usWholeReg := isUsWholeReg
x.usMaskReg := isUnitStride(mop) && us_mask(fuOpType)
x.eew := eew
x.sew := sew
Expand Down Expand Up @@ -338,7 +339,7 @@ class VlUopQueue(implicit p: Parameters) extends VLSUModule
val mask = issueEntry.byteMask
val regOffset = (elemIdxInsideField << issueAlignedType)(vOffsetBits - 1, 0)
val enable = (issueFlowMask & UIntToOH(elemIdxInsideVd(portIdx))).orR
val ttttvl = Mux(issueEntry.usWholeReg, GenUSWholeRegVL(issueNFIELDS, issueEew), Mux(issueEntry.usMaskReg, GenUSMaskRegVL(issueVl), issueVl))
val ttttvl = Mux(issueEntry.usMaskReg, GenUSMaskRegVL(issueVl), issueVl)
val exp = VLExpCtrl(
vstart = issueVstart,
vl = ttttvl,
Expand Down
11 changes: 6 additions & 5 deletions src/main/scala/xiangshan/mem/vector/VSUopQueue.scala
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,9 @@ class VsUopQueue(implicit p: Parameters) extends VLSUModule {
// when storing a whole register or a unit-stride mask, emul should be 1
val fuOpType = io.storeIn.bits.uop.fuOpType
val mop = fuOpType(6, 5)
val nf = io.storeIn.bits.uop.vpu.nf
val nf = Mux(us_whole_reg(fuOpType), 0.U, io.storeIn.bits.uop.vpu.nf)
val vm = io.storeIn.bits.uop.vpu.vm
val emul = Mux(us_whole_reg(fuOpType) || us_mask(fuOpType), 0.U(mulBits.W), EewLog2(eew) - sew + lmul)
val emul = Mux(us_whole_reg(fuOpType), GenUSWholeEmul(io.storeIn.bits.uop.vpu.nf), Mux(us_mask(fuOpType), 0.U(mulBits.W), EewLog2(eew) - sew + lmul))
val lmulLog2 = Mux(lmul.asSInt >= 0.S, 0.U, lmul)
val emulLog2 = Mux(emul.asSInt >= 0.S, 0.U, emul)
val numEewLog2 = emulLog2 - EewLog2(eew)
Expand Down Expand Up @@ -173,13 +173,14 @@ class VsUopQueue(implicit p: Parameters) extends VLSUModule {
~UIntToMask(flowsPrevThisUop, VLEN)
) >> flowsPrevThisVd)(VLENB - 1, 0)
val vlmax = GenVLMAX(lmul, sew)
val isUsWholeReg = isUnitStride(mop) && us_whole_reg(fuOpType)
valid(id) := true.B
finish(id) := false.B
exception(id) := false.B
vstart(id) := 0.U
uopq(id) match { case x =>
x.uop := io.storeIn.bits.uop
x.uop.vpu.vl := io.storeIn.bits.src_vl.asTypeOf(VConfig()).vl
x.uop.vpu.vl := Mux(isUsWholeReg, GenUSWholeRegVL(io.storeIn.bits.uop.vpu.nf +& 1.U,eew), io.storeIn.bits.src_vl.asTypeOf(VConfig()).vl)
x.uop.numUops := numUops
x.uop.lastUop := (uopIdx +& 1.U) === numUops
x.flowMask := flowMask
Expand All @@ -191,7 +192,7 @@ class VsUopQueue(implicit p: Parameters) extends VLSUModule {
x.flowNum := flows
x.nfields := nf +& 1.U
x.vm := vm
x.usWholeReg := isUnitStride(mop) && us_whole_reg(fuOpType)
x.usWholeReg := isUsWholeReg
x.usMaskReg := isUnitStride(mop) && us_mask(fuOpType)
x.eew := eew
x.sew := sew
Expand Down Expand Up @@ -298,7 +299,7 @@ class VsUopQueue(implicit p: Parameters) extends VLSUModule {
val enable = (issueFlowMask & UIntToOH(elemIdxInsideVd(portIdx))).orR
val exp = VLExpCtrl(
vstart = issueVstart,
vl = Mux(issueEntry.usWholeReg, GenUSWholeRegVL(issueNFIELDS, issueEew), Mux(issueEntry.usMaskReg, GenUSMaskRegVL(issueVl), issueVl)),
vl = Mux(issueEntry.usMaskReg, GenUSMaskRegVL(issueVl), issueVl),
eleIdx = elemIdxInsideField
) && enable

Expand Down
12 changes: 12 additions & 0 deletions src/main/scala/xiangshan/mem/vector/VecCommon.scala
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,18 @@ object GenUSWholeRegVL extends VLSUConstants {
))
}
}
/** Translates an `nf` value into an `emul` value through a fixed lookup table.
  *
  * Listed encodings: nf = 0, 1, 3, 7 select emul = 0, 1, 2, 3 respectively.
  * NOTE(review): presumably nf is "register count - 1" of a whole-register
  * load/store and emul is log2 of that count -- confirm against the RVV spec.
  * Any other nf value has no table entry; the result is then whatever
  * `LookupTree` produces when no key matches.
  *
  * @param nf value to look up
  * @return the selected emul, built from `mulBits`-wide literals
  */
object GenUSWholeEmul extends VLSUConstants {
  def apply(nf: UInt): UInt = {
    // (nf encoding, emul) pairs, kept as plain Ints and lifted to hardware literals below.
    val nfToEmul = List(0 -> 0, 1 -> 1, 3 -> 2, 7 -> 3)
    val table = nfToEmul.map { case (nfEnc, emul) =>
      nfEnc.U(3.W) -> emul.U(mulBits.W)
    }
    LookupTree(nf, table)
  }
}


object GenUSMaskRegVL extends VLSUConstants {
def apply(vl: UInt): UInt = {
(vl >> 3.U)
Expand Down