Skip to content

Commit

Permalink
rv64v: fix whole-register load/store bug (#2485)
Browse files Browse the repository at this point in the history
* decode: fix decode of vs*r/vl*re*

Co-authored-by:  Ziyue Zhang <[email protected]>

* UopQueue: fix nfields and emul computation for whole-register store/load

---------

Co-authored-by: Ziyue Zhang <[email protected]>
  • Loading branch information
2 people authored and huxuan0307 committed Nov 20, 2023
1 parent 492aae8 commit 06cb2bc
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 16 deletions.
6 changes: 2 additions & 4 deletions src/main/scala/xiangshan/backend/decode/DecodeUnit.scala
Original file line number Diff line number Diff line change
Expand Up @@ -828,13 +828,11 @@ class DecodeUnit(implicit p: Parameters) extends XSModule with DecodeUnitConstan
val uopInfoGen = Module(new UopInfoGen)
uopInfoGen.io.in.preInfo.typeOfSplit := decodedInst.uopSplitType
uopInfoGen.io.in.preInfo.vsew := decodedInst.vpu.vsew
//------------------------------------------------------
//when unit-stride instruction is load/store whole register, numOfUop should not use vtype.vlmul and should be 0
uopInfoGen.io.in.preInfo.vlmul := Mux(decodedInst.fuOpType === VlduType.vlr || decodedInst.fuOpType === VstuType.vsr,
0.U,decodedInst.vpu.vlmul)
uopInfoGen.io.in.preInfo.vlmul := decodedInst.vpu.vlmul
uopInfoGen.io.in.preInfo.vwidth := inst.RM
uopInfoGen.io.in.preInfo.vmvn := inst.IMM5_OPIVI(2, 0)
uopInfoGen.io.in.preInfo.nf := inst.NF
uopInfoGen.io.in.preInfo.isVlsr := decodedInst.fuOpType === VlduType.vlr || decodedInst.fuOpType === VstuType.vsr
io.deq.isComplex := uopInfoGen.io.out.isComplex
io.deq.uopInfo.numOfUop := uopInfoGen.io.out.uopInfo.numOfUop
io.deq.uopInfo.numOfWB := uopInfoGen.io.out.uopInfo.numOfWB
Expand Down
6 changes: 4 additions & 2 deletions src/main/scala/xiangshan/backend/decode/UopInfoGen.scala
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ class UopInfoGen (implicit p: Parameters) extends XSModule {
val vsew = Cat(0.U(1.W), io.in.preInfo.vsew)
val veew = Cat(0.U(1.W), io.in.preInfo.vwidth(1, 0))
val vmvn = io.in.preInfo.vmvn
val isVlsr = io.in.preInfo.isVlsr
val vlmul = io.in.preInfo.vlmul
val nf = io.in.preInfo.nf
val isComplex = io.out.isComplex
Expand Down Expand Up @@ -204,7 +205,7 @@ class UopInfoGen (implicit p: Parameters) extends XSModule {
UopSplitType.VEC_RGATHER_VX -> (numOfUopVrgather +& 1.U),
UopSplitType.VEC_RGATHEREI16 -> numOfUopVrgatherei16,
UopSplitType.VEC_MVNR -> (vmvn +& 1.U),
UopSplitType.VEC_US_LDST -> (numOfUopVLoadStoreStrided +& 1.U), // with one move instruction
UopSplitType.VEC_US_LDST -> Mux(isVlsr, nf +& 2.U, (numOfUopVLoadStoreStrided +& 1.U)), // with one move instruction
UopSplitType.VEC_S_LDST -> (numOfUopVLoadStoreStrided +& 2.U), // with two move instructions
UopSplitType.VEC_I_LDST -> (numOfUopVLoadStoreIndexed +& 1.U),
))
Expand Down Expand Up @@ -245,7 +246,7 @@ class UopInfoGen (implicit p: Parameters) extends XSModule {
UopSplitType.VEC_RGATHER -> numOfUopVrgather,
UopSplitType.VEC_RGATHER_VX -> (numOfUopVrgather +& 1.U),
UopSplitType.VEC_RGATHEREI16 -> numOfUopVrgatherei16,
UopSplitType.VEC_US_LDST -> (numOfUopVLoadStoreStrided +& 1.U), // with one move instruction
UopSplitType.VEC_US_LDST -> Mux(isVlsr, nf +& 2.U, (numOfUopVLoadStoreStrided +& 1.U)), // with one move instruction
UopSplitType.VEC_S_LDST -> (numOfUopVLoadStoreStrided +& 2.U), // with two move instructions
UopSplitType.VEC_I_LDST -> (numOfWBVLoadStoreIndexed +& 1.U),
UopSplitType.VEC_MVNR -> (vmvn +& 1.U),
Expand Down Expand Up @@ -275,6 +276,7 @@ class PreInfo(implicit p: Parameters) extends XSBundle {
val vwidth = UInt(3.W) //eew
val nf = UInt(3.W)
val vmvn = UInt(3.W) // vmvnr
val isVlsr = Bool() // is vector whole register load/store
}

class UopInfo(implicit p: Parameters) extends XSBundle {
Expand Down
11 changes: 6 additions & 5 deletions src/main/scala/xiangshan/mem/vector/VLUopQueue.scala
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,9 @@ class VlUopQueue(implicit p: Parameters) extends VLSUModule
// when unit-stride masked, emul should be 1; when load whole register, emul is derived from nf
val fuOpType = io.loadRegIn.bits.uop.fuOpType
val mop = fuOpType(6, 5)
val nf = io.loadRegIn.bits.uop.vpu.nf
val nf = Mux(us_whole_reg(fuOpType), 0.U, io.loadRegIn.bits.uop.vpu.nf)
val vm = io.loadRegIn.bits.uop.vpu.vm
val emul = Mux(us_whole_reg(fuOpType) || us_mask(fuOpType), 0.U(mulBits.W), EewLog2(eew) - sew + lmul)
val emul = Mux(us_whole_reg(fuOpType) ,GenUSWholeEmul(io.loadRegIn.bits.uop.vpu.nf), Mux(us_mask(fuOpType), 0.U(mulBits.W), EewLog2(eew) - sew + lmul))
val lmulLog2 = Mux(lmul.asSInt >= 0.S, 0.U, lmul)
val emulLog2 = Mux(emul.asSInt >= 0.S, 0.U, emul)
val numEewLog2 = emulLog2 - EewLog2(eew)
Expand Down Expand Up @@ -203,6 +203,7 @@ class VlUopQueue(implicit p: Parameters) extends VLSUModule
UIntToMask(flowsIncludeThisUop, VLEN + 1) &
~UIntToMask(flowsPrevThisUop, VLEN)
) >> flowsPrevThisVd)(VLENB - 1, 0)
val isUsWholeReg = isUnitStride(mop) && us_whole_reg(fuOpType)
dontTouch(flowsPrevThisUop)
dontTouch(flowsPrevThisVd)
dontTouch(flowsIncludeThisUop)
Expand All @@ -214,7 +215,7 @@ class VlUopQueue(implicit p: Parameters) extends VLSUModule
srcMaskVec(id) := srcMask
uopq(id) match { case x =>
x.uop := io.loadRegIn.bits.uop
x.uop.vpu.vl := io.loadRegIn.bits.src_vl.asTypeOf(VConfig()).vl
x.uop.vpu.vl := Mux(isUsWholeReg, GenUSWholeRegVL(io.loadRegIn.bits.uop.vpu.nf +& 1.U,eew), io.loadRegIn.bits.src_vl.asTypeOf(VConfig()).vl)
x.uop.numUops := numUops
x.uop.lastUop := (uopIdx +& 1.U) === numUops
x.flowMask := flowMask
Expand All @@ -226,7 +227,7 @@ class VlUopQueue(implicit p: Parameters) extends VLSUModule
x.flowNum := flows
x.nfields := nf +& 1.U
x.vm := vm
x.usWholeReg := isUnitStride(mop) && us_whole_reg(fuOpType)
x.usWholeReg := isUsWholeReg
x.usMaskReg := isUnitStride(mop) && us_mask(fuOpType)
x.eew := eew
x.sew := sew
Expand Down Expand Up @@ -338,7 +339,7 @@ class VlUopQueue(implicit p: Parameters) extends VLSUModule
val mask = issueEntry.byteMask
val regOffset = (elemIdxInsideField << issueAlignedType)(vOffsetBits - 1, 0)
val enable = (issueFlowMask & UIntToOH(elemIdxInsideVd(portIdx))).orR
val ttttvl = Mux(issueEntry.usWholeReg, GenUSWholeRegVL(issueNFIELDS, issueEew), Mux(issueEntry.usMaskReg, GenUSMaskRegVL(issueVl), issueVl))
val ttttvl = Mux(issueEntry.usMaskReg, GenUSMaskRegVL(issueVl), issueVl)
val exp = VLExpCtrl(
vstart = issueVstart,
vl = ttttvl,
Expand Down
11 changes: 6 additions & 5 deletions src/main/scala/xiangshan/mem/vector/VSUopQueue.scala
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,9 @@ class VsUopQueue(implicit p: Parameters) extends VLSUModule {
// when unit-stride masked, emul should be 1; when store whole register, emul is derived from nf
val fuOpType = io.storeIn.bits.uop.fuOpType
val mop = fuOpType(6, 5)
val nf = io.storeIn.bits.uop.vpu.nf
val nf = Mux(us_whole_reg(fuOpType), 0.U, io.storeIn.bits.uop.vpu.nf)
val vm = io.storeIn.bits.uop.vpu.vm
val emul = Mux(us_whole_reg(fuOpType) || us_mask(fuOpType), 0.U(mulBits.W), EewLog2(eew) - sew + lmul)
val emul = Mux(us_whole_reg(fuOpType), GenUSWholeEmul(io.storeIn.bits.uop.vpu.nf), Mux(us_mask(fuOpType), 0.U(mulBits.W), EewLog2(eew) - sew + lmul))
val lmulLog2 = Mux(lmul.asSInt >= 0.S, 0.U, lmul)
val emulLog2 = Mux(emul.asSInt >= 0.S, 0.U, emul)
val numEewLog2 = emulLog2 - EewLog2(eew)
Expand Down Expand Up @@ -173,13 +173,14 @@ class VsUopQueue(implicit p: Parameters) extends VLSUModule {
~UIntToMask(flowsPrevThisUop, VLEN)
) >> flowsPrevThisVd)(VLENB - 1, 0)
val vlmax = GenVLMAX(lmul, sew)
val isUsWholeReg = isUnitStride(mop) && us_whole_reg(fuOpType)
valid(id) := true.B
finish(id) := false.B
exception(id) := false.B
vstart(id) := 0.U
uopq(id) match { case x =>
x.uop := io.storeIn.bits.uop
x.uop.vpu.vl := io.storeIn.bits.src_vl.asTypeOf(VConfig()).vl
x.uop.vpu.vl := Mux(isUsWholeReg, GenUSWholeRegVL(io.storeIn.bits.uop.vpu.nf +& 1.U,eew), io.storeIn.bits.src_vl.asTypeOf(VConfig()).vl)
x.uop.numUops := numUops
x.uop.lastUop := (uopIdx +& 1.U) === numUops
x.flowMask := flowMask
Expand All @@ -191,7 +192,7 @@ class VsUopQueue(implicit p: Parameters) extends VLSUModule {
x.flowNum := flows
x.nfields := nf +& 1.U
x.vm := vm
x.usWholeReg := isUnitStride(mop) && us_whole_reg(fuOpType)
x.usWholeReg := isUsWholeReg
x.usMaskReg := isUnitStride(mop) && us_mask(fuOpType)
x.eew := eew
x.sew := sew
Expand Down Expand Up @@ -298,7 +299,7 @@ class VsUopQueue(implicit p: Parameters) extends VLSUModule {
val enable = (issueFlowMask & UIntToOH(elemIdxInsideVd(portIdx))).orR
val exp = VLExpCtrl(
vstart = issueVstart,
vl = Mux(issueEntry.usWholeReg, GenUSWholeRegVL(issueNFIELDS, issueEew), Mux(issueEntry.usMaskReg, GenUSMaskRegVL(issueVl), issueVl)),
vl = Mux(issueEntry.usMaskReg, GenUSMaskRegVL(issueVl), issueVl),
eleIdx = elemIdxInsideField
) && enable

Expand Down
12 changes: 12 additions & 0 deletions src/main/scala/xiangshan/mem/vector/VecCommon.scala
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,18 @@ object GenUSWholeRegVL extends VLSUConstants {
))
}
}
/**
 * Selects the emul encoding used for unit-stride whole-register load/store
 * from the instruction's nf field.
 *
 * Lookup table (nf -> result, each result is mulBits wide):
 *   0 -> 0, 1 -> 1, 3 -> 2, 7 -> 3
 *
 * NOTE(review): presumably the result is log2 of the register count (nf + 1) - confirm.
 * NOTE(review): the value produced for an nf outside the table is decided by LookupTree,
 * which is not defined here - confirm before relying on it.
 */
object GenUSWholeEmul extends VLSUConstants {
  def apply(nf: UInt): UInt = {
    // (nf encoding, emul encoding) pairs; keys are 3-bit literals, matching the nf field width.
    val nfToEmul = Seq(0 -> 0, 1 -> 1, 3 -> 2, 7 -> 3).map {
      case (nfCode, emulCode) => nfCode.U(3.W) -> emulCode.U(mulBits.W)
    }
    LookupTree(nf, nfToEmul)
  }
}


object GenUSMaskRegVL extends VLSUConstants {
def apply(vl: UInt): UInt = {
(vl >> 3.U)
Expand Down

0 comments on commit 06cb2bc

Please sign in to comment.