From 06cb2bc1c357185706f76984bcc822e898a9ed60 Mon Sep 17 00:00:00 2001 From: weidingliu <47169884+weidingliu@users.noreply.github.com> Date: Fri, 17 Nov 2023 14:30:03 +0800 Subject: [PATCH] =?UTF-8?q?rv64v=EF=BC=9Afix=20bug=20of=20load=20whole=20r?= =?UTF-8?q?egister=20(#2485)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * decode:fix decode of vs*r/vl*re* Co-authored-by: Ziyue Zhang * UopQueue: fix bug in nfields and emul in store/load whole register --------- Co-authored-by: Ziyue Zhang --- .../scala/xiangshan/backend/decode/DecodeUnit.scala | 6 ++---- .../scala/xiangshan/backend/decode/UopInfoGen.scala | 6 ++++-- src/main/scala/xiangshan/mem/vector/VLUopQueue.scala | 11 ++++++----- src/main/scala/xiangshan/mem/vector/VSUopQueue.scala | 11 ++++++----- src/main/scala/xiangshan/mem/vector/VecCommon.scala | 12 ++++++++++++ 5 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/main/scala/xiangshan/backend/decode/DecodeUnit.scala b/src/main/scala/xiangshan/backend/decode/DecodeUnit.scala index 83be274de39..5b45b3922cc 100644 --- a/src/main/scala/xiangshan/backend/decode/DecodeUnit.scala +++ b/src/main/scala/xiangshan/backend/decode/DecodeUnit.scala @@ -828,13 +828,11 @@ class DecodeUnit(implicit p: Parameters) extends XSModule with DecodeUnitConstan val uopInfoGen = Module(new UopInfoGen) uopInfoGen.io.in.preInfo.typeOfSplit := decodedInst.uopSplitType uopInfoGen.io.in.preInfo.vsew := decodedInst.vpu.vsew - //------------------------------------------------------ - //when unit-stride instruction is load/store whole register, numOfUop should not use vtype.vlmul and should be 0 - uopInfoGen.io.in.preInfo.vlmul := Mux(decodedInst.fuOpType === VlduType.vlr || decodedInst.fuOpType === VstuType.vsr, - 0.U,decodedInst.vpu.vlmul) + uopInfoGen.io.in.preInfo.vlmul := decodedInst.vpu.vlmul uopInfoGen.io.in.preInfo.vwidth := inst.RM uopInfoGen.io.in.preInfo.vmvn := inst.IMM5_OPIVI(2, 0) uopInfoGen.io.in.preInfo.nf := inst.NF + uopInfoGen.io.in.preInfo.isVlsr := decodedInst.fuOpType === VlduType.vlr || decodedInst.fuOpType === VstuType.vsr io.deq.isComplex := uopInfoGen.io.out.isComplex io.deq.uopInfo.numOfUop := uopInfoGen.io.out.uopInfo.numOfUop io.deq.uopInfo.numOfWB := uopInfoGen.io.out.uopInfo.numOfWB diff --git a/src/main/scala/xiangshan/backend/decode/UopInfoGen.scala b/src/main/scala/xiangshan/backend/decode/UopInfoGen.scala index 888d9237d7d..6beca377cc1 100644 --- a/src/main/scala/xiangshan/backend/decode/UopInfoGen.scala +++ b/src/main/scala/xiangshan/backend/decode/UopInfoGen.scala @@ -85,6 +85,7 @@ class UopInfoGen (implicit p: Parameters) extends XSModule { val vsew = Cat(0.U(1.W), io.in.preInfo.vsew) val veew = Cat(0.U(1.W), io.in.preInfo.vwidth(1, 0)) val vmvn = io.in.preInfo.vmvn + val isVlsr = io.in.preInfo.isVlsr val vlmul = io.in.preInfo.vlmul val nf = io.in.preInfo.nf val isComplex = io.out.isComplex @@ -204,7 +205,7 @@ class UopInfoGen (implicit p: Parameters) extends XSModule { UopSplitType.VEC_RGATHER_VX -> (numOfUopVrgather +& 1.U), UopSplitType.VEC_RGATHEREI16 -> numOfUopVrgatherei16, UopSplitType.VEC_MVNR -> (vmvn +& 1.U), - UopSplitType.VEC_US_LDST -> (numOfUopVLoadStoreStrided +& 1.U), // with one move instruction + UopSplitType.VEC_US_LDST -> Mux(isVlsr, nf +& 2.U, (numOfUopVLoadStoreStrided +& 1.U)), // with one move instruction UopSplitType.VEC_S_LDST -> (numOfUopVLoadStoreStrided +& 2.U), // with two move instructions UopSplitType.VEC_I_LDST -> (numOfUopVLoadStoreIndexed +& 1.U), )) @@ -245,7 +246,7 @@ class UopInfoGen (implicit p: Parameters) extends XSModule { UopSplitType.VEC_RGATHER -> numOfUopVrgather, UopSplitType.VEC_RGATHER_VX -> (numOfUopVrgather +& 1.U), UopSplitType.VEC_RGATHEREI16 -> numOfUopVrgatherei16, - UopSplitType.VEC_US_LDST -> (numOfUopVLoadStoreStrided +& 1.U), // with one move instruction + UopSplitType.VEC_US_LDST -> Mux(isVlsr, nf +& 2.U, (numOfUopVLoadStoreStrided +& 1.U)), // with one move instruction UopSplitType.VEC_S_LDST -> (numOfUopVLoadStoreStrided +& 2.U), // with two move instructions UopSplitType.VEC_I_LDST -> (numOfWBVLoadStoreIndexed +& 1.U), UopSplitType.VEC_MVNR -> (vmvn +& 1.U), @@ -275,6 +276,7 @@ class PreInfo(implicit p: Parameters) extends XSBundle { val vwidth = UInt(3.W) //eew val nf = UInt(3.W) val vmvn = UInt(3.W) // vmvnr + val isVlsr = Bool() // is vector whole register load/store } class UopInfo(implicit p: Parameters) extends XSBundle { diff --git a/src/main/scala/xiangshan/mem/vector/VLUopQueue.scala b/src/main/scala/xiangshan/mem/vector/VLUopQueue.scala index 352524c20f0..25e85fcbacb 100644 --- a/src/main/scala/xiangshan/mem/vector/VLUopQueue.scala +++ b/src/main/scala/xiangshan/mem/vector/VLUopQueue.scala @@ -154,9 +154,9 @@ class VlUopQueue(implicit p: Parameters) extends VLSUModule // when load whole register or unit-stride masked , emul should be 1 val fuOpType = io.loadRegIn.bits.uop.fuOpType val mop = fuOpType(6, 5) - val nf = io.loadRegIn.bits.uop.vpu.nf + val nf = Mux(us_whole_reg(fuOpType), 0.U, io.loadRegIn.bits.uop.vpu.nf) val vm = io.loadRegIn.bits.uop.vpu.vm - val emul = Mux(us_whole_reg(fuOpType) || us_mask(fuOpType), 0.U(mulBits.W), EewLog2(eew) - sew + lmul) + val emul = Mux(us_whole_reg(fuOpType) ,GenUSWholeEmul(io.loadRegIn.bits.uop.vpu.nf), Mux(us_mask(fuOpType), 0.U(mulBits.W), EewLog2(eew) - sew + lmul)) val lmulLog2 = Mux(lmul.asSInt >= 0.S, 0.U, lmul) val emulLog2 = Mux(emul.asSInt >= 0.S, 0.U, emul) val numEewLog2 = emulLog2 - EewLog2(eew) @@ -203,6 +203,7 @@ class VlUopQueue(implicit p: Parameters) extends VLSUModule UIntToMask(flowsIncludeThisUop, VLEN + 1) & ~UIntToMask(flowsPrevThisUop, VLEN) ) >> flowsPrevThisVd)(VLENB - 1, 0) + val isUsWholeReg = isUnitStride(mop) && us_whole_reg(fuOpType) dontTouch(flowsPrevThisUop) dontTouch(flowsPrevThisVd) dontTouch(flowsIncludeThisUop) @@ -214,7 +215,7 @@ class VlUopQueue(implicit p: Parameters) extends VLSUModule srcMaskVec(id) := srcMask uopq(id) match { case x => x.uop := io.loadRegIn.bits.uop - x.uop.vpu.vl := io.loadRegIn.bits.src_vl.asTypeOf(VConfig()).vl + x.uop.vpu.vl := Mux(isUsWholeReg, GenUSWholeRegVL(io.loadRegIn.bits.uop.vpu.nf +& 1.U,eew), io.loadRegIn.bits.src_vl.asTypeOf(VConfig()).vl) x.uop.numUops := numUops x.uop.lastUop := (uopIdx +& 1.U) === numUops x.flowMask := flowMask @@ -226,7 +227,7 @@ class VlUopQueue(implicit p: Parameters) extends VLSUModule x.flowNum := flows x.nfields := nf +& 1.U x.vm := vm - x.usWholeReg := isUnitStride(mop) && us_whole_reg(fuOpType) + x.usWholeReg := isUsWholeReg x.usMaskReg := isUnitStride(mop) && us_mask(fuOpType) x.eew := eew x.sew := sew @@ -338,7 +339,7 @@ class VlUopQueue(implicit p: Parameters) extends VLSUModule val mask = issueEntry.byteMask val regOffset = (elemIdxInsideField << issueAlignedType)(vOffsetBits - 1, 0) val enable = (issueFlowMask & UIntToOH(elemIdxInsideVd(portIdx))).orR - val ttttvl = Mux(issueEntry.usWholeReg, GenUSWholeRegVL(issueNFIELDS, issueEew), Mux(issueEntry.usMaskReg, GenUSMaskRegVL(issueVl), issueVl)) + val ttttvl = Mux(issueEntry.usMaskReg, GenUSMaskRegVL(issueVl), issueVl) val exp = VLExpCtrl( vstart = issueVstart, vl = ttttvl, diff --git a/src/main/scala/xiangshan/mem/vector/VSUopQueue.scala b/src/main/scala/xiangshan/mem/vector/VSUopQueue.scala index baf5f0b634d..711571524f9 100644 --- a/src/main/scala/xiangshan/mem/vector/VSUopQueue.scala +++ b/src/main/scala/xiangshan/mem/vector/VSUopQueue.scala @@ -107,9 +107,9 @@ class VsUopQueue(implicit p: Parameters) extends VLSUModule { // when store whole register or unit-stride masked , emul should be 1 val fuOpType = io.storeIn.bits.uop.fuOpType val mop = fuOpType(6, 5) - val nf = io.storeIn.bits.uop.vpu.nf + val nf = Mux(us_whole_reg(fuOpType), 0.U, io.storeIn.bits.uop.vpu.nf) val vm = io.storeIn.bits.uop.vpu.vm - val emul = Mux(us_whole_reg(fuOpType) || us_mask(fuOpType), 0.U(mulBits.W), EewLog2(eew) - sew + lmul) + val emul = Mux(us_whole_reg(fuOpType), GenUSWholeEmul(io.storeIn.bits.uop.vpu.nf), Mux(us_mask(fuOpType), 0.U(mulBits.W), EewLog2(eew) - sew + lmul)) val lmulLog2 = Mux(lmul.asSInt >= 0.S, 0.U, lmul) val emulLog2 = Mux(emul.asSInt >= 0.S, 0.U, emul) val numEewLog2 = emulLog2 - EewLog2(eew) @@ -173,13 +173,14 @@ class VsUopQueue(implicit p: Parameters) extends VLSUModule { ~UIntToMask(flowsPrevThisUop, VLEN) ) >> flowsPrevThisVd)(VLENB - 1, 0) val vlmax = GenVLMAX(lmul, sew) + val isUsWholeReg = isUnitStride(mop) && us_whole_reg(fuOpType) valid(id) := true.B finish(id) := false.B exception(id) := false.B vstart(id) := 0.U uopq(id) match { case x => x.uop := io.storeIn.bits.uop - x.uop.vpu.vl := io.storeIn.bits.src_vl.asTypeOf(VConfig()).vl + x.uop.vpu.vl := Mux(isUsWholeReg, GenUSWholeRegVL(io.storeIn.bits.uop.vpu.nf +& 1.U,eew), io.storeIn.bits.src_vl.asTypeOf(VConfig()).vl) x.uop.numUops := numUops x.uop.lastUop := (uopIdx +& 1.U) === numUops x.flowMask := flowMask @@ -191,7 +192,7 @@ class VsUopQueue(implicit p: Parameters) extends VLSUModule { x.flowNum := flows x.nfields := nf +& 1.U x.vm := vm - x.usWholeReg := isUnitStride(mop) && us_whole_reg(fuOpType) + x.usWholeReg := isUsWholeReg x.usMaskReg := isUnitStride(mop) && us_mask(fuOpType) x.eew := eew x.sew := sew @@ -298,7 +299,7 @@ class VsUopQueue(implicit p: Parameters) extends VLSUModule { val enable = (issueFlowMask & UIntToOH(elemIdxInsideVd(portIdx))).orR val exp = VLExpCtrl( vstart = issueVstart, - vl = Mux(issueEntry.usWholeReg, GenUSWholeRegVL(issueNFIELDS, issueEew), Mux(issueEntry.usMaskReg, GenUSMaskRegVL(issueVl), issueVl)), + vl = Mux(issueEntry.usMaskReg, GenUSMaskRegVL(issueVl), issueVl), eleIdx = elemIdxInsideField ) && enable diff --git a/src/main/scala/xiangshan/mem/vector/VecCommon.scala b/src/main/scala/xiangshan/mem/vector/VecCommon.scala index d7f1a8e8ba0..2875444c9b6 100644 --- a/src/main/scala/xiangshan/mem/vector/VecCommon.scala +++ b/src/main/scala/xiangshan/mem/vector/VecCommon.scala @@ -628,6 +628,18 @@ object GenUSWholeRegVL extends VLSUConstants { )) } } +object GenUSWholeEmul extends VLSUConstants{ + def apply(nf: UInt): UInt={ + LookupTree(nf,List( + "b000".U -> "b000".U(mulBits.W), + "b001".U -> "b001".U(mulBits.W), + "b011".U -> "b010".U(mulBits.W), + "b111".U -> "b011".U(mulBits.W) + )) + } +} + + object GenUSMaskRegVL extends VLSUConstants { def apply(vl: UInt): UInt = { (vl >> 3.U)