diff --git a/ext/softfloat/SConscript b/ext/softfloat/SConscript index 689cbcf925..c145ad5975 100644 --- a/ext/softfloat/SConscript +++ b/ext/softfloat/SConscript @@ -160,6 +160,8 @@ SoftfloatFile('f64_to_ui32.c') SoftfloatFile('f64_to_ui32_r_minMag.c') SoftfloatFile('f64_to_ui64.c') SoftfloatFile('f64_to_ui64_r_minMag.c') +SoftfloatFile('fall_maxmin.c') +SoftfloatFile('fall_reciprocal.c') SoftfloatFile('i32_to_f128.c') SoftfloatFile('i32_to_f16.c') SoftfloatFile('i32_to_f32.c') diff --git a/ext/softfloat/fall_maxmin.c b/ext/softfloat/fall_maxmin.c new file mode 100644 index 0000000000..7efb86d1a5 --- /dev/null +++ b/ext/softfloat/fall_maxmin.c @@ -0,0 +1,73 @@ + +/*============================================================================ +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +=============================================================================*/ + +#include +#include +#include +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +#define COMPARE_MAX(a, b, bits) \ +float ## bits ## _t f ## bits ## _max( float ## bits ## _t a, float ## bits ## _t b ) \ +{ \ + bool greater = f ## bits ## _lt_quiet(b, a) || \ + (f ## bits ## _eq(b, a) && signF ## bits ## UI(b.v)); \ + \ + if (isNaNF ## bits ## UI(a.v) && isNaNF ## bits ## UI(b.v)) { \ + union ui ## bits ## _f ## bits ui; \ + ui.ui = defaultNaNF ## bits ## UI; \ + return ui.f; \ + } else { \ + return greater || isNaNF ## bits ## UI((b).v) ? 
a : b; \ + } \ +} + +#define COMPARE_MIN(a, b, bits) \ +float ## bits ## _t f ## bits ## _min( float ## bits ## _t a, float ## bits ## _t b ) \ +{ \ + bool less = f ## bits ## _lt_quiet(a, b) || \ + (f ## bits ## _eq(a, b) && signF ## bits ## UI(a.v)); \ + \ + if (isNaNF ## bits ## UI(a.v) && isNaNF ## bits ## UI(b.v)) { \ + union ui ## bits ## _f ## bits ui; \ + ui.ui = defaultNaNF ## bits ## UI; \ + return ui.f; \ + } else { \ + return less || isNaNF ## bits ## UI((b).v) ? a : b; \ + } \ +} + +COMPARE_MAX(a, b, 16); +COMPARE_MAX(a, b, 32); +COMPARE_MAX(a, b, 64); + +COMPARE_MIN(a, b, 16); +COMPARE_MIN(a, b, 32); +COMPARE_MIN(a, b, 64); \ No newline at end of file diff --git a/ext/softfloat/fall_reciprocal.c b/ext/softfloat/fall_reciprocal.c new file mode 100644 index 0000000000..ead2fe657b --- /dev/null +++ b/ext/softfloat/fall_reciprocal.c @@ -0,0 +1,392 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include +#include +#include +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +static inline uint64_t extract64(uint64_t val, int pos, int len) +{ + assert(pos >= 0 && len > 0 && len <= 64 - pos); + return (val >> pos) & (~UINT64_C(0) >> (64 - len)); +} + +static inline uint64_t make_mask64(int pos, int len) +{ + assert(pos >= 0 && len > 0 && pos < 64 && len <= 64); + return (UINT64_MAX >> (64 - len)) << pos; +} + +//user needs to truncate output to required length +static inline uint64_t rsqrte7(uint64_t val, int e, int s, bool sub) { + uint64_t exp = extract64(val, s, e); + uint64_t sig = extract64(val, 0, s); + uint64_t sign = extract64(val, s + e, 1); + const int p = 7; + + static const uint8_t table[] = { + 52, 51, 50, 48, 47, 46, 44, 43, + 42, 41, 40, 39, 38, 36, 35, 34, + 33, 32, 31, 30, 30, 29, 28, 27, + 26, 25, 24, 23, 23, 22, 21, 20, + 19, 19, 18, 17, 16, 16, 15, 14, + 14, 13, 12, 12, 11, 10, 10, 9, + 9, 8, 7, 7, 6, 6, 5, 4, + 4, 3, 3, 2, 2, 1, 1, 0, + 127, 125, 123, 121, 119, 118, 116, 114, + 113, 111, 109, 108, 106, 105, 103, 102, + 100, 99, 97, 96, 95, 93, 92, 91, + 90, 88, 87, 86, 85, 84, 83, 82, + 80, 79, 78, 77, 76, 75, 74, 73, + 72, 71, 70, 70, 69, 68, 67, 66, + 65, 64, 63, 63, 62, 61, 60, 59, + 59, 58, 57, 56, 56, 55, 54, 53}; + + if (sub) { + while (extract64(sig, s - 1, 1) == 0) + exp--, sig <<= 1; + + sig = (sig << 1) & make_mask64(0 ,s); + } + + int idx = ((exp & 1) << (p-1)) | (sig >> (s-p+1)); + uint64_t out_sig = (uint64_t)(table[idx]) << (s-p); + uint64_t out_exp = (3 * make_mask64(0, e - 1) + ~exp) / 2; + + return (sign << (s+e)) | (out_exp << s) | out_sig; +} + +float16_t f16_rsqrte7(float16_t in) +{ + union ui16_f16 uA; + + uA.f = in; + unsigned int ret = f16_classify(in); + bool sub = false; + switch(ret) { + case 0x001: // -inf + case 0x002: // -normal + case 0x004: // -subnormal + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + case 0x200: //qNaN + uA.ui = defaultNaNF16UI; + break; + case 0x008: // -0 + uA.ui = 0xfc00; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7c00; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x020: //+ sub + sub = true; + default: // +num + uA.ui = rsqrte7(uA.ui, 5, 10, sub); + break; + } + + return uA.f; +} + +float32_t f32_rsqrte7(float32_t in) +{ + union ui32_f32 uA; + + uA.f = in; + unsigned int ret = f32_classify(in); + bool sub = false; + switch(ret) { + case 0x001: // -inf + case 0x002: // -normal + case 0x004: // -subnormal + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + case 0x200: //qNaN + uA.ui = defaultNaNF32UI; + break; + case 0x008: // -0 + uA.ui = 0xff800000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7f800000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x020: //+ sub + sub = true; + default: // +num + uA.ui = rsqrte7(uA.ui, 8, 23, sub); + break; + } + + return uA.f; +} + +float64_t f64_rsqrte7(float64_t in) +{ + union ui64_f64 uA; + + uA.f = in; + unsigned int ret = f64_classify(in); + bool sub = false; + switch(ret) { + case 0x001: // -inf + case 0x002: // -normal + case 0x004: // -subnormal + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + case 0x200: //qNaN 
+ uA.ui = defaultNaNF64UI; + break; + case 0x008: // -0 + uA.ui = 0xfff0000000000000ul; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7ff0000000000000ul; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x020: //+ sub + sub = true; + default: // +num + uA.ui = rsqrte7(uA.ui, 11, 52, sub); + break; + } + + return uA.f; +} + +//user needs to truncate output to required length +static inline uint64_t recip7(uint64_t val, int e, int s, int rm, bool sub, + bool *round_abnormal) +{ + uint64_t exp = extract64(val, s, e); + uint64_t sig = extract64(val, 0, s); + uint64_t sign = extract64(val, s + e, 1); + const int p = 7; + + static const uint8_t table[] = { + 127, 125, 123, 121, 119, 117, 116, 114, + 112, 110, 109, 107, 105, 104, 102, 100, + 99, 97, 96, 94, 93, 91, 90, 88, + 87, 85, 84, 83, 81, 80, 79, 77, + 76, 75, 74, 72, 71, 70, 69, 68, + 66, 65, 64, 63, 62, 61, 60, 59, + 58, 57, 56, 55, 54, 53, 52, 51, + 50, 49, 48, 47, 46, 45, 44, 43, + 42, 41, 40, 40, 39, 38, 37, 36, + 35, 35, 34, 33, 32, 31, 31, 30, + 29, 28, 28, 27, 26, 25, 25, 24, + 23, 23, 22, 21, 21, 20, 19, 19, + 18, 17, 17, 16, 15, 15, 14, 14, + 13, 12, 12, 11, 11, 10, 9, 9, + 8, 8, 7, 7, 6, 5, 5, 4, + 4, 3, 3, 2, 2, 1, 1, 0}; + + if (sub) { + while (extract64(sig, s - 1, 1) == 0) + exp--, sig <<= 1; + + sig = (sig << 1) & make_mask64(0 ,s); + + if (exp != 0 && exp != UINT64_MAX) { + *round_abnormal = true; + if (rm == 1 || + (rm == 2 && !sign) || + (rm == 3 && sign)) + return ((sign << (s+e)) | make_mask64(s, e)) - 1; + else + return (sign << (s+e)) | make_mask64(s, e); + } + } + + int idx = sig >> (s-p); + uint64_t out_sig = (uint64_t)(table[idx]) << (s-p); + uint64_t out_exp = 2 * make_mask64(0, e - 1) + ~exp; + if (out_exp == 0 || out_exp == UINT64_MAX) { + out_sig = (out_sig >> 1) | make_mask64(s - 1, 1); + if (out_exp == UINT64_MAX) { + out_sig >>= 1; + out_exp = 0; + } + } + + return (sign << (s+e)) | (out_exp << s) | out_sig; +} + +float16_t f16_recip7(float16_t in) +{ + union ui16_f16 uA; + + uA.f = in; + unsigned int ret = f16_classify(in); + bool sub = false; + bool round_abnormal = false; + switch(ret) { + case 0x001: // -inf + uA.ui = 0x8000; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x008: // -0 + uA.ui = 0xfc00; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7c00; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + case 0x200: //qNaN + uA.ui = defaultNaNF16UI; + break; + case 0x004: // -subnormal + case 0x020: //+ sub + sub = true; + default: // +- normal + uA.ui = recip7(uA.ui, 5, 10, + softfloat_roundingMode, sub, &round_abnormal); + if (round_abnormal) + softfloat_exceptionFlags |= softfloat_flag_inexact | + softfloat_flag_overflow; + break; + } + + return uA.f; +} + +float32_t f32_recip7(float32_t in) +{ + union ui32_f32 uA; + + uA.f = in; + unsigned int ret = f32_classify(in); + bool sub = false; + bool round_abnormal = false; + switch(ret) { + case 0x001: // -inf + uA.ui = 0x80000000; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x008: // -0 + uA.ui = 0xff800000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7f800000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + case 0x200: //qNaN + 
uA.ui = defaultNaNF32UI; + break; + case 0x004: // -subnormal + case 0x020: //+ sub + sub = true; + default: // +- normal + uA.ui = recip7(uA.ui, 8, 23, + softfloat_roundingMode, sub, &round_abnormal); + if (round_abnormal) + softfloat_exceptionFlags |= softfloat_flag_inexact | + softfloat_flag_overflow; + break; + } + + return uA.f; +} + +float64_t f64_recip7(float64_t in) +{ + union ui64_f64 uA; + + uA.f = in; + unsigned int ret = f64_classify(in); + bool sub = false; + bool round_abnormal = false; + switch(ret) { + case 0x001: // -inf + uA.ui = 0x8000000000000000; + break; + case 0x080: //+inf + uA.ui = 0x0; + break; + case 0x008: // -0 + uA.ui = 0xfff0000000000000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x010: // +0 + uA.ui = 0x7ff0000000000000; + softfloat_exceptionFlags |= softfloat_flag_infinite; + break; + case 0x100: // sNaN + softfloat_exceptionFlags |= softfloat_flag_invalid; + case 0x200: //qNaN + uA.ui = defaultNaNF64UI; + break; + case 0x004: // -subnormal + case 0x020: //+ sub + sub = true; + default: // +- normal + uA.ui = recip7(uA.ui, 11, 52, + softfloat_roundingMode, sub, &round_abnormal); + if (round_abnormal) + softfloat_exceptionFlags |= softfloat_flag_inexact | + softfloat_flag_overflow; + break; + } + + return uA.f; +} \ No newline at end of file diff --git a/ext/softfloat/platform.h b/ext/softfloat/platform.h index 03dd429faf..91aa146bb1 100644 --- a/ext/softfloat/platform.h +++ b/ext/softfloat/platform.h @@ -41,6 +41,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define INLINE_LEVEL 5 #define SOFTFLOAT_FAST_INT64 #define SOFTFLOAT_FAST_DIV64TO32 +#define SOFTFLOAT_ROUND_ODD /*---------------------------------------------------------------------------- *----------------------------------------------------------------------------*/ diff --git a/ext/softfloat/softfloat.h b/ext/softfloat/softfloat.h index 41cacbc53b..50374876d0 100644 --- a/ext/softfloat/softfloat.h +++ b/ext/softfloat/softfloat.h @@ -161,6 +161,8 @@ void f16_to_f128M( float16_t, float128_t * ); float16_t f16_roundToInt( float16_t, uint_fast8_t, bool ); float16_t f16_add( float16_t, float16_t ); float16_t f16_sub( float16_t, float16_t ); +float16_t f16_max( float16_t, float16_t ); +float16_t f16_min( float16_t, float16_t ); float16_t f16_mul( float16_t, float16_t ); float16_t f16_mulAdd( float16_t, float16_t, float16_t ); float16_t f16_div( float16_t, float16_t ); @@ -174,6 +176,8 @@ bool f16_le_quiet( float16_t, float16_t ); bool f16_lt_quiet( float16_t, float16_t ); bool f16_isSignalingNaN( float16_t ); uint_fast16_t f16_classify( float16_t ); +float16_t f16_rsqrte7( float16_t ); +float16_t f16_recip7( float16_t ); /*---------------------------------------------------------------------------- | 32-bit (single-precision) floating-point operations. 
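The estimate helpers declared in this header (f16/f32/f64_rsqrte7 and _recip7) follow the usual SoftFloat-3 conventions: operands and results travel as the packed float16_t/float32_t/float64_t structs, and exception bits accumulate in softfloat_exceptionFlags. A minimal caller sketch, with a hypothetical wrapper name (illustrative only, not part of this patch):

    #include <stdint.h>
    #include "softfloat.h"

    /* Hypothetical wrapper: raw IEEE-754 bits in, raw estimate bits out. */
    static uint32_t recip7_bits(uint32_t bits)
    {
        float32_t in, out;
        in.v = bits;
        softfloat_exceptionFlags = 0;   /* clear sticky flags for this query */
        out = f32_recip7(in);           /* 7-bit reciprocal estimate */
        if (softfloat_exceptionFlags & softfloat_flag_invalid) {
            /* signaling-NaN input: out.v already holds the canonical quiet NaN */
        }
        return out.v;
    }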
@@ -197,6 +201,8 @@ void f32_to_f128M( float32_t, float128_t * ); float32_t f32_roundToInt( float32_t, uint_fast8_t, bool ); float32_t f32_add( float32_t, float32_t ); float32_t f32_sub( float32_t, float32_t ); +float32_t f32_max( float32_t, float32_t ); +float32_t f32_min( float32_t, float32_t ); float32_t f32_mul( float32_t, float32_t ); float32_t f32_mulAdd( float32_t, float32_t, float32_t ); float32_t f32_div( float32_t, float32_t ); @@ -210,6 +216,8 @@ bool f32_le_quiet( float32_t, float32_t ); bool f32_lt_quiet( float32_t, float32_t ); bool f32_isSignalingNaN( float32_t ); uint_fast16_t f32_classify( float32_t ); +float32_t f32_rsqrte7( float32_t ); +float32_t f32_recip7( float32_t ); /*---------------------------------------------------------------------------- | 64-bit (double-precision) floating-point operations. @@ -233,6 +241,8 @@ void f64_to_f128M( float64_t, float128_t * ); float64_t f64_roundToInt( float64_t, uint_fast8_t, bool ); float64_t f64_add( float64_t, float64_t ); float64_t f64_sub( float64_t, float64_t ); +float64_t f64_max( float64_t, float64_t ); +float64_t f64_min( float64_t, float64_t ); float64_t f64_mul( float64_t, float64_t ); float64_t f64_mulAdd( float64_t, float64_t, float64_t ); float64_t f64_div( float64_t, float64_t ); @@ -246,6 +256,8 @@ bool f64_le_quiet( float64_t, float64_t ); bool f64_lt_quiet( float64_t, float64_t ); bool f64_isSignalingNaN( float64_t ); uint_fast16_t f64_classify( float64_t ); +float64_t f64_rsqrte7( float64_t ); +float64_t f64_recip7( float64_t ); /*---------------------------------------------------------------------------- | Rounding precision for 80-bit extended double-precision floating-point. diff --git a/ext/softfloat/softfloat.mk.in b/ext/softfloat/softfloat.mk.in index 7cfe96034b..d3fed2cf25 100644 --- a/ext/softfloat/softfloat.mk.in +++ b/ext/softfloat/softfloat.mk.in @@ -117,6 +117,8 @@ softfloat_c_srcs = \ f64_to_ui32_r_minMag.c \ f64_to_ui64.c \ f64_to_ui64_r_minMag.c \ + fall_maxmin.c \ + fall_reciprocal.c \ i32_to_f128.c \ i32_to_f16.c \ i32_to_f32.c \ diff --git a/src/arch/generic/decoder.hh b/src/arch/generic/decoder.hh index afba1a3e7c..e7d361e6cf 100644 --- a/src/arch/generic/decoder.hh +++ b/src/arch/generic/decoder.hh @@ -48,6 +48,7 @@ class InstDecoder : public SimObject bool instDone = false; bool outOfBytes = true; + bool stall = false; public: template @@ -154,6 +155,15 @@ class InstDecoder : public SimObject * decoder isn't ready (see instReady()). */ virtual StaticInstPtr decode(PCStateBase &pc) = 0; + + /** + * Has the decoder been stalled? + * + * This method can be used to check whether the decoder has been stalled for + * some reason. If so, no more instructions can be fetched from the decoder.
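+     * For example, a fetch loop might poll it roughly as follows (sketch
+     * only; the exact calls depend on the CPU model):
+     *
+     *     decoder->moreBytes(pc, fetch_addr);
+     *     if (decoder->isStalled())
+     *         return; // nothing can be decoded yet, retry later
+     *     if (decoder->instReady())
+     *         inst = decoder->decode(pc);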
+ * + */ + bool isStalled() { return this->stall; } }; } // namespace gem5 diff --git a/src/arch/isa_parser/operand_types.py b/src/arch/isa_parser/operand_types.py index 63ca765a09..49e7fc84b5 100755 --- a/src/arch/isa_parser/operand_types.py +++ b/src/arch/isa_parser/operand_types.py @@ -377,6 +377,7 @@ def makeRead(self): def makeWrite(self): return f""" + xc->setRegOperand(this, 0, &tmp_d{self.dest_reg_idx}); if (traceData) {{ traceData->setData({self.reg_class}, &tmp_d{self.dest_reg_idx}); }} diff --git a/src/arch/riscv/decoder.cc b/src/arch/riscv/decoder.cc index b816c17b21..c8a6d85476 100644 --- a/src/arch/riscv/decoder.cc +++ b/src/arch/riscv/decoder.cc @@ -38,16 +38,29 @@ namespace gem5 namespace RiscvISA { +GenericISA::BasicDecodeCache Decoder::defaultCache; + void Decoder::reset() { aligned = true; mid = false; + vConfigDone = true; + machInst = 0; emi = 0; } void Decoder::moreBytes(const PCStateBase &pc, Addr fetchPC) { + if (GEM5_UNLIKELY(!this->vConfigDone)) { + DPRINTF(Decode, "Waiting for vset*vl* to be executed\n"); + instDone = false; + outOfBytes = false; + stall = true; + return; + } + stall = false; + // The MSB of the upper and lower halves of a machine instruction. constexpr size_t max_bit = sizeof(machInst) * 8 - 1; constexpr size_t mid_bit = sizeof(machInst) * 4 - 1; @@ -58,36 +71,42 @@ Decoder::moreBytes(const PCStateBase &pc, Addr fetchPC) bool aligned = pc.instAddr() % sizeof(machInst) == 0; if (aligned) { - emi = inst; - if (compressed(emi)) - emi = bits(emi, mid_bit, 0); + emi.instBits = inst; + if (compressed(inst)) + emi.instBits = bits(inst, mid_bit, 0); outOfBytes = !compressed(emi); instDone = true; } else { if (mid) { - assert(bits(emi, max_bit, mid_bit + 1) == 0); - replaceBits(emi, max_bit, mid_bit + 1, inst); + assert(bits(emi.instBits, max_bit, mid_bit + 1) == 0); + replaceBits(emi.instBits, max_bit, mid_bit + 1, inst); mid = false; outOfBytes = false; instDone = true; } else { - emi = bits(inst, max_bit, mid_bit + 1); + emi.instBits = bits(inst, max_bit, mid_bit + 1); mid = !compressed(emi); outOfBytes = true; instDone = compressed(emi); } } + if (instDone) { + emi.vl = this->machVl; + emi.vtype8 = this->machVtype & 0xff; + emi.vill = this->machVtype.vill; + if (vconf(emi)) { + this->vConfigDone = false; // set true when vconfig inst execute + } + } } StaticInstPtr Decoder::decode(ExtMachInst mach_inst, Addr addr) { DPRINTF(Decode, "Decoding instruction 0x%08x at address %#x\n", - mach_inst, addr); + mach_inst.instBits, addr); - StaticInstPtr &si = instMap[mach_inst]; - if (!si) - si = decodeInst(mach_inst); + StaticInstPtr si = defaultCache.decode(this, mach_inst, addr); DPRINTF(Decode, "Decode: Decoded %s instruction: %#x\n", si->getName(), mach_inst); @@ -115,5 +134,14 @@ Decoder::decode(PCStateBase &_next_pc) return decode(emi, next_pc.instAddr()); } +void +Decoder::setVlAndVtype(uint32_t vl, VTYPE vtype) +{ + this->machVtype = vtype; + this->machVl = vl; + + this->vConfigDone = true; +} + } // namespace RiscvISA } // namespace gem5 diff --git a/src/arch/riscv/decoder.hh b/src/arch/riscv/decoder.hh index 15cbefe39c..0dca429cf2 100644 --- a/src/arch/riscv/decoder.hh +++ b/src/arch/riscv/decoder.hh @@ -32,6 +32,7 @@ #include "arch/generic/decode_cache.hh" #include "arch/generic/decoder.hh" +#include "arch/riscv/insts/vector.hh" #include "arch/riscv/types.hh" #include "base/logging.hh" #include "base/types.hh" @@ -50,15 +51,21 @@ namespace RiscvISA class Decoder : public InstDecoder { private: - decode_cache::InstMap instMap; bool aligned; bool mid; 
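+    // True when the decoder knows the current vl/vtype; cleared when a
+    // vset*vl* is decoded and set again via setVlAndVtype().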
- + bool vConfigDone; protected: //The extended machine instruction being generated ExtMachInst emi; uint32_t machInst; + VTYPE machVtype; + uint32_t machVl; + + /// A cache of decoded instruction objects. + static GenericISA::BasicDecodeCache defaultCache; + friend class GenericISA::BasicDecodeCache; + StaticInstPtr decodeInst(ExtMachInst mach_inst); /// Decode a machine instruction. @@ -75,12 +82,17 @@ class Decoder : public InstDecoder void reset() override; inline bool compressed(ExtMachInst inst) { return (inst & 0x3) < 0x3; } + inline bool vconf(ExtMachInst inst) { + return inst.opcode7 == 0b1010111u && inst.width == 0b111u; + } //Use this to give data to the decoder. This should be used //when there is control flow. void moreBytes(const PCStateBase &pc, Addr fetchPC) override; StaticInstPtr decode(PCStateBase &nextPC) override; + + void setVlAndVtype(uint32_t vl, VTYPE vtype); }; } // namespace RiscvISA diff --git a/src/arch/riscv/faults.hh b/src/arch/riscv/faults.hh index e66476727c..fa67e3b34c 100644 --- a/src/arch/riscv/faults.hh +++ b/src/arch/riscv/faults.hh @@ -173,7 +173,7 @@ class InstFault : public RiscvFault : RiscvFault(n, FaultType::OTHERS, INST_ILLEGAL), _inst(inst) {} - RegVal trap_value() const override { return _inst; } + RegVal trap_value() const override { return _inst.instBits; } }; class UnknownInstFault : public InstFault diff --git a/src/arch/riscv/fp_inst.hh b/src/arch/riscv/fp_inst.hh index 604c0169f0..0c59879b72 100644 --- a/src/arch/riscv/fp_inst.hh +++ b/src/arch/riscv/fp_inst.hh @@ -40,4 +40,5 @@ return std::make_shared("RM fault", machInst);\ softfloat_roundingMode = rm; \ + #endif // __ARCH_RISCV_FP_INST_HH__ diff --git a/src/arch/riscv/insts/SConscript b/src/arch/riscv/insts/SConscript index 80592a34ed..efed38c8b3 100644 --- a/src/arch/riscv/insts/SConscript +++ b/src/arch/riscv/insts/SConscript @@ -32,3 +32,4 @@ Source('compressed.cc', tags='riscv isa') Source('mem.cc', tags='riscv isa') Source('standard.cc', tags='riscv isa') Source('static_inst.cc', tags='riscv isa') +Source('vector.cc', tags='riscv isa') diff --git a/src/arch/riscv/insts/amo.cc b/src/arch/riscv/insts/amo.cc index d845c91bf3..052586ecfc 100644 --- a/src/arch/riscv/insts/amo.cc +++ b/src/arch/riscv/insts/amo.cc @@ -32,7 +32,6 @@ #include #include -#include "arch/riscv/insts/bitfields.hh" #include "arch/riscv/utility.hh" #include "cpu/exec_context.hh" #include "cpu/static_inst.hh" @@ -49,7 +48,7 @@ MemFenceMicro::generateDisassembly( Addr pc, const loader::SymbolTable *symtab) const { std::stringstream ss; - ss << csprintf("0x%08x", machInst) << ' ' << mnemonic; + ss << csprintf("0x%08x", machInst.instBits) << ' ' << mnemonic; return ss.str(); } @@ -66,14 +65,14 @@ LoadReserved::generateDisassembly( { std::stringstream ss; ss << mnemonic; - if (AQ || RL) + if (machInst.aq || machInst.rl) ss << '_'; - if (AQ) + if (machInst.aq) ss << "aq"; - if (RL) + if (machInst.rl) ss << "rl"; - ss << ' ' << registerName(intRegClass[RD]) << ", (" - << registerName(intRegClass[RS1]) << ')'; + ss << ' ' << registerName(intRegClass[machInst.rd]) << ", (" + << registerName(intRegClass[machInst.rs1]) << ')'; return ss.str(); } @@ -94,15 +93,15 @@ StoreCond::generateDisassembly( { std::stringstream ss; ss << mnemonic; - if (AQ || RL) + if (machInst.aq || machInst.rl) ss << '_'; - if (AQ) + if (machInst.aq) ss << "aq"; - if (RL) + if (machInst.rl) ss << "rl"; - ss << ' ' << registerName(intRegClass[RD]) << ", " - << registerName(intRegClass[RS2]) << ", (" - << registerName(intRegClass[RS1]) << ')'; + ss << 
' ' << registerName(intRegClass[machInst.rd]) << ", " + << registerName(intRegClass[machInst.rs2]) << ", (" + << registerName(intRegClass[machInst.rs1]) << ')'; return ss.str(); } @@ -124,15 +123,15 @@ AtomicMemOp::generateDisassembly( { std::stringstream ss; ss << mnemonic; - if (AQ || RL) + if (machInst.aq || machInst.rl) ss << '_'; - if (AQ) + if (machInst.aq) ss << "aq"; - if (RL) + if (machInst.rl) ss << "rl"; - ss << ' ' << registerName(intRegClass[RD]) << ", " - << registerName(intRegClass[RS2]) << ", (" - << registerName(intRegClass[RS1]) << ')'; + ss << ' ' << registerName(intRegClass[machInst.rd]) << ", " + << registerName(intRegClass[machInst.rs2]) << ", (" + << registerName(intRegClass[machInst.rs1]) << ')'; return ss.str(); } diff --git a/src/arch/riscv/insts/bitfields.hh b/src/arch/riscv/insts/bitfields.hh deleted file mode 100644 index 7b985dc8e1..0000000000 --- a/src/arch/riscv/insts/bitfields.hh +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef __ARCH_RISCV_BITFIELDS_HH__ -#define __ARCH_RISCV_BITFIELDS_HH__ - -#include "base/bitfield.hh" - -#define CSRIMM bits(machInst, 19, 15) -#define FUNCT12 bits(machInst, 31, 20) -#define IMM5 bits(machInst, 11, 7) -#define IMM7 bits(machInst, 31, 25) -#define IMMSIGN bits(machInst, 31) -#define OPCODE bits(machInst, 6, 0) - -#define AQ bits(machInst, 26) -#define RD bits(machInst, 11, 7) -#define RL bits(machInst, 25) -#define RS1 bits(machInst, 19, 15) -#define RS2 bits(machInst, 24, 20) - -#endif // __ARCH_RISCV_BITFIELDS_HH__ diff --git a/src/arch/riscv/insts/mem.cc b/src/arch/riscv/insts/mem.cc index 36d69853ec..5f58a68a57 100644 --- a/src/arch/riscv/insts/mem.cc +++ b/src/arch/riscv/insts/mem.cc @@ -32,7 +32,6 @@ #include #include -#include "arch/riscv/insts/bitfields.hh" #include "arch/riscv/insts/static_inst.hh" #include "arch/riscv/utility.hh" #include "cpu/static_inst.hh" diff --git a/src/arch/riscv/insts/mem.hh b/src/arch/riscv/insts/mem.hh index 8e95c6b4e7..eeca1434cf 100644 --- a/src/arch/riscv/insts/mem.hh +++ b/src/arch/riscv/insts/mem.hh @@ -48,8 +48,8 @@ class MemInst : public RiscvStaticInst int64_t offset; Request::Flags memAccessFlags; - MemInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass) - : RiscvStaticInst(mnem, _machInst, __opClass), offset(0) + MemInst(const char *mnem, ExtMachInst _extMachInst, OpClass __opClass) + : RiscvStaticInst(mnem, _extMachInst, __opClass), offset(0) {} }; diff --git a/src/arch/riscv/insts/standard.hh b/src/arch/riscv/insts/standard.hh index 5b0e8c2c22..c3adafe415 100644 --- a/src/arch/riscv/insts/standard.hh +++ b/src/arch/riscv/insts/standard.hh @@ -33,7 +33,6 @@ #include -#include "arch/riscv/insts/bitfields.hh" #include "arch/riscv/insts/static_inst.hh" #include "arch/riscv/regs/misc.hh" #include "cpu/exec_context.hh" @@ -66,8 +65,8 @@ class ImmOp : public RiscvStaticInst protected: I imm; - ImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) - : RiscvStaticInst(mnem, _machInst, __opClass), imm(0) + ImmOp(const char *mnem, ExtMachInst _extMachInst, OpClass __opClass) + : RiscvStaticInst(mnem, _extMachInst, __opClass), imm(0) {} }; @@ -93,9 +92,9 @@ class CSROp : public RiscvStaticInst uint64_t uimm; /// Constructor - CSROp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) - : RiscvStaticInst(mnem, _machInst, __opClass), - csr(FUNCT12), uimm(CSRIMM) + CSROp(const char *mnem, ExtMachInst _extMachInst, OpClass __opClass) + : RiscvStaticInst(mnem, _extMachInst, __opClass), + csr(_extMachInst.funct12), uimm(_extMachInst.csrimm) { if (csr == CSR_SATP) { 
flags[IsSquashAfter] = true; diff --git a/src/arch/riscv/insts/static_inst.hh b/src/arch/riscv/insts/static_inst.hh index bccecf2e2f..5d6cab4961 100644 --- a/src/arch/riscv/insts/static_inst.hh +++ b/src/arch/riscv/insts/static_inst.hh @@ -33,12 +33,12 @@ #include #include "arch/riscv/pcstate.hh" +#include "arch/riscv/regs/misc.hh" #include "arch/riscv/types.hh" #include "cpu/exec_context.hh" #include "cpu/static_inst.hh" #include "cpu/thread_context.hh" #include "mem/packet.hh" - namespace gem5 { diff --git a/src/arch/riscv/insts/unknown.hh b/src/arch/riscv/insts/unknown.hh index 0c2f75e1e9..64f94dea00 100644 --- a/src/arch/riscv/insts/unknown.hh +++ b/src/arch/riscv/insts/unknown.hh @@ -34,7 +34,6 @@ #include #include "arch/riscv/faults.hh" -#include "arch/riscv/insts/bitfields.hh" #include "arch/riscv/insts/static_inst.hh" #include "cpu/exec_context.hh" #include "cpu/static_inst.hh" @@ -60,14 +59,14 @@ class Unknown : public RiscvStaticInst Fault execute(ExecContext *, trace::InstRecord *) const override { - return std::make_shared(machInst); + return std::make_shared(machInst.instBits); } std::string generateDisassembly( Addr pc, const loader::SymbolTable *symtab) const override { - return csprintf("unknown opcode %#02x", OPCODE); + return csprintf("unknown opcode %#02x", machInst.opcode); } }; diff --git a/src/arch/riscv/insts/vector.cc b/src/arch/riscv/insts/vector.cc new file mode 100644 index 0000000000..8b4ec30542 --- /dev/null +++ b/src/arch/riscv/insts/vector.cc @@ -0,0 +1,391 @@ +/* + * Copyright (c) 2022 PLCT Lab + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/riscv/insts/vector.hh" + +#include +#include + +#include "arch/riscv/insts/static_inst.hh" +#include "arch/riscv/utility.hh" +#include "cpu/static_inst.hh" + +namespace gem5 +{ + +namespace RiscvISA +{ + +float +getVflmul(uint32_t vlmul_encoding) { + int vlmul = int8_t(vlmul_encoding << 5) >> 5; + float vflmul = vlmul >= 0 ? 
1 << vlmul : 1.0 / (1 << -vlmul); + return vflmul; +} + +uint32_t +getVlmax(VTYPE vtype, uint32_t vlen) { + uint32_t sew = getSew(vtype.vsew); + uint32_t vlmax = (vlen/sew) * getVflmul(vtype.vlmul); + return vlmax; +} + +std::string +VConfOp::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "; + if (bit31 && bit30 == 0) { + ss << registerName(srcRegIdx(0)) << ", " << registerName(srcRegIdx(1)); + } else if (bit31 && bit30) { + ss << uimm << ", " << generateZimmDisassembly(); + } else { + ss << registerName(srcRegIdx(0)) << ", " << generateZimmDisassembly(); + } + return ss.str(); +} + +std::string +VConfOp::generateZimmDisassembly() const +{ + std::stringstream s; + + // VSETIVLI uses ZIMM10 and VSETVLI uses ZIMM11 + uint64_t zimm = (bit31 && bit30) ? zimm10 : zimm11; + + bool frac_lmul = bits(zimm, 2); + int sew = 1 << (bits(zimm, 5, 3) + 3); + int lmul = bits(zimm, 1, 0); + auto vta = bits(zimm, 6) == 1 ? "ta" : "tu"; + auto vma = bits(zimm, 7) == 1 ? "ma" : "mu"; + s << "e" << sew; + if (frac_lmul) { + std::string lmul_str = ""; + switch(lmul){ + case 3: + lmul_str = "f2"; + break; + case 2: + lmul_str = "f4"; + break; + case 1: + lmul_str = "f8"; + break; + default: + panic("Unsupported fractional LMUL"); + } + s << ", m" << lmul_str; + } else { + s << ", m" << (1 << lmul); + } + s << ", " << vta << ", " << vma; + return s.str(); +} + +std::string +VectorNonSplitInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << registerName(srcRegIdx(0)); + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); +} + +std::string VectorArithMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "; + if (machInst.funct3 == 0x3) { + // OPIVI + ss << registerName(srcRegIdx(0)) << ", " << machInst.vecimm; + } else { + ss << registerName(srcRegIdx(1)) << ", " << registerName(srcRegIdx(0)); + } + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); +} + +std::string VectorArithMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "; + if (machInst.funct3 == 0x3) { + // OPIVI + ss << registerName(srcRegIdx(0)) << ", " << machInst.vecimm; + } else { + ss << registerName(srcRegIdx(1)) << ", " << registerName(srcRegIdx(0)); + } + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); +} + +std::string VectorVMUNARY0MicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)); + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); +} + +std::string VectorVMUNARY0MacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)); + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); +} + +std::string VectorSlideMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "; + if (machInst.funct3 == 0x3) { + ss << registerName(srcRegIdx(0)) << ", " << machInst.vecimm; + } else { + ss << registerName(srcRegIdx(1)) <<
registerName(srcRegIdx(0)); + } + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); +} + +std::string VectorSlideMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "; + if (machInst.funct3 == 0x3) { + ss << registerName(srcRegIdx(0)) << ", " << machInst.vecimm; + } else { + ss << registerName(srcRegIdx(1)) << ", " << registerName(srcRegIdx(0)); + } + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); +} + +std::string VleMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')' << ", " + << registerName(srcRegIdx(1)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VlWholeMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; + return ss.str(); +} + +std::string VseMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " + << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VsWholeMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " + << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; + return ss.str(); +} + +std::string VleMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " << + '(' << registerName(srcRegIdx(0)) << ')'; + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VlWholeMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " << + '(' << registerName(srcRegIdx(0)) << ')'; + return ss.str(); +} + +std::string VseMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " << + '(' << registerName(srcRegIdx(0)) << ')'; + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VsWholeMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " << + '(' << registerName(srcRegIdx(0)) << ')'; + return ss.str(); +} + +std::string VlStrideMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " << + '(' << registerName(srcRegIdx(0)) << ')' << + ", " << registerName(srcRegIdx(1)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VlStrideMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " << + '(' << registerName(srcRegIdx(0)) << ')' << + ", "<< registerName(srcRegIdx(1)); + if (microIdx != 0 || 
machInst.vtype8.vma == 0 || machInst.vtype8.vta == 0) + ss << ", " << registerName(srcRegIdx(2)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VsStrideMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(srcRegIdx(2)) << ", " << + '(' << registerName(srcRegIdx(0)) << ')' << + ", " << registerName(srcRegIdx(1)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VsStrideMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(srcRegIdx(2)) << ", " << + '(' << registerName(srcRegIdx(0)) << ')' << + ", "<< registerName(srcRegIdx(1)); + if (microIdx != 0 || machInst.vtype8.vma == 0 || machInst.vtype8.vta == 0) + ss << ", " << registerName(srcRegIdx(2)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VlIndexMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << '(' << registerName(srcRegIdx(0)) << ")," + << registerName(srcRegIdx(1)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VlIndexMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' + << registerName(destRegIdx(0)) << "[" << uint16_t(vdElemIdx) << "], " + << '(' << registerName(srcRegIdx(0)) << "), " + << registerName(srcRegIdx(1)) << "[" << uint16_t(vs2ElemIdx) << "]"; + if (microIdx != 0 || machInst.vtype8.vma == 0 || machInst.vtype8.vta == 0) + ss << ", " << registerName(srcRegIdx(2)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VsIndexMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(srcRegIdx(2)) << ", " + << '(' << registerName(srcRegIdx(0)) << ")," + << registerName(srcRegIdx(1)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VsIndexMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' + << registerName(srcRegIdx(2)) << "[" << uint16_t(vs3ElemIdx) << "], " + << '(' << registerName(srcRegIdx(0)) << "), " + << registerName(srcRegIdx(1)) << "[" << uint16_t(vs2ElemIdx) << "]"; + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string +VMvWholeMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " << + registerName(srcRegIdx(1)); + return ss.str(); +} + +std::string +VMvWholeMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " << + registerName(srcRegIdx(1)); + return ss.str(); +} + +} // namespace RiscvISA +} // namespace gem5 diff --git a/src/arch/riscv/insts/vector.hh b/src/arch/riscv/insts/vector.hh new file mode 100644 index 0000000000..c6235f884f --- /dev/null +++ b/src/arch/riscv/insts/vector.hh @@ -0,0 +1,628 @@ +/* + * Copyright (c) 2022 PLCT Lab + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __ARCH_RISCV_INSTS_VECTOR_HH__ +#define __ARCH_RISCV_INSTS_VECTOR_HH__ + +#include + +#include "arch/riscv/insts/static_inst.hh" +#include "arch/riscv/regs/misc.hh" +#include "arch/riscv/utility.hh" +#include "cpu/exec_context.hh" +#include "cpu/static_inst.hh" + +namespace gem5 +{ + +namespace RiscvISA +{ + +float +getVflmul(uint32_t vlmul_encoding); + +inline uint32_t getSew(uint32_t vsew) { + assert(vsew <= 3); + return (8 << vsew); +} + +uint32_t +getVlmax(VTYPE vtype, uint32_t vlen); + +/** + * Base class for Vector Config operations + */ +class VConfOp : public RiscvStaticInst +{ + protected: + uint64_t bit30; + uint64_t bit31; + uint64_t zimm10; + uint64_t zimm11; + uint64_t uimm; + VConfOp(const char *mnem, ExtMachInst _extMachInst, OpClass __opClass) + : RiscvStaticInst(mnem, _extMachInst, __opClass), + bit30(_extMachInst.bit30), bit31(_extMachInst.bit31), + zimm10(_extMachInst.zimm_vsetivli), + zimm11(_extMachInst.zimm_vsetvli), + uimm(_extMachInst.uimm_vsetivli) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; + + std::string generateZimmDisassembly() const; +}; + +inline uint8_t checked_vtype(bool vill, uint8_t vtype) { + panic_if(vill, "vill has been set"); + const uint8_t vsew = bits(vtype, 5, 3); + panic_if(vsew >= 0b100, "vsew: %#x not supported", vsew); + const uint8_t vlmul = bits(vtype, 2, 0); + panic_if(vlmul == 0b100, "vlmul: %#x not supported", vlmul); + return vtype; +} + +class VectorNonSplitInst : public RiscvStaticInst +{ + protected: + uint32_t vl; + uint8_t vtype; + VectorNonSplitInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : RiscvStaticInst(mnem, _machInst, __opClass), + vl(_machInst.vl), + vtype(checked_vtype(_machInst.vill, _machInst.vtype8)) + { + this->flags[IsVector] = true; + } + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VectorMacroInst : public RiscvMacroInst +{ + protected: + uint32_t vl; + uint8_t vtype; + 
VectorMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : RiscvMacroInst(mnem, _machInst, __opClass), + vl(_machInst.vl), + vtype(checked_vtype(_machInst.vill, _machInst.vtype8)) + { + this->flags[IsVector] = true; + } +}; + +class VectorMicroInst : public RiscvMicroInst +{ +protected: + uint8_t microVl; + uint8_t microIdx; + uint8_t vtype; + VectorMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + uint8_t _microVl, uint8_t _microIdx) + : RiscvMicroInst(mnem, _machInst, __opClass), + microVl(_microVl), + microIdx(_microIdx), + vtype(_machInst.vtype8) + { + this->flags[IsVector] = true; + } +}; + +class VectorNopMicroInst : public RiscvMicroInst +{ +public: + VectorNopMicroInst(ExtMachInst _machInst) + : RiscvMicroInst("vnop", _machInst, No_OpClass) + {} + + Fault execute(ExecContext* xc, Trace::InstRecord* traceData) + const override + { + return NoFault; + } + + std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) + const override + { + std::stringstream ss; + ss << mnemonic; + return ss.str(); + } +}; + +class VectorArithMicroInst : public VectorMicroInst +{ +protected: + VectorArithMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _microVl, + uint8_t _microIdx) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VectorArithMacroInst : public VectorMacroInst +{ + protected: + VectorArithMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMacroInst(mnem, _machInst, __opClass) + { + this->flags[IsVector] = true; + } + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VectorVMUNARY0MicroInst : public VectorMicroInst +{ +protected: + VectorVMUNARY0MicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _microVl, + uint8_t _microIdx) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VectorVMUNARY0MacroInst : public VectorMacroInst +{ + protected: + VectorVMUNARY0MacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMacroInst(mnem, _machInst, __opClass) + { + this->flags[IsVector] = true; + } + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VectorSlideMacroInst : public VectorMacroInst +{ + protected: + VectorSlideMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMacroInst(mnem, _machInst, __opClass) + { + this->flags[IsVector] = true; + } + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VectorSlideMicroInst : public VectorMicroInst +{ + protected: + uint8_t vdIdx; + uint8_t vs2Idx; + VectorSlideMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _microVl, + uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + , vdIdx(_vdIdx), vs2Idx(_vs2Idx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VectorMemMicroInst : public VectorMicroInst +{ + protected: + uint32_t offset; // Used to calculate EA. 
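+    // Flags for the memory request issued by this micro-op.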
+ Request::Flags memAccessFlags; + + VectorMemMicroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _microVl, uint8_t _microIdx, + uint32_t _offset) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + , offset(_offset) + , memAccessFlags(0) + {} +}; + +class VectorMemMacroInst : public VectorMacroInst +{ + protected: + VectorMemMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMacroInst(mnem, _machInst, __opClass) + {} +}; + +class VleMacroInst : public VectorMemMacroInst +{ + protected: + VleMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VseMacroInst : public VectorMemMacroInst +{ + protected: + VseMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VleMicroInst : public VectorMicroInst +{ + protected: + Request::Flags memAccessFlags; + + VleMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + uint8_t _microVl, uint8_t _microIdx) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + { + this->flags[IsLoad] = true; + } + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VseMicroInst : public VectorMicroInst +{ + protected: + Request::Flags memAccessFlags; + + VseMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + uint8_t _microVl, uint8_t _microIdx) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + { + this->flags[IsStore] = true; + } + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VlWholeMacroInst : public VectorMemMacroInst +{ + protected: + VlWholeMacroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VlWholeMicroInst : public VectorMicroInst +{ + protected: + Request::Flags memAccessFlags; + + VlWholeMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _microVl, uint8_t _microIdx) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VsWholeMacroInst : public VectorMemMacroInst +{ + protected: + VsWholeMacroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VsWholeMicroInst : public VectorMicroInst +{ + protected: + Request::Flags memAccessFlags; + + VsWholeMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _microVl, uint8_t _microIdx) + : VectorMicroInst(mnem, _machInst, __opClass, _microIdx, _microIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VlStrideMacroInst : public VectorMemMacroInst +{ + protected: + VlStrideMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, 
_machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VlStrideMicroInst : public VectorMemMicroInst +{ + protected: + uint8_t regIdx; + VlStrideMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _regIdx, + uint8_t _microIdx, uint8_t _microVl) + : VectorMemMicroInst(mnem, _machInst, __opClass, _microVl, + _microIdx, 0) + , regIdx(_regIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VsStrideMacroInst : public VectorMemMacroInst +{ + protected: + VsStrideMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VsStrideMicroInst : public VectorMemMicroInst +{ + protected: + uint8_t regIdx; + VsStrideMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _regIdx, + uint8_t _microIdx, uint8_t _microVl) + : VectorMemMicroInst(mnem, _machInst, __opClass, _microVl, + _microIdx, 0) + , regIdx(_regIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VlIndexMacroInst : public VectorMemMacroInst +{ + protected: + VlIndexMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VlIndexMicroInst : public VectorMemMicroInst +{ + protected: + uint8_t vdRegIdx; + uint8_t vdElemIdx; + uint8_t vs2RegIdx; + uint8_t vs2ElemIdx; + VlIndexMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _vdRegIdx, uint8_t _vdElemIdx, + uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx) + : VectorMemMicroInst(mnem, _machInst, __opClass, 1, + 0, 0) + , vdRegIdx(_vdRegIdx), vdElemIdx(_vdElemIdx) + , vs2RegIdx(_vs2RegIdx), vs2ElemIdx(_vs2ElemIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VsIndexMacroInst : public VectorMemMacroInst +{ + protected: + VsIndexMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VsIndexMicroInst : public VectorMemMicroInst +{ + protected: + uint8_t vs3RegIdx; + uint8_t vs3ElemIdx; + uint8_t vs2RegIdx; + uint8_t vs2ElemIdx; + VsIndexMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _vs3RegIdx, uint8_t _vs3ElemIdx, + uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx) + : VectorMemMicroInst(mnem, _machInst, __opClass, 1, 0, 0) + , vs3RegIdx(_vs3RegIdx), vs3ElemIdx(_vs3ElemIdx) + , vs2RegIdx(_vs2RegIdx), vs2ElemIdx(_vs2ElemIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VMvWholeMacroInst : public VectorArithMacroInst +{ + protected: + VMvWholeMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorArithMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VMvWholeMicroInst : public VectorArithMicroInst +{ + protected: + VMvWholeMicroInst(const char *mnem, ExtMachInst _machInst, + 
OpClass __opClass, uint8_t _microVl, + uint8_t _microIdx) + : VectorArithMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +template +class VMaskMergeMicroInst : public VectorArithMicroInst +{ + private: + RegId srcRegIdxArr[NumVecInternalRegs]; + RegId destRegIdxArr[1]; + + public: + VMaskMergeMicroInst(ExtMachInst extMachInst, uint8_t _dstReg, + uint8_t _numSrcs) + : VectorArithMicroInst("vmask_mv_micro", extMachInst, + VectorIntegerArithOp, 0, 0) + { + setRegIdxArrays( + reinterpret_cast( + &std::remove_pointer_t::srcRegIdxArr), + reinterpret_cast( + &std::remove_pointer_t::destRegIdxArr)); + + _numSrcRegs = 0; + _numDestRegs = 0; + + setDestRegIdx(_numDestRegs++, vecRegClass[_dstReg]); + _numTypedDestRegs[VecRegClass]++; + for (uint8_t i=0; i<_numSrcs; i++) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[VecMemInternalReg0 + i]); + } + } + + Fault execute(ExecContext* xc, Trace::InstRecord* traceData) + const override { + vreg_t tmp_d0 = *(vreg_t *)xc->getWritableRegOperand(this, 0); + auto Vd = tmp_d0.as(); + constexpr uint8_t elems_per_vreg = VLENB / sizeof(ElemType); + size_t bit_cnt = elems_per_vreg; + vreg_t tmp_s; + xc->getRegOperand(this, 0, &tmp_s); + auto s = tmp_s.as(); + // cp the first result and tail + memcpy(Vd, s, VLENB); + for (uint8_t i = 1; i < this->_numSrcRegs; i++) { + xc->getRegOperand(this, i, &tmp_s); + s = tmp_s.as(); + if constexpr (elems_per_vreg < 8) { + constexpr uint8_t m = (1 << elems_per_vreg) - 1; + const uint8_t mask = m << (i * elems_per_vreg % 8); + // clr & ext bits + Vd[bit_cnt/8] ^= Vd[bit_cnt/8] & mask; + Vd[bit_cnt/8] |= s[bit_cnt/8] & mask; + bit_cnt += elems_per_vreg; + } else { + constexpr uint8_t byte_offset = elems_per_vreg / 8; + memcpy(Vd + i * byte_offset, s + i * byte_offset, byte_offset); + } + } + xc->setRegOperand(this, 0, &tmp_d0); + if (traceData) + traceData->setData(tmp_d0); + return NoFault; + } + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override { + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)); + for (uint8_t i = 0; i < this->_numSrcRegs; i++) { + ss << ", " << registerName(srcRegIdx(i)); + } + ss << ", offset:" << VLENB / sizeof(ElemType); + return ss.str(); + } +}; + +class VxsatMicroInst : public VectorArithMicroInst +{ + private: + bool* vxsat; + public: + VxsatMicroInst(bool* Vxsat, ExtMachInst extMachInst) + : VectorArithMicroInst("vxsat_micro", extMachInst, + VectorIntegerArithOp, 0, 0) + { + vxsat = Vxsat; + } + Fault execute(ExecContext* xc, Trace::InstRecord* traceData) + const override + { + xc->setMiscReg(MISCREG_VXSAT,*vxsat); + auto vcsr = xc->readMiscReg(MISCREG_VCSR); + xc->setMiscReg(MISCREG_VCSR, ((vcsr&~1)|*vxsat)); + return NoFault; + } + std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) + const override + { + std::stringstream ss; + ss << mnemonic << ' ' << "VXSAT" << ", " << (*vxsat ? 
"0x1" : "0x0"); + return ss.str(); + } +}; + +} // namespace RiscvISA +} // namespace gem5 + + +#endif // __ARCH_RISCV_INSTS_VECTOR_HH__ diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc index c8eabd44ad..ac53fe39a1 100644 --- a/src/arch/riscv/isa.cc +++ b/src/arch/riscv/isa.cc @@ -41,6 +41,7 @@ #include "arch/riscv/regs/float.hh" #include "arch/riscv/regs/int.hh" #include "arch/riscv/regs/misc.hh" +#include "arch/riscv/regs/vector.hh" #include "base/bitfield.hh" #include "base/compiler.hh" #include "base/logging.hh" @@ -49,6 +50,7 @@ #include "debug/Checkpoint.hh" #include "debug/LLSC.hh" #include "debug/RiscvMisc.hh" +#include "debug/VecRegs.hh" #include "mem/packet.hh" #include "mem/request.hh" #include "params/RiscvISA.hh" @@ -186,6 +188,14 @@ namespace RiscvISA [MISCREG_FFLAGS] = "FFLAGS", [MISCREG_FRM] = "FRM", + [MISCREG_VSTART] = "VSTART", + [MISCREG_VXSAT] = "VXSAT", + [MISCREG_VXRM] = "VXRM", + [MISCREG_VCSR] = "VCSR", + [MISCREG_VL] = "VL", + [MISCREG_VTYPE] = "VTYPE", + [MISCREG_VLENB] = "VLENB", + [MISCREG_NMIVEC] = "NMIVEC", [MISCREG_NMIE] = "NMIE", [MISCREG_NMIP] = "NMIP", @@ -195,8 +205,6 @@ namespace { /* Not applicable to RISCV */ -RegClass vecRegClass(VecRegClass, VecRegClassName, 1, debug::IntRegs); -RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs); RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1, debug::IntRegs); RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs); @@ -234,6 +242,8 @@ ISA::copyRegsFrom(ThreadContext *src) for (auto &id: floatRegClass) tc->setReg(id, src->getReg(id)); + // TODO: Copy vector regs. + // Lastly copy PC/NPC tc->pcState(src->pcState()); } @@ -393,6 +403,17 @@ ISA::readMiscReg(RegIndex idx) return readMiscRegNoEffect(idx); } + case MISCREG_VLENB: + { + return VLENB; + } + break; + case MISCREG_VCSR: + { + return readMiscRegNoEffect(MISCREG_VXSAT) & + (readMiscRegNoEffect(MISCREG_VXRM) << 1); + } + break; default: // Try reading HPM counters // As a placeholder, all HPM counters are just cycle counters @@ -537,6 +558,22 @@ ISA::setMiscReg(RegIndex idx, RegVal val) setMiscRegNoEffect(idx, val); } break; + case MISCREG_VXSAT: + { + setMiscRegNoEffect(misc_reg, val & 0x1); + } + break; + case MISCREG_VXRM: + { + setMiscRegNoEffect(misc_reg, val & 0x3); + } + break; + case MISCREG_VCSR: + { + setMiscRegNoEffect(MISCREG_VXSAT, val & 0x1); + setMiscRegNoEffect(MISCREG_VXRM, (val & 0x6) >> 1); + } + break; default: setMiscRegNoEffect(idx, val); } diff --git a/src/arch/riscv/isa.hh b/src/arch/riscv/isa.hh index e332956972..3c968b3636 100644 --- a/src/arch/riscv/isa.hh +++ b/src/arch/riscv/isa.hh @@ -67,6 +67,14 @@ enum FPUStatus DIRTY = 3, }; +enum class VPUStatus +{ + OFF = 0, + INITIAL = 1, + CLEAN = 2, + DIRTY = 3, +}; + class ISA : public BaseISA { protected: diff --git a/src/arch/riscv/isa/bitfields.isa b/src/arch/riscv/isa/bitfields.isa index 41935c5b0f..eaec5ee08f 100644 --- a/src/arch/riscv/isa/bitfields.isa +++ b/src/arch/riscv/isa/bitfields.isa @@ -130,3 +130,27 @@ def bitfield BIT24 <24>; def bitfield RNUM <23:20>; def bitfield KFUNCT5 <29:25>; def bitfield BS <31:30>; + +// Vector instructions +def bitfield VFUNCT6 <31:26>; +def bitfield VFUNCT5 <31:27>; +def bitfield VFUNCT3 <27:25>; +def bitfield VFUNCT2 <26:25>; + +def bitfield VS3 <11:7>; +def bitfield VS2 <24:20>; +def bitfield VS1 <19:15>; +def bitfield VD <11:7>; + +def bitfield NF <31:29>; +def bitfield MEW <28:28>; +def bitfield MOP <27:26>; +def bitfield VM <25>; +def bitfield LUMOP <24:20>; +def bitfield SUMOP 
<24:20>; +def bitfield WIDTH <14:12>; + +def bitfield BIT31 <31>; +def bitfield BIT30 <30>; +def bitfield SIMM5 <19:15>; +def bitfield SIMM3 <17:15>; diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index c6b74ff44f..83bdd3ba19 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -42,7 +42,7 @@ decode QUADRANT default Unknown::unknown() { CIMM8<7:6> << 4 | CIMM8<5:2> << 6; }}, {{ - if (machInst == 0) + if (machInst.instBits == 0) return std::make_shared("zero instruction", machInst); Rp2 = sp + imm; @@ -428,6 +428,174 @@ decode QUADRANT default Unknown::unknown() { Fd_bits = fd.v; }}, inst_flags=FloatMemReadOp); } + + 0x0: decode MOP { + 0x0: decode LUMOP { + 0x00: VleOp::vle8_v({{ + if ((machInst.vm || elem_mask(v0, ei)) && + i < this->microVl) { + Vd_ub[i] = Mem_vc.as()[i]; + } else { + Vd_ub[i] = Vs2_ub[i]; + } + }}, inst_flags=VectorUnitStrideLoadOp); + 0x08: decode NF { + format VlWholeOp { + 0x0: vl1re8_v({{ + Vd_ub[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x1: vl2re8_v({{ + Vd_ub[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x3: vl4re8_v({{ + Vd_ub[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x7: vl8re8_v({{ + Vd_ub[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + } + } + 0x0b: VlmOp::vlm_v({{ + Vd_ub[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorUnitStrideMaskLoadOp); + } + 0x1: VlIndexOp::vluxei8_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_ub[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + 0x2: VlStrideOp::vlse8_v({{ + Vd_ub[microIdx] = Mem_vc.as()[0]; + }}, inst_flags=VectorStridedLoadOp); + 0x3: VlIndexOp::vloxei8_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_ub[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + } + 0x5: decode MOP { + 0x0: decode LUMOP { + 0x00: VleOp::vle16_v({{ + if ((machInst.vm || elem_mask(v0, ei)) && + i < this->microVl) { + Vd_uh[i] = Mem_vc.as()[i]; + } else { + Vd_uh[i] = Vs2_uh[i]; + } + }}, inst_flags=VectorUnitStrideLoadOp); + 0x08: decode NF { + format VlWholeOp { + 0x0: vl1re16_v({{ + Vd_uh[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x1: vl2re16_v({{ + Vd_uh[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x3: vl4re16_v({{ + Vd_uh[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x7: vl8re16_v({{ + Vd_uh[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + } + } + } + 0x1: VlIndexOp::vluxei16_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_uh[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + 0x2: VlStrideOp::vlse16_v({{ + Vd_uh[microIdx] = Mem_vc.as()[0]; + }}, inst_flags=VectorStridedLoadOp); + 0x3: VlIndexOp::vloxei16_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_uh[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + } + 0x6: decode MOP { + 0x0: decode LUMOP { + 0x00: VleOp::vle32_v({{ + if ((machInst.vm || elem_mask(v0, ei)) && + i < this->microVl) { + Vd_uw[i] = Mem_vc.as()[i]; + } else { + Vd_uw[i] = Vs2_uw[i]; + } + }}, inst_flags=VectorUnitStrideLoadOp); + 0x08: decode NF { + format VlWholeOp { + 0x0: vl1re32_v({{ + Vd_uw[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x1: vl2re32_v({{ + Vd_uw[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x3: vl4re32_v({{ + Vd_uw[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x7: vl8re32_v({{ + Vd_uw[i] = 
Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + } + } + } + 0x1: VlIndexOp::vluxei32_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_uw[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + 0x2: VlStrideOp::vlse32_v({{ + Vd_uw[microIdx] = Mem_vc.as()[0]; + }}, inst_flags=VectorStridedLoadOp); + 0x3: VlIndexOp::vloxei32_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_uw[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + } + 0x7: decode MOP { + 0x0: decode LUMOP { + 0x00: VleOp::vle64_v({{ + if ((machInst.vm || elem_mask(v0, ei)) && + i < this->microVl) { + Vd_ud[i] = Mem_vc.as()[i]; + } else { + Vd_ud[i] = Vs2_ud[i]; + } + }}, inst_flags=VectorUnitStrideLoadOp); + 0x08: decode NF { + format VlWholeOp { + 0x0: vl1re64_v({{ + Vd_ud[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x1: vl2re64_v({{ + Vd_ud[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x3: vl4re64_v({{ + Vd_ud[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x7: vl8re64_v({{ + Vd_ud[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + } + } + } + 0x1: VlIndexOp::vluxei64_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_ud[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + 0x2: VlStrideOp::vlse64_v({{ + Vd_ud[microIdx] = Mem_vc.as()[0]; + }}, inst_flags=VectorStridedLoadOp); + 0x3: VlIndexOp::vloxei64_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_ud[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + } } 0x03: decode FUNCT3 { @@ -673,6 +841,106 @@ decode QUADRANT default Unknown::unknown() { Mem_ud = Fs2_bits; }}, inst_flags=FloatMemWriteOp); } + + 0x0: decode MOP { + 0x0: decode SUMOP { + 0x00: VseOp::vse8_v({{ + Mem_vc.as()[i] = Vs3_ub[i]; + }}, inst_flags=VectorUnitStrideStoreOp); + format VsWholeOp { + 0x8: decode NF { + 0x0: vs1r_v({{ + Mem_vc.as()[i] = Vs3_ub[i]; + }}, inst_flags=VectorWholeRegisterStoreOp); + 0x1: vs2r_v({{ + Mem_vc.as()[i] = Vs3_ub[i]; + }}, inst_flags=VectorWholeRegisterStoreOp); + 0x3: vs4r_v({{ + Mem_vc.as()[i] = Vs3_ub[i]; + }}, inst_flags=VectorWholeRegisterStoreOp); + 0x7: vs8r_v({{ + Mem_vc.as()[i] = Vs3_ub[i]; + }}, inst_flags=VectorWholeRegisterStoreOp); + } + } + 0x0b: VsmOp::vsm_v({{ + Mem_vc.as()[i] = Vs3_ub[i]; + }}, inst_flags=VectorUnitStrideMaskStoreOp); + } + 0x1: VsIndexOp::vsuxei8_v({{ + Mem_vc.as()[0] = Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_ub[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + 0x2: VsStrideOp::vsse8_v({{ + Mem_vc.as()[0] = Vs3_ub[microIdx]; + }}, inst_flags=VectorStridedStoreOp); + 0x3: VsIndexOp::vsoxei8_v({{ + Mem_vc.as()[0] = Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_ub[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + } + 0x5: decode MOP { + 0x0: decode SUMOP { + 0x00: VseOp::vse16_v({{ + Mem_vc.as()[i] = Vs3_uh[i]; + }}, inst_flags=VectorUnitStrideStoreOp); + } + 0x1: VsIndexOp::vsuxei16_v({{ + Mem_vc.as()[0] = Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_uh[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + 0x2: VsStrideOp::vsse16_v({{ + Mem_vc.as()[0] = Vs3_uh[microIdx]; + }}, inst_flags=VectorStridedStoreOp); + 0x3: VsIndexOp::vsoxei16_v({{ + Mem_vc.as()[0] = Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_uh[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + } + 0x6: decode MOP { + 0x0: decode SUMOP { + 0x00: VseOp::vse32_v({{ + Mem_vc.as()[i] = Vs3_uw[i]; + }}, inst_flags=VectorUnitStrideStoreOp); + } + 0x1: VsIndexOp::vsuxei32_v({{ + Mem_vc.as()[0] = 
Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_uw[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + 0x2: VsStrideOp::vsse32_v({{ + Mem_vc.as()[0] = Vs3_uw[microIdx]; + }}, inst_flags=VectorStridedStoreOp); + 0x3: VsIndexOp::vsoxei32_v({{ + Mem_vc.as()[0] = Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_uw[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + } + 0x7: decode MOP { + 0x0: decode SUMOP { + 0x00: VseOp::vse64_v({{ + Mem_vc.as()[i] = Vs3_ud[i]; + }}, inst_flags=VectorUnitStrideStoreOp); + } + 0x1: VsIndexOp::vsuxei64_v({{ + Mem_vc.as()[0] = Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_ud[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + 0x2: VsStrideOp::vsse64_v({{ + Mem_vc.as()[0] = Vs3_ud[microIdx]; + }}, inst_flags=VectorStridedStoreOp); + 0x3: VsIndexOp::vsoxei64_v({{ + Mem_vc.as()[0] = Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_ud[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + } } 0x0b: decode FUNCT3 { @@ -874,26 +1142,7 @@ decode QUADRANT default Unknown::unknown() { Rd = Rs1 << Rs2<5:0>; }}); 0x1: mulh({{ - bool negate = (Rs1_sd < 0) != (Rs2_sd < 0); - - uint64_t Rs1_lo = (uint32_t)std::abs(Rs1_sd); - uint64_t Rs1_hi = (uint64_t)std::abs(Rs1_sd) >> 32; - uint64_t Rs2_lo = (uint32_t)std::abs(Rs2_sd); - uint64_t Rs2_hi = (uint64_t)std::abs(Rs2_sd) >> 32; - - uint64_t hi = Rs1_hi*Rs2_hi; - uint64_t mid1 = Rs1_hi*Rs2_lo; - uint64_t mid2 = Rs1_lo*Rs2_hi; - uint64_t lo = Rs2_lo*Rs1_lo; - uint64_t carry = ((uint64_t)(uint32_t)mid1 - + (uint64_t)(uint32_t)mid2 + (lo >> 32)) >> 32; - - uint64_t res = hi + - (mid1 >> 32) + - (mid2 >> 32) + - carry; - Rd = negate ? ~res + (Rs1_sd*Rs2_sd == 0 ? 1 : 0) - : res; + Rd_sd = mulh(Rs1_sd, Rs2_sd); }}, IntMultOp); 0x5: clmul({{ uint64_t result = 0; @@ -926,24 +1175,7 @@ decode QUADRANT default Unknown::unknown() { Rd = (Rs1_sd < Rs2_sd) ? 1 : 0; }}); 0x1: mulhsu({{ - bool negate = Rs1_sd < 0; - uint64_t Rs1_lo = (uint32_t)std::abs(Rs1_sd); - uint64_t Rs1_hi = (uint64_t)std::abs(Rs1_sd) >> 32; - uint64_t Rs2_lo = (uint32_t)Rs2; - uint64_t Rs2_hi = Rs2 >> 32; - - uint64_t hi = Rs1_hi*Rs2_hi; - uint64_t mid1 = Rs1_hi*Rs2_lo; - uint64_t mid2 = Rs1_lo*Rs2_hi; - uint64_t lo = Rs1_lo*Rs2_lo; - uint64_t carry = ((uint64_t)(uint32_t)mid1 - + (uint64_t)(uint32_t)mid2 + (lo >> 32)) >> 32; - - uint64_t res = hi + - (mid1 >> 32) + - (mid2 >> 32) + - carry; - Rd = negate ? ~res + (Rs1_sd*Rs2 == 0 ? 1 : 0) : res; + Rd_sd = mulhsu(Rs1_sd, Rs2); }}, IntMultOp); 0x5: clmulr({{ uint64_t result = 0; @@ -966,19 +1198,7 @@ decode QUADRANT default Unknown::unknown() { Rd = (Rs1 < Rs2) ? 1 : 0; }}); 0x1: mulhu({{ - uint64_t Rs1_lo = (uint32_t)Rs1; - uint64_t Rs1_hi = Rs1 >> 32; - uint64_t Rs2_lo = (uint32_t)Rs2; - uint64_t Rs2_hi = Rs2 >> 32; - - uint64_t hi = Rs1_hi*Rs2_hi; - uint64_t mid1 = Rs1_hi*Rs2_lo; - uint64_t mid2 = Rs1_lo*Rs2_hi; - uint64_t lo = Rs1_lo*Rs2_lo; - uint64_t carry = ((uint64_t)(uint32_t)mid1 - + (uint64_t)(uint32_t)mid2 + (lo >> 32)) >> 32; - - Rd = hi + (mid1 >> 32) + (mid2 >> 32) + carry; + Rd = mulhu(Rs1, Rs2); }}, IntMultOp); 0x5: clmulh({{ uint64_t result = 0; @@ -1812,6 +2032,2093 @@ decode QUADRANT default Unknown::unknown() { } } + 0x15: decode FUNCT3 { + // OPIVV + 0x0: decode VFUNCT6 { + format VectorIntFormat { + 0x0: vadd_vv({{ + Vd_vu[i] = Vs2_vu[i] + Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + 0x2: vsub_vv({{ + Vd_vu[i] = Vs2_vu[i] - Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + 0x4: vminu_vv({{ + Vd_vu[i] = Vs2_vu[i] < Vs1_vu[i] ? 
+ Vs2_vu[i] : Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + 0x5: vmin_vv({{ + Vd_vi[i] = Vs2_vi[i] < Vs1_vi[i] ? + Vs2_vi[i] : Vs1_vi[i]; + }}, OPIVV, VectorIntegerArithOp); + 0x6: vmaxu_vv({{ + Vd_vu[i] = Vs2_vu[i] > Vs1_vu[i] ? + Vs2_vu[i] : Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + 0x7: vmax_vv({{ + Vd_vi[i] = Vs2_vi[i] > Vs1_vi[i] ? + Vs2_vi[i] : Vs1_vi[i]; + }}, OPIVV, VectorIntegerArithOp); + 0x9: vand_vv({{ + Vd_vu[i] = Vs2_vu[i] & Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + 0xa: vor_vv({{ + Vd_vu[i] = Vs2_vu[i] | Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + 0xb: vxor_vv({{ + Vd_vu[i] = Vs2_vu[i] ^ Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + } + 0x0c: VectorGatherFormat::vrgather_vv({{ + for (uint32_t i = 0; i < microVl; i++) { + uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias; + if (this->vm || elem_mask(v0, ei)) { + const uint64_t idx = Vs1_vu[i] + - vs2_elems * vs2_idx; + auto res = (Vs1_vu[i] >= vlmax) ? 0 + : (idx < vs2_elems) ? Vs2_vu[idx] + : Vs3_vu[i]; + Vd_vu[i] = res; + } + } + }}, OPIVV, VectorMiscOp); + 0x0e: VectorGatherFormat::vrgatherei16_vv({{ + for (uint32_t i = 0; i < microVl; i++) { + uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias; + if (this->vm || elem_mask(v0, ei)) { + const uint16_t idx = Vs1_uh[i + vs1_bias] + - vs2_elems * vs2_idx; + auto res = (Vs1_uh[i + vs1_bias] >= vlmax) ? 0 + : (idx < vs2_elems) ? Vs2_vu[idx] + : Vs3_vu[i + vd_bias]; + Vd_vu[i + vd_bias] = res; + } + } + }}, OPIVV, VectorMiscOp); + format VectorIntFormat { + 0x10: decode VM { + 0x0: vadc_vvm({{ + Vd_vi[i] = Vs2_vi[i] + Vs1_vi[i] + + elem_mask(v0, ei); + }}, OPIVV, VectorIntegerArithOp); + // the unmasked versions (vm=1) are reserved + } + 0x12: decode VM { + 0x0: vsbc_vvm({{ + Vd_vi[i] = Vs2_vi[i] - Vs1_vi[i] + - elem_mask(v0, ei); + }}, OPIVV, VectorIntegerArithOp); + // the unmasked versions (vm=1) are reserved + } + 0x17: decode VM { + 0x0: vmerge_vvm({{ + Vd_vu[i] = elem_mask(v0, ei) + ? 
Vs1_vu[i] + : Vs2_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + 0x1: decode VS2 { + 0x0: vmv_v_v({{ + Vd_vu[i] = Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + } + } + } + format VectorIntVxsatFormat{ + 0x20: vsaddu_vv({{ + Vd_vu[i] = sat_addu(Vs2_vu[i], Vs1_vu[i], + vxsatptr); + }}, OPIVV, VectorIntegerArithOp); + 0x21: vsadd_vv({{ + Vd_vu[i] = sat_add(Vs2_vu[i], Vs1_vu[i], + vxsatptr); + }}, OPIVV, VectorIntegerArithOp); + 0x22: vssubu_vv({{ + Vd_vu[i] = sat_subu(Vs2_vu[i], Vs1_vu[i], + vxsatptr); + }}, OPIVV, VectorIntegerArithOp); + 0x23: vssub_vv({{ + Vd_vu[i] = sat_sub(Vs2_vu[i], Vs1_vu[i], + vxsatptr); + }}, OPIVV, VectorIntegerArithOp); + 0x27: vsmul_vv({{ + vi max = std::numeric_limits::max(); + vi min = std::numeric_limits::min(); + bool overflow = Vs1_vi[i] == Vs2_vi[i] && + Vs1_vi[i] == min; + __int128_t result = (__int128_t)Vs1_vi[i] * + (__int128_t)Vs2_vi[i]; + result = int_rounding<__int128_t>( + result, 0 /* TODO */, sew - 1); + result = result >> (sew - 1); + if (overflow) { + result = max; + *vxsatptr = true; + } + + Vd_vi[i] = (vi)result; + }}, OPIVV, VectorIntegerArithOp); + } + format VectorIntFormat { + 0x25: vsll_vv({{ + Vd_vu[i] = Vs2_vu[i] << (Vs1_vu[i] & (sew - 1)); + }}, OPIVV, VectorIntegerArithOp); + 0x28: vsrl_vv({{ + Vd_vu[i] = Vs2_vu[i] >> (Vs1_vu[i] & (sew - 1)); + }}, OPIVV, VectorIntegerArithOp); + 0x29: vsra_vv({{ + Vd_vi[i] = Vs2_vi[i] >> (Vs1_vu[i] & (sew - 1)); + }}, OPIVV, VectorIntegerArithOp); + 0x2a: vssrl_vv({{ + int sh = Vs1_vu[i] & (sew - 1); + __uint128_t val = Vs2_vu[i]; + + val = int_rounding<__uint128_t>(val, + xc->readMiscReg(MISCREG_VXRM), sh); + Vd_vu[i] = val >> sh; + }}, OPIVV, VectorIntegerArithOp); + 0x2b: vssra_vv({{ + int sh = Vs1_vi[i] & (sew - 1); + __int128_t val = Vs2_vi[i]; + + val = int_rounding<__int128_t>(val, + xc->readMiscReg(MISCREG_VXRM), sh); + Vd_vi[i] = val >> sh; + }}, OPIVV, VectorIntegerArithOp); + } + format VectorReduceIntWideningFormat { + 0x30: vwredsumu_vs({{ + Vd_vwu[0] = reduce_loop(std::plus(), + Vs1_vwu, Vs2_vu); + }}, OPIVV, VectorIntegerReduceOp); + 0x31: vwredsum_vs({{ + Vd_vwu[0] = reduce_loop(std::plus(), + Vs1_vwi, Vs2_vi); + }}, OPIVV, VectorIntegerReduceOp); + } + format VectorIntMaskFormat { + 0x11: decode VM { + 0x0: vmadc_vvm({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + carry_out(Vs2_vu[i], Vs1_vu[i], + elem_mask(v0, ei))); + }}, OPIVV, VectorIntegerArithOp); + 0x1: vmadc_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + carry_out(Vs2_vu[i], Vs1_vu[i])); + }}, OPIVV, VectorIntegerArithOp); + } + 0x13: decode VM { + 0x0: vmsbc_vvm({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + borrow_out(Vs2_vi[i], Vs1_vi[i], + elem_mask(v0, ei))); + }}, OPIVV, VectorIntegerArithOp); + 0x1: vmsbc_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + borrow_out(Vs2_vi[i], Vs1_vi[i])); + }}, OPIVV, VectorIntegerArithOp); + } + 0x18: vmseq_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] == Vs1_vu[i])); + }}, OPIVV, VectorIntegerArithOp); + 0x19: vmsne_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] != Vs1_vu[i])); + }}, OPIVV, VectorIntegerArithOp); + 0x1a: vmsltu_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] < Vs1_vu[i])); + }}, OPIVV, VectorIntegerArithOp); + 0x1b: vmslt_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] < Vs1_vi[i])); + }}, OPIVV, VectorIntegerArithOp); + 0x1c: vmsleu_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] <= Vs1_vu[i])); + }}, OPIVV, 
VectorIntegerArithOp); + 0x1d: vmsle_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] <= Vs1_vi[i])); + }}, OPIVV, VectorIntegerArithOp); + } + format VectorIntNarrowingFormat { + 0x2c: vnsrl_wv({{ + Vd_vu[i + offset] = (vu)(Vs2_vwu[i] >> + ((vwu)Vs1_vu[i + offset] & (sew * 2 - 1))); + }}, OPIVV, VectorIntegerArithOp); + 0x2d: vnsra_wv({{ + Vd_vi[i + offset] = (vi)(Vs2_vwi[i] >> + ((vwu)Vs1_vu[i + offset] & (sew * 2 - 1))); + }}, OPIVV, VectorIntegerArithOp); + 0x2e: vnclipu_wv({{ + vu max = std::numeric_limits::max(); + uint64_t sign_mask = + std::numeric_limits::max() << sew; + __uint128_t res = Vs2_vwu[i]; + unsigned shift = Vs1_vu[i + offset] & ((sew * 2) - 1); + + res = int_rounding<__uint128_t>( + res, 0 /* TODO */, shift) >> shift; + + if (res & sign_mask) { + res = max; + // TODO: vxsat + } + + Vd_vu[i + offset] = (vu)res; + }}, OPIVV, VectorIntegerArithOp); + 0x2f: vnclip_wv({{ + vi max = std::numeric_limits::max(); + vi min = std::numeric_limits::min(); + __int128_t res = Vs2_vwi[i]; + unsigned shift = Vs1_vi[i + offset] & ((sew * 2) - 1); + + res = int_rounding<__int128_t>( + res, 0 /* TODO */, shift) >> shift; + + if (res < min) { + res = min; + // TODO: vxsat + } else if (res > max) { + res = max; + // TODO: vxsat + } + + Vd_vi[i + offset] = (vi)res; + }}, OPIVV, VectorIntegerArithOp); + } + } + // OPFVV + 0x1: decode VFUNCT6 { + 0x00: VectorFloatFormat::vfadd_vv({{ + auto fd = fadd(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x01: VectorReduceFloatFormat::vfredusum_vs({{ + Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) { + return fadd(ftype(src1), ftype(src2)); + }, Vs1_vu, Vs2_vu); + }}, OPFVV, VectorFloatReduceOp); + 0x02: VectorFloatFormat::vfsub_vv({{ + auto fd = fsub(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x03: VectorReduceFloatFormat::vfredosum_vs({{ + Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) { + return fadd(ftype(src1), ftype(src2)); + }, Vs1_vu, Vs2_vu); + }}, OPFVV, VectorFloatReduceOp); + 0x04: VectorFloatFormat::vfmin_vv({{ + auto fd = fmin(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x05: VectorReduceFloatFormat::vfredmin_vs({{ + Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) { + return fmin(ftype(src1), ftype(src2)); + }, Vs1_vu, Vs2_vu); + }}, OPFVV, VectorFloatReduceOp); + 0x06: VectorFloatFormat::vfmax_vv({{ + auto fd = fmax(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x07: VectorReduceFloatFormat::vfredmax_vs({{ + Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) { + return fmax(ftype(src1), ftype(src2)); + }, Vs1_vu, Vs2_vu); + }}, OPFVV, VectorFloatReduceOp); + 0x08: VectorFloatFormat::vfsgnj_vv({{ + Vd_vu[i] = fsgnj(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i]), + false, false).v; + }}, OPFVV, VectorFloatArithOp); + 0x09: VectorFloatFormat::vfsgnjn_vv({{ + Vd_vu[i] = fsgnj(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i]), + true, false).v; + }}, OPFVV, VectorFloatArithOp); + 0x0a: VectorFloatFormat::vfsgnjx_vv({{ + Vd_vu[i] = fsgnj(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i]), + false, true).v; + }}, OPFVV, VectorFloatArithOp); + // VWFUNARY0 + 0x10: decode VS1 { + 0x00: decode VM { + // The encodings corresponding to the masked versions + // (vm=0) of vfmv.f.s are reserved + 0x1: VectorNonSplitFormat::vfmv_f_s({{ + freg_t fd = freg(Vs2_vu[0]); + Fd_bits = fd.v; + }}, OPFVV, VectorMiscOp); + } + } + 0x12: decode VS1 
{ + format VectorFloatCvtFormat { + 0x00: vfcvt_xu_f_v({{ + Vd_vu[i] = f_to_ui(ftype(Vs2_vu[i]), + softfloat_roundingMode); + }}, OPFVV, VectorFloatConvertOp); + 0x01: vfcvt_x_f_v({{ + Vd_vu[i] = f_to_i(ftype(Vs2_vu[i]), + softfloat_roundingMode); + }}, OPFVV, VectorFloatConvertOp); + 0x02: vfcvt_f_xu_v({{ + auto fd = ui_to_f(Vs2_vu[i]); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x03: vfcvt_f_x_v({{ + auto fd = i_to_f(Vs2_vu[i]); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x06: vfcvt_rtz_xu_f_v({{ + Vd_vu[i] = f_to_ui(ftype(Vs2_vu[i]), + softfloat_round_minMag); + }}, OPFVV, VectorFloatConvertOp); + 0x07: vfcvt_rtz_x_f_v({{ + Vd_vu[i] = f_to_i(ftype(Vs2_vu[i]), + softfloat_round_minMag); + }}, OPFVV, VectorFloatConvertOp); + } + format VectorFloatWideningCvtFormat { + 0x08: vfwcvt_xu_f_v({{ + Vd_vwu[i] = f_to_wui( + ftype(Vs2_vu[i + offset]), + softfloat_roundingMode); + }}, OPFVV, VectorFloatConvertOp); + 0x09: vfwcvt_x_f_v({{ + Vd_vwu[i] = f_to_wi( + ftype(Vs2_vu[i + offset]), + softfloat_roundingMode); + }}, OPFVV, VectorFloatConvertOp); + 0x0a: vfwcvt_f_xu_v({{ + auto fd = ui_to_wf(Vs2_vu[i + offset]); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x0b: vfwcvt_f_x_v({{ + auto fd = i_to_wf(Vs2_vu[i + offset]); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x0c: vfwcvt_f_f_v({{ + auto fd = f_to_wf( + ftype(Vs2_vu[i + offset])); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x0e: vfwcvt_rtz_xu_f_v({{ + Vd_vwu[i] = f_to_wui( + ftype(Vs2_vu[i + offset]), + softfloat_round_minMag); + }}, OPFVV, VectorFloatConvertOp); + 0x0f: vfwcvt_rtz_x_f_v({{ + Vd_vwu[i] = f_to_wi( + ftype(Vs2_vu[i + offset]), + softfloat_round_minMag); + }}, OPFVV, VectorFloatConvertOp); + } + format VectorFloatNarrowingCvtFormat { + 0x10: vfncvt_xu_f_w({{ + Vd_vu[i + offset] = f_to_nui( + ftype(Vs2_vwu[i]), + softfloat_roundingMode); + }}, OPFVV, VectorFloatConvertOp); + 0x11: vfncvt_x_f_w({{ + Vd_vu[i + offset] = f_to_ni( + ftype(Vs2_vwu[i]), + softfloat_roundingMode); + }}, OPFVV, VectorFloatConvertOp); + 0x12: vfncvt_f_xu_w({{ + auto fd = ui_to_nf(Vs2_vwu[i]); + Vd_vu[i + offset] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x13: vfncvt_f_x_w({{ + auto fd = i_to_nf(Vs2_vwu[i]); + Vd_vu[i + offset] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x14: vfncvt_f_f_w({{ + auto fd = f_to_nf(ftype(Vs2_vwu[i])); + Vd_vu[i + offset] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x15: vfncvt_rod_f_f_w({{ + softfloat_roundingMode = softfloat_round_odd; + auto fd = f_to_nf(ftype(Vs2_vwu[i])); + Vd_vu[i + offset] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x16: vfncvt_rtz_xu_f_w({{ + Vd_vu[i + offset] = f_to_nui( + ftype(Vs2_vwu[i]), + softfloat_round_minMag); + }}, OPFVV, VectorFloatConvertOp); + 0x17: vfncvt_rtz_x_f_w({{ + Vd_vu[i + offset] = f_to_ni( + ftype(Vs2_vwu[i]), + softfloat_round_minMag); + }}, OPFVV, VectorFloatConvertOp); + } + } + 0x13: decode VS1 { + format VectorFloatCvtFormat { + 0x00: vfsqrt_v({{ + auto fd = fsqrt(ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x04: vfrsqrt7_v({{ + auto fd = frsqrte7(ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x05: vfrec7_v({{ + auto fd = frecip7(ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x10: vfclass_v({{ + auto fd = fclassify(ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + } + } + + format VectorFloatMaskFormat { + 0x18: vmfeq_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + 
feq(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i]))); + }}, OPFVV, VectorFloatArithOp); + 0x19: vmfle_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + fle(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i]))); + }}, OPFVV, VectorFloatArithOp); + 0x1b: vmflt_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + flt(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i]))); + }}, OPFVV, VectorFloatArithOp); + 0x1c: vmfne_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + !feq(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i]))); + }}, OPFVV, VectorFloatArithOp); + } + format VectorFloatFormat { + 0x20: vfdiv_vv({{ + auto fd = fdiv(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x24: vfmul_vv({{ + auto fd = fmul(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x28: vfmadd_vv({{ + auto fd = fmadd(ftype(Vs3_vu[i]), + ftype(Vs1_vu[i]), + ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x29: vfnmadd_vv({{ + auto fd = fmadd(fneg(ftype(Vs3_vu[i])), + ftype(Vs1_vu[i]), + fneg(ftype(Vs2_vu[i]))); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x2a: vfmsub_vv({{ + auto fd = fmadd(ftype(Vs3_vu[i]), + ftype(Vs1_vu[i]), + fneg(ftype(Vs2_vu[i]))); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x2b: vfnmsub_vv({{ + auto fd = fmadd(fneg(ftype(Vs3_vu[i])), + ftype(Vs1_vu[i]), + ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x2c: vfmacc_vv({{ + auto fd = fmadd(ftype(Vs1_vu[i]), + ftype(Vs2_vu[i]), + ftype(Vs3_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x2d: vfnmacc_vv({{ + auto fd = fmadd(fneg(ftype(Vs1_vu[i])), + ftype(Vs2_vu[i]), + fneg(ftype(Vs3_vu[i]))); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x2e: vfmsac_vv({{ + auto fd = fmadd(ftype(Vs1_vu[i]), + ftype(Vs2_vu[i]), + fneg(ftype(Vs3_vu[i]))); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x2f: vfnmsac_vv({{ + auto fd = fmadd(fneg(ftype(Vs1_vu[i])), + ftype(Vs2_vu[i]), + ftype(Vs3_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x31: VectorReduceFloatWideningFormat::vfwredusum_vs({{ + Vd_vwu[0] = reduce_loop( + [](const vwu& src1, const vu& src2) { + return fadd( + ftype(src1), + f_to_wf(ftype(src2)) + ); + }, Vs1_vwu, Vs2_vu); + }}, OPFVV, VectorFloatReduceOp); + 0x33: VectorReduceFloatWideningFormat::vfwredosum_vs({{ + Vd_vwu[0] = reduce_loop( + [](const vwu& src1, const vu& src2) { + return fadd( + ftype(src1), + f_to_wf(ftype(src2)) + ); + }, Vs1_vwu, Vs2_vu); + }}, OPFVV, VectorFloatReduceOp); + } + format VectorFloatWideningFormat { + 0x30: vfwadd_vv({{ + auto fd = fadd( + fwiden(ftype(Vs2_vu[i + offset])), + fwiden(ftype(Vs1_vu[i + offset]))); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x32: vfwsub_vv({{ + auto fd = fsub( + fwiden(ftype(Vs2_vu[i + offset])), + fwiden(ftype(Vs1_vu[i + offset]))); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x34: vfwadd_wv({{ + auto fd = fadd( + ftype(Vs2_vwu[i]), + fwiden(ftype(Vs1_vu[i + offset]))); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x36: vfwsub_wv({{ + auto fd = fsub( + ftype(Vs2_vwu[i]), + fwiden(ftype(Vs1_vu[i + offset]))); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x38: vfwmul_vv({{ + auto fd = fmul( + fwiden(ftype(Vs2_vu[i + offset])), + fwiden(ftype(Vs1_vu[i + offset]))); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x3c: vfwmacc_vv({{ + auto fd = fmadd( + fwiden(ftype(Vs1_vu[i + offset])), + fwiden(ftype(Vs2_vu[i + offset])), + ftype(Vs3_vwu[i])); 
+ Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x3d: vfwnmacc_vv({{ + auto fd = fmadd( + fwiden(fneg(ftype(Vs1_vu[i + offset]))), + fwiden(ftype(Vs2_vu[i + offset])), + fneg(ftype(Vs3_vwu[i]))); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x3e: vfwmsac_vv({{ + auto fd = fmadd( + fwiden(ftype(Vs1_vu[i + offset])), + fwiden(ftype(Vs2_vu[i + offset])), + fneg(ftype(Vs3_vwu[i]))); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x3f: vfwnmsac_vv({{ + auto fd = fmadd( + fwiden(fneg(ftype(Vs1_vu[i + offset]))), + fwiden(ftype(Vs2_vu[i + offset])), + ftype(Vs3_vwu[i])); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + } + } + // OPMVV + 0x2: decode VFUNCT6 { + format VectorReduceIntFormat { + 0x0: vredsum_vs({{ + Vd_vi[0] = + reduce_loop(std::plus(), Vs1_vi, Vs2_vi); + }}, OPMVV, VectorIntegerReduceOp); + 0x1: vredand_vs({{ + Vd_vi[0] = + reduce_loop(std::bit_and(), Vs1_vi, Vs2_vi); + }}, OPMVV, VectorIntegerReduceOp); + 0x2: vredor_vs({{ + Vd_vi[0] = + reduce_loop(std::bit_or(), Vs1_vi, Vs2_vi); + }}, OPMVV, VectorIntegerReduceOp); + 0x3: vredxor_vs({{ + Vd_vi[0] = + reduce_loop(std::bit_xor(), Vs1_vi, Vs2_vi); + }}, OPMVV, VectorIntegerReduceOp); + 0x4: vredminu_vs({{ + Vd_vu[0] = + reduce_loop([](const vu& src1, const vu& src2) { + return std::min(src1, src2); + }, Vs1_vu, Vs2_vu); + }}, OPMVV, VectorIntegerReduceOp); + 0x5: vredmin_vs({{ + Vd_vi[0] = + reduce_loop([](const vi& src1, const vi& src2) { + return std::min(src1, src2); + }, Vs1_vi, Vs2_vi); + }}, OPMVV, VectorIntegerReduceOp); + 0x6: vredmaxu_vs({{ + Vd_vu[0] = + reduce_loop([](const vu& src1, const vu& src2) { + return std::max(src1, src2); + }, Vs1_vu, Vs2_vu); + }}, OPMVV, VectorIntegerReduceOp); + 0x7: vredmax_vs({{ + Vd_vi[0] = + reduce_loop([](const vi& src1, const vi& src2) { + return std::max(src1, src2); + }, Vs1_vi, Vs2_vi); + }}, OPMVV, VectorIntegerReduceOp); + } + format VectorIntFormat { + 0x8: vaaddu_vv({{ + __uint128_t res = (__uint128_t)Vs2_vu[i] + Vs1_vu[i]; + res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1); + Vd_vu[i] = res >> 1; + }}, OPMVV, VectorIntegerArithOp); + 0x9: vaadd_vv({{ + __uint128_t res = (__uint128_t)Vs2_vi[i] + Vs1_vi[i]; + res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1); + Vd_vi[i] = res >> 1; + }}, OPMVV, VectorIntegerArithOp); + 0xa: vasubu_vv({{ + __uint128_t res = (__uint128_t)Vs2_vu[i] - Vs1_vu[i]; + res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1); + Vd_vu[i] = res >> 1; + }}, OPMVV, VectorIntegerArithOp); + 0xb: vasub_vv({{ + __uint128_t res = (__uint128_t)Vs2_vi[i] - Vs1_vi[i]; + res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1); + Vd_vi[i] = res >> 1; + }}, OPMVV, VectorIntegerArithOp); + } + // VWXUNARY0 + 0x10: decode VS1 { + 0x00: decode VM { + // The encodings corresponding to the masked versions + // (vm=0) of vmv.x.s are reserved. 
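+                    // Illustrative note, not part of this change: the mask
+                    // operands used by the vcpop.m / vfirst.m entries below
+                    // are flat, LSB-first bit vectors (one bit per element),
+                    // so on a byte-typed view the elem_mask() helper is
+                    // assumed to reduce to plain C++ along the lines of
+                    //     bool elem_mask(const uint8_t *v, uint32_t ei)
+                    //     { return (v[ei / 8] >> (ei % 8)) & 0x1; }
+                    // and the masked (vm=0) forms additionally AND each bit
+                    // with the corresponding v0 bit.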
+ 0x1: VectorNonSplitFormat::vmv_x_s({{ + Rd_ud = Vs2_vi[0]; + }}, OPMVV, VectorMiscOp); + } + 0x10: Vector1Vs1RdMaskFormat::vcpop_m({{ + uint64_t popcount = 0; + for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) { + bool vs2_lsb = elem_mask(Vs2_vu, i); + if(this->vm){ + popcount += vs2_lsb; + }else{ + bool do_mask = elem_mask(v0, i); + popcount += (vs2_lsb && do_mask); + } + } + Rd_vu = popcount; + }}, OPMVV, VectorMiscOp); + 0x11: Vector1Vs1RdMaskFormat::vfirst_m({{ + int64_t pos = -1; + for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) { + if(this->vm == 0){ + if(elem_mask(v0, i)==0){ + continue; + } + } + bool vs2_lsb = elem_mask(Vs2_vu, i); + if (vs2_lsb) { + pos = i; + break; + } + } + Rd_vu = pos; + }}, OPMVV, VectorMiscOp); + } + 0x12: decode VS1 { + format VectorIntExtFormat { + 0x02: vzext_vf8({{ + Vd_vu[i] = Vs2_vextu[i + offset]; + }}, OPMVV, VectorIntegerExtensionOp); + 0x03: vsext_vf8({{ + Vd_vi[i] = Vs2_vext[i + offset]; + }}, OPMVV, VectorIntegerExtensionOp); + 0x04: vzext_vf4({{ + Vd_vu[i] = Vs2_vextu[i + offset]; + }}, OPMVV, VectorIntegerExtensionOp); + 0x05: vsext_vf4({{ + Vd_vi[i] = Vs2_vext[i + offset]; + }}, OPMVV, VectorIntegerExtensionOp); + 0x06: vzext_vf2({{ + Vd_vu[i] = Vs2_vextu[i + offset]; + }}, OPMVV, VectorIntegerExtensionOp); + 0x07: vsext_vf2({{ + Vd_vi[i] = Vs2_vext[i + offset]; + }}, OPMVV, VectorIntegerExtensionOp); + } + } + 0x14: decode VS1 { + 0x01: Vector1Vs1VdMaskFormat::vmsbf_m({{ + bool has_one = false; + for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) { + bool vs2_lsb = elem_mask(Vs2_vu, i); + bool do_mask = elem_mask(v0, i); + if(this->vm||(this->vm == 0&&do_mask)){ + uint64_t res = 0; + if (!has_one && !vs2_lsb) { + res = 1; + } else if(!has_one && vs2_lsb) { + has_one = true; + } + Vd_ub[i/8] = ASSIGN_VD_BIT(i, res); + } + } + }}, OPMVV, VectorMiscOp); + 0x02: Vector1Vs1VdMaskFormat::vmsof_m({{ + bool has_one = false; + for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) { + bool vs2_lsb = elem_mask(Vs2_vu, i); + bool do_mask = elem_mask(v0, i); + if(this->vm||(this->vm == 0&&do_mask)){ + uint64_t res = 0; + if(!has_one && vs2_lsb) { + has_one = true; + res = 1; + } + Vd_ub[i/8] = ASSIGN_VD_BIT(i, res); + } + } + }}, OPMVV, VectorMiscOp); + 0x03: Vector1Vs1VdMaskFormat::vmsif_m({{ + bool has_one = false; + for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) { + bool vs2_lsb = elem_mask(Vs2_vu, i); + bool do_mask = elem_mask(v0, i); + if(this->vm||(this->vm == 0&&do_mask)){ + uint64_t res = 0; + if (!has_one && !vs2_lsb) { + res = 1; + } else if(!has_one && vs2_lsb) { + has_one = true; + res = 1; + } + Vd_ub[i/8] = ASSIGN_VD_BIT(i, res); + } + } + }}, OPMVV, VectorMiscOp); + 0x10: ViotaFormat::viota_m({{ + RiscvISAInst::VecRegContainer tmp_s2; + xc->getRegOperand(this, 2, + &tmp_s2); + auto Vs2bit = tmp_s2.as(); + for (uint32_t i = 0; i < this->microVl; i++) { + uint32_t ei = i + + vtype_VLMAX(vtype, true) * this->microIdx; + bool vs2_lsb = elem_mask(Vs2bit, ei); + bool do_mask = elem_mask(v0, ei); + bool has_one = false; + if (this->vm || (do_mask && !this->vm)) { + if (vs2_lsb) { + has_one = true; + } + } + bool use_ori = (!this->vm) && !do_mask; + if(use_ori == false){ + Vd_vu[i] = *cnt; + } + if (has_one) { + *cnt = *cnt+1; + } + } + }}, OPMVV, VectorMiscOp); + 0x11: VectorIntFormat::vid_v({{ + Vd_vu[i] = ei; + }}, OPMVV, VectorMiscOp); + } + format VectorMaskFormat { + 0x18: vmandn_mm({{ + Vd_ub[i/8] = ASSIGN_VD_BIT(i, + elem_mask(Vs2_vu, i) & !elem_mask(Vs1_vu, i)); + }}, OPMVV, VectorMiscOp); + 0x19: vmand_mm({{ + 
Vd_ub[i/8] = ASSIGN_VD_BIT(i, + elem_mask(Vs2_vu, i) & elem_mask(Vs1_vu, i)); + }}, OPMVV, VectorMiscOp); + 0x1a: vmor_mm({{ + Vd_ub[i/8] = ASSIGN_VD_BIT(i, + elem_mask(Vs2_vu, i) | elem_mask(Vs1_vu, i)); + }}, OPMVV, VectorMiscOp); + 0x1b: vmxor_mm({{ + Vd_ub[i/8] = ASSIGN_VD_BIT(i, + elem_mask(Vs2_vu, i) ^ elem_mask(Vs1_vu, i)); + }}, OPMVV, VectorMiscOp); + 0x1c: vmorn_mm({{ + Vd_ub[i/8] = ASSIGN_VD_BIT(i, + elem_mask(Vs2_vu, i) | !elem_mask(Vs1_vu, i)); + }}, OPMVV, VectorMiscOp); + 0x1d: vmnand_mm({{ + Vd_ub[i/8] = ASSIGN_VD_BIT(i, + !(elem_mask(Vs2_vu, i) & elem_mask(Vs1_vu, i))); + }}, OPMVV, VectorMiscOp); + 0x1e: vmnor_mm({{ + Vd_ub[i/8] = ASSIGN_VD_BIT(i, + !(elem_mask(Vs2_vu, i) | elem_mask(Vs1_vu, i))); + }}, OPMVV, VectorMiscOp); + 0x1f: vmxnor_mm({{ + Vd_ub[i/8] = ASSIGN_VD_BIT(i, + !(elem_mask(Vs2_vu, i) ^ elem_mask(Vs1_vu, i))); + }}, OPMVV, VectorMiscOp); + } + format VectorIntFormat { + 0x20: vdivu_vv({{ + if (Vs1_vu[i] == 0) + Vd_vu[i] = (vu)-1; + else + Vd_vu[i] = Vs2_vu[i] / Vs1_vu[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x21: vdiv_vv({{ + if (Vs1_vi[i] == 0) + Vd_vi[i] = -1; + else if (Vs2_vi[i] == std::numeric_limits::min() + && Vs1_vi[i] == -1) + Vd_vi[i] = Vs2_vi[i]; + else + Vd_vi[i] = Vs2_vi[i] / Vs1_vi[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x22: vremu_vv({{ + if (Vs1_vu[i] == 0) { + Vd_vu[i] = Vs2_vu[i]; + } else { + Vd_vu[i] = Vs2_vu[i] % Vs1_vu[i]; + } + }}, OPMVV, VectorIntegerArithOp); + 0x23: vrem_vv({{ + if (Vs1_vi[i] == 0) { + Vd_vi[i] = Vs2_vi[i]; + } else if (Vs2_vi[i] == std::numeric_limits::min() + && Vs1_vi[i] == -1) { + Vd_vi[i] = 0; + } else { + Vd_vi[i] = Vs2_vi[i] % Vs1_vi[i]; + } + }}, OPMVV, VectorIntegerArithOp); + 0x24: vmulhu_vv({{ + if (sew < 64) { + Vd_vu[i] = ((uint64_t)Vs2_vu[i] * Vs1_vu[i]) + >> sew; + } else { + Vd_vu[i] = mulhu(Vs2_vu[i], Vs1_vu[i]); + } + }}, OPMVV, VectorIntegerArithOp); + 0x25: vmul_vv({{ + Vd_vi[i] = Vs2_vi[i] * Vs1_vi[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x26: vmulhsu_vv({{ + if (sew < 64) { + Vd_vi[i] = ((int64_t)Vs2_vi[i] * + (uint64_t)Vs1_vu[i]) + >> sew; + } else { + Vd_vi[i] = mulhsu(Vs2_vi[i], Vs1_vu[i]); + } + }}, OPMVV, VectorIntegerArithOp); + 0x27: vmulh_vv({{ + if (sew < 64) { + Vd_vi[i] = ((int64_t)Vs2_vi[i] * Vs1_vi[i]) + >> sew; + } else { + Vd_vi[i] = mulh(Vs2_vi[i], Vs1_vi[i]); + } + }}, OPMVV, VectorIntegerArithOp); + 0x29: vmadd_vv({{ + Vd_vi[i] = Vs3_vi[i] * Vs1_vi[i] + Vs2_vi[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x2b: vnmsub_vv({{ + Vd_vi[i] = -(Vs3_vi[i] * Vs1_vi[i]) + Vs2_vi[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x2d: vmacc_vv({{ + Vd_vi[i] = Vs2_vi[i] * Vs1_vi[i] + Vs3_vi[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x2f: vnmsac_vv({{ + Vd_vi[i] = -(Vs2_vi[i] * Vs1_vi[i]) + Vs3_vi[i]; + }}, OPMVV, VectorIntegerArithOp); + } + format VectorIntWideningFormat { + 0x30: vwaddu_vv({{ + Vd_vwu[i] = vwu(Vs2_vu[i + offset]) + + vwu(Vs1_vu[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x31: vwadd_vv({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) + + vwi(Vs1_vi[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x32: vwsubu_vv({{ + Vd_vwu[i] = vwu(Vs2_vu[i + offset]) + - vwu(Vs1_vu[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x33: vwsub_vv({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) + - vwi(Vs1_vi[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x34: vwaddu_wv({{ + Vd_vwu[i] = Vs2_vwu[i] + vwu(Vs1_vu[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x35: vwadd_wv({{ + Vd_vwi[i] = Vs2_vwi[i] + vwi(Vs1_vi[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x36: 
vwsubu_wv({{ + Vd_vwu[i] = Vs2_vwu[i] - vwu(Vs1_vu[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x37: vwsub_wv({{ + Vd_vwi[i] = Vs2_vwi[i] - vwi(Vs1_vi[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x38: vwmulu_vv({{ + Vd_vwu[i] = vwu(Vs2_vu[i + offset]) + * vwu(Vs1_vu[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x3a: vwmulsu_vv({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) + * vwu(Vs1_vu[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x3b: vwmul_vv({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) + * vwi(Vs1_vi[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x3c: vwmaccu_vv({{ + Vd_vwu[i] = vwu(Vs1_vu[i + offset]) + * vwu(Vs2_vu[i + offset]) + + Vs3_vwu[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x3d: vwmacc_vv({{ + Vd_vwi[i] = vwi(Vs1_vi[i + offset]) + * vwi(Vs2_vi[i + offset]) + + Vs3_vwi[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x3f: vwmaccsu_vv({{ + Vd_vwi[i] = vwi(Vs1_vi[i + offset]) + * vwu(Vs2_vu[i + offset]) + + Vs3_vwi[i]; + }}, OPMVV, VectorIntegerArithOp); + } + } + // OPIVI + 0x3: decode VFUNCT6 { + format VectorIntFormat { + 0x00: vadd_vi({{ + Vd_vi[i] = Vs2_vi[i] + (vi)sext<5>(SIMM5); + }}, OPIVI, VectorIntegerArithOp); + 0x03: vrsub_vi({{ + Vd_vi[i] = (vi)sext<5>(SIMM5) - Vs2_vi[i]; + }}, OPIVI, VectorIntegerArithOp); + 0x09: vand_vi({{ + Vd_vi[i] = Vs2_vi[i] & (vi)sext<5>(SIMM5); + }}, OPIVI, VectorIntegerArithOp); + 0x0a: vor_vi({{ + Vd_vi[i] = Vs2_vi[i] | (vi)sext<5>(SIMM5); + }}, OPIVI, VectorIntegerArithOp); + 0x0b: vxor_vi({{ + Vd_vi[i] = Vs2_vi[i] ^ (vi)sext<5>(SIMM5); + }}, OPIVI, VectorIntegerArithOp); + } + 0x0c: VectorGatherFormat::vrgather_vi({{ + for (uint32_t i = 0; i < microVl; i++) { + uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias; + if (this->vm || elem_mask(v0, ei)) { + const uint64_t idx = + (uint64_t)sext<5>(SIMM5) - vs2_elems * vs2_idx; + Vd_vu[i] = ((uint64_t)sext<5>(SIMM5) >= vlmax) ? 0 + : (idx < vs2_elems) ? Vs2_vu[idx] + : Vs3_vu[i]; + } + } + }}, OPIVI, VectorMiscOp); + 0x0e: VectorSlideUpFormat::vslideup_vi({{ + const int offset = (int)(uint64_t)(SIMM5); + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vdIdx - vs2Idx; + const int offsetInVreg = offset - vregOffset * microVlmax; + if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? offsetInVreg + : 0; + const int vs2Offset = (offsetInVreg >= 0) + ? 0 + : -offsetInVreg; + const int elemOffset = vdOffset + vdIdx * microVlmax; + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + if (this->vm || elem_mask(v0, i + elemOffset)) { + Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + } + } + }}, OPIVI, VectorMiscOp); + 0x0f: VectorSlideDownFormat::vslidedown_vi({{ + const int offset = (int)(uint64_t)(SIMM5); + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vs2Idx - vdIdx; + const int offsetInVreg = offset - vregOffset * microVlmax; + const int numVs2s = vtype_regs_per_group(vtype); + if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { + const bool needZeroTail = numVs2s == vs2Idx + 1; + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? 0 + : -offsetInVreg; + const int vs2Offset = (offsetInVreg >= 0) + ? 
offsetInVreg + : 0; + const int elemIdxBase = vdIdx * microVlmax; + vreg_t resVreg; + auto res = resVreg.as(); + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + res[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + if (needZeroTail) { + for (int i = upperBound + vdOffset; + i < microVlmax; i++) { + res[i] = 0; + } + } + for (int i = vdOffset; i < microVl ; i++) { + if (vm || elem_mask(v0, i + elemIdxBase)) { + Vd_vu[i] = res[i]; + } + } + } + }}, OPIVI, VectorMiscOp); + format VectorIntFormat { + 0x10: decode VM { + 0x0: vadc_vim({{ + Vd_vi[i] = Vs2_vi[i] + + (vi)sext<5>(SIMM5) + elem_mask(v0, ei); + }}, OPIVI, VectorIntegerArithOp); + // the unmasked versions (vm=1) are reserved + } + 0x17: decode VM { + 0x0: vmerge_vim({{ + Vd_vi[i] = elem_mask(v0, ei) + ? (vi)sext<5>(SIMM5) + : Vs2_vi[i]; + }}, OPIVI, VectorIntegerArithOp); + 0x1: vmv_v_i({{ + Vd_vi[i] = (vi)sext<5>(SIMM5); + }}, OPIVI, VectorIntegerArithOp); + } + } + format VectorIntVxsatFormat{ + 0x20: vsaddu_vi({{ + Vd_vu[i] = sat_addu(Vs2_vu[i], (vu)SIMM5, + vxsatptr); + }}, OPIVI, VectorIntegerArithOp); + 0x21: vsadd_vi({{ + Vd_vu[i] = sat_add(Vs2_vu[i], (vu)SIMM5, + vxsatptr); + }}, OPIVI, VectorIntegerArithOp); + } + format VectorIntFormat { + 0x25: vsll_vi({{ + Vd_vu[i] = Vs2_vu[i] << ((vu)SIMM5 & (sew - 1) & 0x1f); + }}, OPIVI, VectorIntegerArithOp); + 0x28: vsrl_vi({{ + Vd_vu[i] = Vs2_vu[i] >> ((vu)SIMM5 & (sew - 1) & 0x1f); + }}, OPIVI, VectorIntegerArithOp); + 0x2a: vssrl_vi({{ + int sh = SIMM5 & (vtype_SEW(vtype) - 1); + __uint128_t res = Vs2_vu[i]; + + res = int_rounding<__uint128_t>( + res, 0 /* TODO */, sh) >> sh; + + Vd_vu[i] = res; + }}, OPIVI, VectorIntegerArithOp); + 0x29: vsra_vi({{ + Vd_vi[i] = Vs2_vi[i] >> ((vu)SIMM5 & (sew - 1) & 0x1f); + }}, OPIVI, VectorIntegerArithOp); + 0x2b: vssra_vi({{ + int sh = SIMM5 & (sew - 1); + __int128_t val = Vs2_vi[i]; + + val = int_rounding<__int128_t>(val, + xc->readMiscReg(MISCREG_VXRM), sh); + Vd_vi[i] = val >> sh; + }}, OPIVI, VectorIntegerArithOp); + } + // According to Spec Section 16.6, + // vm must be 1 (unmasked) in vmvr.v instructions. 
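+                // Illustrative note, not part of this change: SIMM3 encodes
+                // nr - 1, so the legal values 0/1/3/7 below copy 1/2/4/8
+                // whole vector registers independent of vl.  For each
+                // register the Vd_ud[i] = Vs2_ud[i] loop amounts to roughly
+                //     memcpy(vd_bytes, vs2_bytes, VLENB);
+                // (names illustrative), with one such copy per micro-op.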
+ 0x27: decode VM { 0x1: decode SIMM3 { + format VMvWholeFormat { + 0x0: vmv1r_v({{ + Vd_ud[i] = Vs2_ud[i]; + }}, OPIVI, VectorMiscOp); + 0x1: vmv2r_v({{ + Vd_ud[i] = Vs2_ud[i]; + }}, OPIVI, VectorMiscOp); + 0x3: vmv4r_v({{ + Vd_ud[i] = Vs2_ud[i]; + }}, OPIVI, VectorMiscOp); + 0x7: vmv8r_v({{ + Vd_ud[i] = Vs2_ud[i]; + }}, OPIVI, VectorMiscOp); + } + }} + format VectorIntMaskFormat { + 0x11: decode VM { + 0x0: vmadc_vim({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + carry_out(Vs2_vi[i], (vi)sext<5>(SIMM5), + elem_mask(v0, ei))); + }}, OPIVI, VectorIntegerArithOp); + 0x1: vmadc_vi({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + carry_out(Vs2_vi[i], (vi)sext<5>(SIMM5))); + }}, OPIVI, VectorIntegerArithOp); + } + 0x18: vmseq_vi({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] == (vi)sext<5>(SIMM5))); + }}, OPIVI, VectorIntegerArithOp); + 0x19: vmsne_vi({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] != (vi)sext<5>(SIMM5))); + }}, OPIVI, VectorIntegerArithOp); + 0x1c: vmsleu_vi({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] <= (vu)sext<5>(SIMM5))); + }}, OPIVI, VectorIntegerArithOp); + 0x1d: vmsle_vi({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] <= (vi)sext<5>(SIMM5))); + }}, OPIVI, VectorIntegerArithOp); + 0x1e: vmsgtu_vi({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] > (vu)sext<5>(SIMM5))); + }}, OPIVI, VectorIntegerArithOp); + 0x1f: vmsgt_vi({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] > (vi)sext<5>(SIMM5))); + }}, OPIVI, VectorIntegerArithOp); + } + format VectorIntNarrowingFormat { + 0x2c: vnsrl_wi({{ + Vd_vu[i + offset] = (vu)(Vs2_vwu[i] >> + ((vwu)SIMM5 & (sew * 2 - 1))); + }}, OPIVI, VectorIntegerArithOp); + 0x2d: vnsra_wi({{ + Vd_vi[i + offset] = (vi)(Vs2_vwi[i] >> + ((vwu)SIMM5 & (sew * 2 - 1))); + }}, OPIVI, VectorIntegerArithOp); + 0x2e: vnclipu_wi({{ + vu max = std::numeric_limits::max(); + uint64_t sign_mask = + std::numeric_limits::max() << sew; + __uint128_t res = Vs2_vwu[i]; + unsigned shift = VS1 & ((sew * 2) - 1); + + res = int_rounding<__uint128_t>( + res, 0 /* TODO */, shift) >> shift; + + if (res & sign_mask) { + // TODO: vxsat + res = max; + } + + Vd_vu[i + offset] = (vu)res; + }}, OPIVI, VectorIntegerArithOp); + 0x2f: vnclip_wi({{ + vi max = std::numeric_limits::max(); + vi min = std::numeric_limits::min(); + __int128_t res = Vs2_vwi[i]; + unsigned shift = VS1 & ((sew * 2) - 1); + + res = int_rounding<__int128_t>( + res, 0 /* TODO */, shift) >> shift; + + if (res < min) { + res = min; + // TODO: vxsat + } else if (res > max) { + res = max; + // TODO: vxsat + } + + Vd_vi[i + offset] = (vi)res; + }}, OPIVI, VectorIntegerArithOp); + } + } + // OPIVX + 0x4: decode VFUNCT6 { + format VectorIntFormat { + 0x0: vadd_vx({{ + Vd_vu[i] = Vs2_vu[i] + Rs1_vu; + }}, OPIVX, VectorIntegerArithOp); + 0x2: vsub_vx({{ + Vd_vu[i] = Vs2_vu[i] - Rs1_vu; + }}, OPIVX, VectorIntegerArithOp); + 0x3: vrsub_vx({{ + Vd_vu[i] = Rs1_vu - Vs2_vu[i]; + }}, OPIVX, VectorIntegerArithOp); + 0x4: vminu_vx({{ + Vd_vu[i] = std::min(Vs2_vu[i], Rs1_vu); + }}, OPIVX, VectorIntegerArithOp); + 0x5: vmin_vx({{ + Vd_vi[i] = std::min(Vs2_vi[i], Rs1_vi); + }}, OPIVX, VectorIntegerArithOp); + 0x6: vmaxu_vx({{ + Vd_vu[i] = std::max(Vs2_vu[i], Rs1_vu); + }}, OPIVX, VectorIntegerArithOp); + 0x7: vmax_vx({{ + Vd_vi[i] = std::max(Vs2_vi[i], Rs1_vi); + }}, OPIVX, VectorIntegerArithOp); + 0x9: vand_vx({{ + Vd_vu[i] = Vs2_vu[i] & Rs1_vu; + }}, OPIVX, VectorIntegerArithOp); + 0xa: 
vor_vx({{ + Vd_vu[i] = Vs2_vu[i] | Rs1_vu; + }}, OPIVX, VectorIntegerArithOp); + 0xb: vxor_vx({{ + Vd_vu[i] = Vs2_vu[i] ^ Rs1_vu; + }}, OPIVX, VectorIntegerArithOp); + } + 0x0e: VectorSlideUpFormat::vslideup_vx({{ + const int offset = (int)Rs1_vu; + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vdIdx - vs2Idx; + const int offsetInVreg = offset - vregOffset * microVlmax; + if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? offsetInVreg + : 0; + const int vs2Offset = (offsetInVreg >= 0) + ? 0 + : -offsetInVreg; + const int elemOffset = vdOffset + vdIdx * microVlmax; + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + if (this->vm || elem_mask(v0, i + elemOffset)) { + Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + } + } + }}, OPIVX, VectorMiscOp); + 0x0f: VectorSlideDownFormat::vslidedown_vx({{ + const int offset = (int)Rs1_vu; + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vs2Idx - vdIdx; + const int offsetInVreg = offset - vregOffset * microVlmax; + const int numVs2s = vtype_regs_per_group(vtype); + if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { + const bool needZeroTail = numVs2s == vs2Idx + 1; + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? 0 + : -offsetInVreg; + const int vs2Offset = (offsetInVreg >= 0) + ? offsetInVreg + : 0; + const int elemIdxBase = vdIdx * microVlmax; + vreg_t resVreg; + auto res = resVreg.as(); + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + res[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + if (needZeroTail) { + for (int i = upperBound + vdOffset; + i < microVlmax; i++) { + res[i] = 0; + } + } + for (int i = vdOffset; i < microVl ; i++) { + if (vm || elem_mask(v0, i + elemIdxBase)) { + Vd_vu[i] = res[i]; + } + } + } + }}, OPIVX, VectorMiscOp); + 0x0c: VectorGatherFormat::vrgather_vx({{ + for (uint32_t i = 0; i < microVl; i++) { + uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias; + if (this->vm || elem_mask(v0, ei)) { + const uint64_t idx = Rs1_vu - vs2_elems * vs2_idx; + Vd_vu[i] = (Rs1_vu >= vlmax) ? 0 + : (idx < vs2_elems) ? Vs2_vu[idx] + : Vs3_vu[i]; + } + } + }}, OPIVX, VectorMiscOp); + format VectorIntFormat { + 0x10: decode VM { + 0x0: vadc_vxm({{ + Vd_vi[i] = Vs2_vi[i] + Rs1_vi + elem_mask(v0, ei); + }}, OPIVX, VectorIntegerArithOp); + // the unmasked versions (vm=1) are reserved + } + 0x12: decode VM { + 0x0: vsbc_vxm({{ + Vd_vi[i] = Vs2_vi[i] - Rs1_vi - elem_mask(v0, ei); + }}, OPIVX, VectorIntegerArithOp); + // the unmasked versions (vm=1) are reserved + } + 0x17: decode VM { + 0x0: vmerge_vxm({{ + Vd_vu[i] = elem_mask(v0, ei) ? 
Rs1_vu : Vs2_vu[i]; + }}, OPIVX, VectorIntegerArithOp); + 0x1: decode VS2 { + 0x0: vmv_v_x({{ + Vd_vu[i] = Rs1_vu; + }}, OPIVX, VectorIntegerArithOp); + } + } + } + format VectorIntVxsatFormat{ + 0x20: vsaddu_vx({{ + Vd_vu[i] = sat_addu(Vs2_vu[i], Rs1_vu, + vxsatptr); + }}, OPIVX, VectorIntegerArithOp); + 0x21: vsadd_vx({{ + Vd_vu[i] = sat_add(Vs2_vu[i], Rs1_vu, + vxsatptr); + }}, OPIVX, VectorIntegerArithOp); + 0x22: vssubu_vx({{ + Vd_vu[i] = sat_subu(Vs2_vu[i], Rs1_vu, + vxsatptr); + }}, OPIVX, VectorIntegerArithOp); + 0x23: vssub_vx({{ + Vd_vu[i] = sat_sub(Vs2_vu[i], Rs1_vu, + vxsatptr); + }}, OPIVX, VectorIntegerArithOp); + 0x27: vsmul_vx({{ + vi max = std::numeric_limits::max(); + vi min = std::numeric_limits::min(); + bool overflow = Rs1_vi == Vs2_vi[i] && Rs1_vi == min; + __int128_t result = + (__int128_t)Rs1_vi * (__int128_t)Vs2_vi[i]; + result = int_rounding<__uint128_t>( + result, 0 /* TODO */, sew - 1); + result = result >> (sew - 1); + if (overflow) { + result = max; + *vxsatptr = true; + } + + Vd_vi[i] = (vi)result; + }}, OPIVX, VectorIntegerArithOp); + } + format VectorIntFormat { + 0x25: vsll_vx({{ + Vd_vu[i] = Vs2_vu[i] << (Rs1_vu & (sew - 1)); + }}, OPIVX, VectorIntegerArithOp); + 0x28: vsrl_vx({{ + Vd_vu[i] = Vs2_vu[i] >> (Rs1_vu & (sew - 1)); + }}, OPIVX, VectorIntegerArithOp); + 0x29: vsra_vx({{ + Vd_vi[i] = Vs2_vi[i] >> (Rs1_vu & (sew - 1)); + }}, OPIVX, VectorIntegerArithOp); + 0x2a: vssrl_vx({{ + int sh = Rs1_vu & (sew - 1); + __uint128_t val = Vs2_vu[i]; + + val = int_rounding<__uint128_t>(val, + xc->readMiscReg(MISCREG_VXRM), sh); + Vd_vu[i] = val >> sh; + }}, OPIVX, VectorIntegerArithOp); + 0x2b: vssra_vx({{ + int sh = Rs1_vu & (sew - 1); + __int128_t val = Vs2_vi[i]; + + val = int_rounding<__int128_t>(val, + xc->readMiscReg(MISCREG_VXRM), sh); + Vd_vi[i] = val >> sh; + }}, OPIVX, VectorIntegerArithOp); + } + format VectorIntNarrowingFormat { + 0x2c: vnsrl_wx({{ + Vd_vu[i + offset] = (vu)(Vs2_vwu[i] >> + ((vwu)Rs1_vu & (sew * 2 - 1))); + }}, OPIVX, VectorIntegerArithOp); + 0x2d: vnsra_wx({{ + Vd_vi[i + offset] = (vi)(Vs2_vwi[i] >> + ((vwu)Rs1_vu & (sew * 2 - 1))); + }}, OPIVX, VectorIntegerArithOp); + 0x2e: vnclipu_wx({{ + vu max = std::numeric_limits::max(); + uint64_t sign_mask = + std::numeric_limits::max() << sew; + __uint128_t res = Vs2_vwu[i]; + unsigned shift = Rs1_vu & ((sew * 2) - 1); + + res = int_rounding<__uint128_t>( + res, 0 /* TODO */, shift) >> shift; + + if (res & sign_mask) { + // TODO: vxsat + res = max; + } + + Vd_vu[i + offset] = (vu)res; + }}, OPIVX, VectorIntegerArithOp); + 0x2f: vnclip_wx({{ + vi max = std::numeric_limits::max(); + vi min = std::numeric_limits::min(); + __int128_t res = Vs2_vwi[i]; + unsigned shift = Rs1_vi & ((sew * 2) - 1); + + res = int_rounding<__int128_t>( + res, 0 /* TODO */, shift) >> shift; + + if (res < min) { + res = min; + // TODO: vxsat + } else if (res > max) { + res = max; + // TODO: vxsat + } + + Vd_vi[i + offset] = (vi)res; + }}, OPIVX, VectorIntegerArithOp); + } + + format VectorIntMaskFormat { + 0x11: decode VM { + 0x0: vmadc_vxm({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + carry_out(Vs2_vi[i], Rs1_vi, + elem_mask(v0, ei))); + }}, OPIVX, VectorIntegerArithOp); + 0x1: vmadc_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + carry_out(Vs2_vi[i], Rs1_vi)); + }}, OPIVX, VectorIntegerArithOp); + } + 0x13: decode VM { + 0x0: vmsbc_vxm({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + borrow_out(Vs2_vi[i], Rs1_vi, + elem_mask(v0, ei))); + }}, OPIVX, VectorIntegerArithOp); + 0x1: 
vmsbc_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + borrow_out(Vs2_vi[i], Rs1_vi)); + }}, OPIVX, VectorIntegerArithOp); + } + 0x18: vmseq_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] == Rs1_vu)); + }}, OPIVX, VectorIntegerArithOp); + 0x19: vmsne_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] != Rs1_vu)); + }}, OPIVX, VectorIntegerArithOp); + 0x1a: vmsltu_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] < Rs1_vu)); + }}, OPIVX, VectorIntegerArithOp); + 0x1b: vmslt_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] < Rs1_vi)); + }}, OPIVX, VectorIntegerArithOp); + 0x1c: vmsleu_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] <= Rs1_vu)); + }}, OPIVX, VectorIntegerArithOp); + 0x1d: vmsle_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] <= Rs1_vi)); + }}, OPIVX, VectorIntegerArithOp); + 0x1e: vmsgtu_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] > Rs1_vu)); + }}, OPIVX, VectorIntegerArithOp); + 0x1f: vmsgt_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] > Rs1_vi)); + }}, OPIVX, VectorIntegerArithOp); + } + } + // OPFVF + 0x5: decode VFUNCT6 { + format VectorFloatFormat{ + 0x00: vfadd_vf({{ + auto fd = fadd(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x02: vfsub_vf({{ + auto fd = fsub(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x04: vfmin_vf({{ + auto fd = fmin(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x06: vfmax_vf({{ + auto fd = fmax(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x08: vfsgnj_vf({{ + Vd_vu[i] = fsgnj(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits)), + false, false).v; + }}, OPFVF, VectorFloatArithOp); + 0x09: vfsgnjn_vf({{ + Vd_vu[i] = fsgnj(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits)), + true, false).v; + }}, OPFVF, VectorFloatArithOp); + 0x0a: vfsgnjx_vf({{ + Vd_vu[i] = fsgnj(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits)), + false, true).v; + }}, OPFVF, VectorFloatArithOp); + } + 0x0e: VectorFloatSlideUpFormat::vfslide1up_vf({{ + const int offset = 1; + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vdIdx - vs2Idx; + const int offsetInVreg = offset - vregOffset * microVlmax; + if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? offsetInVreg + : 0; + const int vs2Offset = (offsetInVreg >= 0) + ? 
0 + : -offsetInVreg; + const int elemOffset = vdOffset + vdIdx * microVlmax; + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + if (this->vm || elem_mask(v0, i + elemOffset)) { + Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + } + // TODO: dirty code + if (vdIdx == 0 && vs2Idx == 0 && + (this->vm || elem_mask(v0, 0))) { + tmp_d0.as()[0] = Rs1_vu; + } + } + }}, OPFVF, VectorMiscOp); + 0x0f: VectorFloatSlideDownFormat::vfslide1down_vf({{ + const int offset = 1; + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vs2Idx - vdIdx; + const int offsetInVreg = offset - vregOffset * microVlmax; + const int numVs2s = vtype_regs_per_group(vtype); + if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { + const bool needZeroTail = numVs2s == vs2Idx + 1; + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? 0 + : -offsetInVreg; + const int vs2Offset = (offsetInVreg >= 0) + ? offsetInVreg + : 0; + const int elemIdxBase = vdIdx * microVlmax; + vreg_t resVreg; + auto res = resVreg.as(); + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + res[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + if (needZeroTail) { + for (int i = upperBound + vdOffset; + i < microVlmax; i++) { + res[i] = 0; + } + } + for (int i = vdOffset; i < microVl ; i++) { + if (vm || elem_mask(v0, i + elemIdxBase)) { + Vd_vu[i] = (i + elemIdxBase != machInst.vl - 1) + ? res[i] + : Rs1_vu; + } + } + } + }}, OPFVF, VectorMiscOp); + // VRFUNARY0 + 0x10: decode VS2 { + 0x00: decode VM { + // The encodings corresponding to the masked versions + // (vm=0) of vfmv.s.f are reserved + 0x1: VectorNonSplitFormat::vfmv_s_f({{ + auto fd = ftype_freg(freg(Fs1_bits)); + Vd_vu[0] = fd.v; + }}, OPFVV, VectorMiscOp); + } + } + format VectorFloatFormat{ + 0x17: decode VM { + 0x0: vfmerge_vfm({{ + Vd_vu[i] = elem_mask(v0, ei) + ? 
ftype_freg(freg(Fs1_bits)).v + : Vs2_vu[i]; + }}, OPFVF, VectorFloatArithOp); + 0x1: vfmv_v_f({{ + auto fd = ftype_freg(freg(Fs1_bits)); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + } + } + format VectorFloatMaskFormat { + 0x18: vmfeq_vf({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + feq(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits)))); + }}, OPFVF, VectorFloatArithOp); + 0x19: vmfle_vf({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + fle(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits)))); + }}, OPFVF, VectorFloatArithOp); + 0x1b: vmflt_vf({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + flt(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits)))); + }}, OPFVF, VectorFloatArithOp); + 0x1c: vmfne_vf({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + !feq(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits)))); + }}, OPFVF, VectorFloatArithOp); + 0x1d: vmfgt_vf({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + flt(ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i]))); + }}, OPFVF, VectorFloatArithOp); + 0x1f: vmfge_vf({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + fle(ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i]))); + }}, OPFVF, VectorFloatArithOp); + } + format VectorFloatFormat{ + 0x20: vfdiv_vf({{ + auto fd = fdiv(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x21: vfrdiv_vf({{ + auto fd = fdiv(ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x24: vfmul_vf({{ + auto fd = fmul(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x27: vfrsub_vf({{ + auto fd = fsub(ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x28: vfmadd_vf({{ + auto fd = fmadd(ftype(Vs3_vu[i]), + ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x29: vfnmadd_vf({{ + auto fd = fmadd(fneg(ftype(Vs3_vu[i])), + ftype_freg(freg(Fs1_bits)), + fneg(ftype(Vs2_vu[i]))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x2a: vfmsub_vf({{ + auto fd = fmadd(ftype(Vs3_vu[i]), + ftype_freg(freg(Fs1_bits)), + fneg(ftype(Vs2_vu[i]))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x2b: vfnmsub_vf({{ + auto fd = fmadd(fneg(ftype(Vs3_vu[i])), + ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x2c: vfmacc_vf({{ + auto fd = fmadd(ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i]), + ftype(Vs3_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x2d: vfnmacc_vf({{ + auto fd = fmadd( + fneg(ftype_freg(freg(Fs1_bits))), + ftype(Vs2_vu[i]), + fneg(ftype(Vs3_vu[i])) + ); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x2e: vfmsac_vf({{ + auto fd = fmadd(ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i]), + fneg(ftype(Vs3_vu[i]))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x2f: vfnmsac_vf({{ + auto fd = fmadd( + fneg(ftype_freg(freg(Fs1_bits))), + ftype(Vs2_vu[i]), + ftype(Vs3_vu[i]) + ); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + } + format VectorFloatWideningFormat { + 0x30: vfwadd_vf({{ + auto fd = fadd( + fwiden(ftype(Vs2_vu[i + offset])), + fwiden(ftype_freg(freg(Fs1_bits)))); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x32: vfwsub_vf({{ + auto fd = fsub( + fwiden(ftype(Vs2_vu[i + offset])), + fwiden(ftype_freg(freg(Fs1_bits)))); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x34: vfwadd_wf({{ + auto 
fd = fadd( + ftype(Vs2_vwu[i]), + fwiden(ftype_freg(freg(Fs1_bits)))); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x36: vfwsub_wf({{ + auto fd = fsub( + ftype(Vs2_vwu[i]), + fwiden(ftype_freg(freg(Fs1_bits)))); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x38: vfwmul_vf({{ + auto fd = fmul( + fwiden(ftype(Vs2_vu[i + offset])), + fwiden(ftype_freg(freg(Fs1_bits)))); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x3c: vfwmacc_vf({{ + auto fd = fmadd( + fwiden(ftype_freg(freg(Fs1_bits))), + fwiden(ftype(Vs2_vu[i + offset])), + ftype(Vs3_vwu[i])); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x3d: vfwnmacc_vf({{ + auto fd = fmadd( + fwiden(fneg(ftype_freg(freg(Fs1_bits)))), + fwiden(ftype(Vs2_vu[i + offset])), + fneg(ftype(Vs3_vwu[i]))); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x3e: vfwmsac_vf({{ + auto fd = fmadd( + fwiden(ftype_freg(freg(Fs1_bits))), + fwiden(ftype(Vs2_vu[i + offset])), + fneg(ftype(Vs3_vwu[i]))); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x3f: vfwnmsac_vf({{ + auto fd = fmadd( + fwiden(fneg(ftype_freg(freg(Fs1_bits)))), + fwiden(ftype(Vs2_vu[i + offset])), + ftype(Vs3_vwu[i])); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + } + } + // OPMVX + 0x6: decode VFUNCT6 { + format VectorIntFormat { + 0x08: vaaddu_vx({{ + __uint128_t res = (__uint128_t)Vs2_vu[i] + Rs1_vu; + res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1); + Vd_vu[i] = res >> 1; + }}, OPMVX, VectorIntegerArithOp); + 0x09: vaadd_vx({{ + __uint128_t res = (__uint128_t)Vs2_vi[i] + Rs1_vi; + res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1); + Vd_vi[i] = res >> 1; + }}, OPMVX, VectorIntegerArithOp); + } + 0x0e: VectorSlideUpFormat::vslide1up_vx({{ + const int offset = 1; + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vdIdx - vs2Idx; + const int offsetInVreg = offset - vregOffset * microVlmax; + if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? offsetInVreg + : 0; + const int vs2Offset = (offsetInVreg >= 0) + ? 0 + : -offsetInVreg; + const int elemOffset = vdOffset + vdIdx * microVlmax; + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + if (this->vm || elem_mask(v0, i + elemOffset)) { + Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + } + // TODO: dirty code + if (vdIdx == 0 && vs2Idx == 0 && + (this->vm || elem_mask(v0, 0))) { + tmp_d0.as()[0] = Rs1_vu; + } + } + }}, OPIVX, VectorMiscOp); + 0x0f: VectorSlideDownFormat::vslide1down_vx({{ + const int offset = 1; + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vs2Idx - vdIdx; + const int offsetInVreg = offset - vregOffset * microVlmax; + const int numVs2s = vtype_regs_per_group(vtype); + if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { + const bool needZeroTail = numVs2s == vs2Idx + 1; + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? 0 + : -offsetInVreg; + const int vs2Offset = (offsetInVreg >= 0) + ? 
offsetInVreg + : 0; + const int elemIdxBase = vdIdx * microVlmax; + vreg_t resVreg; + auto res = resVreg.as(); + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + res[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + if (needZeroTail) { + for (int i = upperBound + vdOffset; + i < microVlmax; i++) { + res[i] = 0; + } + } + for (int i = vdOffset; i < microVl ; i++) { + if (vm || elem_mask(v0, i + elemIdxBase)) { + Vd_vu[i] = (i + elemIdxBase != machInst.vl - 1) + ? res[i] + : Rs1_vu; + } + } + } + }}, OPIVX, VectorMiscOp); + // VRXUNARY0 + 0x10: decode VS2 { + 0x00: decode VM { + // The encodings corresponding to the masked versions + // (vm=0) of vmv.s.x are reserved. + 0x1: VectorNonSplitFormat::vmv_s_x({{ + Vd_vu[0] = Rs1_vu; + }}, OPMVX, VectorMiscOp); + } + } + format VectorIntFormat { + 0x0a: vasubu_vx({{ + __uint128_t res = (__uint128_t)Vs2_vu[i] - Rs1_vu; + res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1); + Vd_vu[i] = res >> 1; + }}, OPMVX, VectorIntegerArithOp); + 0x0b: vasub_vx({{ + __uint128_t res = (__uint128_t)Vs2_vi[i] - Rs1_vi; + res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1); + Vd_vi[i] = res >> 1; + }}, OPMVX, VectorIntegerArithOp); + 0x20: vdivu_vx({{ + if (Rs1_vu == 0) + Vd_vu[i] = (vu)-1; + else + Vd_vu[i] = Vs2_vu[i] / Rs1_vu; + }}, OPMVX, VectorIntegerArithOp); + 0x21: vdiv_vx({{ + if (Rs1_vi == 0) + Vd_vi[i] = -1; + else if (Vs2_vi[i] == std::numeric_limits::min() + && Rs1_vi == -1) + Vd_vi[i] = Vs2_vi[i]; + else + Vd_vi[i] = Vs2_vi[i] / Rs1_vi; + }}, OPMVX, VectorIntegerArithOp); + 0x22: vremu_vx({{ + if (Rs1_vu == 0) + Vd_vu[i] = Vs2_vu[i]; + else + Vd_vu[i] = Vs2_vu[i] % Rs1_vu; + }}, OPMVX, VectorIntegerArithOp); + 0x23: vrem_vx({{ + if (Rs1_vi == 0) + Vd_vi[i] = Vs2_vi[i]; + else if (Vs2_vi[i] == std::numeric_limits::min() + && Rs1_vi == -1) + Vd_vi[i] = 0; + else + Vd_vi[i] = Vs2_vi[i] % Rs1_vi; + }}, OPMVX, VectorIntegerArithOp); + 0x24: vmulhu_vx({{ + if (sew < 64) + Vd_vu[i] = ((uint64_t)Vs2_vu[i] * Rs1_vu) + >> sew; + else + Vd_vu[i] = mulhu(Vs2_vu[i], Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x25: vmul_vx({{ + Vd_vi[i] = Vs2_vi[i] * Rs1_vi; + }}, OPMVX, VectorIntegerArithOp); + 0x26: vmulhsu_vx({{ + if (sew < 64) + Vd_vi[i] = ((int64_t)Vs2_vi[i] * + (uint64_t)Rs1_vu) + >> sew; + else + Vd_vi[i] = mulhsu(Vs2_vi[i], Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x27: vmulh_vx({{ + if (sew < 64) + Vd_vi[i] = ((int64_t)Vs2_vi[i] * Rs1_vi) + >> sew; + else + Vd_vi[i] = mulh(Vs2_vi[i], Rs1_vi); + }}, OPMVX, VectorIntegerArithOp); + 0x29: vmadd_vx({{ + Vd_vi[i] = Vs3_vi[i] * Rs1_vi + Vs2_vi[i]; + }}, OPMVX, VectorIntegerArithOp); + 0x2b: vnmsub_vx({{ + Vd_vi[i] = -(Vs3_vi[i] * Rs1_vi) + Vs2_vi[i]; + }}, OPMVX, VectorIntegerArithOp); + 0x2d: vmacc_vx({{ + Vd_vi[i] = Vs2_vi[i] * Rs1_vi + Vs3_vi[i]; + }}, OPMVX, VectorIntegerArithOp); + 0x2f: vnmsac_vx({{ + Vd_vi[i] = -(Vs2_vi[i] * Rs1_vi) + Vs3_vi[i]; + }}, OPMVX, VectorIntegerArithOp); + } + format VectorIntWideningFormat { + 0x30: vwaddu_vx({{ + Vd_vwu[i] = vwu(Vs2_vu[i + offset]) + vwu(Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x31: vwadd_vx({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) + vwi(Rs1_vi); + }}, OPMVX, VectorIntegerArithOp); + 0x32: vwsubu_vx({{ + Vd_vwu[i] = vwu(Vs2_vu[i + offset]) - vwu(Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x33: vwsub_vx({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) - vwi(Rs1_vi); + }}, OPMVX, VectorIntegerArithOp); + 0x34: vwaddu_wx({{ + Vd_vwu[i] = Vs2_vwu[i] + vwu(Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x35: vwadd_wx({{ + Vd_vwi[i] 
= Vs2_vwi[i] + vwi(Rs1_vi); + }}, OPMVX, VectorIntegerArithOp); + 0x36: vwsubu_wx({{ + Vd_vwu[i] = Vs2_vwu[i] - vwu(Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x37: vwsub_wx({{ + Vd_vwi[i] = Vs2_vwi[i] - vwi(Rs1_vi); + }}, OPMVX, VectorIntegerArithOp); + 0x38: vwmulu_vx({{ + Vd_vwu[i] = vwu(Vs2_vu[i + offset]) * vwu(Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x3a: vwmulsu_vx({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) * vwu(Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x3b: vwmul_vx({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) * vwi(Rs1_vi); + }}, OPMVX, VectorIntegerArithOp); + 0x3c: vwmaccu_vx({{ + Vd_vwu[i] = vwu(Rs1_vu) * vwu(Vs2_vu[i + offset]) + + Vs3_vwu[i]; + }}, OPMVX, VectorIntegerArithOp); + 0x3d: vwmacc_vx({{ + Vd_vwi[i] = vwi(Rs1_vi) * vwi(Vs2_vi[i + offset]) + + Vs3_vwi[i]; + }}, OPMVX, VectorIntegerArithOp); + 0x3e: vwmaccus_vx({{ + Vd_vwi[i] = vwu(Rs1_vu) * vwi(Vs2_vi[i + offset]) + + Vs3_vwi[i]; + }}, OPMVX, VectorIntegerArithOp); + 0x3f: vwmaccsu_vx({{ + Vd_vwi[i] = vwi(Rs1_vi) * vwu(Vs2_vu[i + offset]) + + Vs3_vwi[i]; + }}, OPMVX, VectorIntegerArithOp); + } + } + 0x7: decode BIT31 { + format VConfOp { + 0x0: vsetvli({{ + uint64_t rd_bits = RD; + uint64_t rs1_bits = RS1; + uint64_t requested_vl = Rs1_ud; + uint64_t requested_vtype = zimm11; + + Rd_ud = 0; + }}, VectorConfigOp); + 0x1: decode BIT30 { + 0x0: vsetvl({{ + uint64_t rd_bits = RD; + uint64_t rs1_bits = RS1; + uint64_t requested_vl = Rs1_ud; + uint64_t requested_vtype = Rs2_ud; + + Rd_ud = 0; + }}, VectorConfigOp); + 0x1: vsetivli({{ + uint64_t rd_bits = RD; + uint64_t rs1_bits = -1; + uint64_t requested_vl = uimm; + uint64_t requested_vtype = zimm10; + + Rd_ud = 0; + }}, VectorConfigOp); + } + } + } + } + 0x18: decode FUNCT3 { format BOp { 0x0: beq({{ diff --git a/src/arch/riscv/isa/formats/formats.isa b/src/arch/riscv/isa/formats/formats.isa index 2a6b91024d..d291929523 100644 --- a/src/arch/riscv/isa/formats/formats.isa +++ b/src/arch/riscv/isa/formats/formats.isa @@ -36,6 +36,9 @@ ##include "mem.isa" ##include "fp.isa" ##include "amo.isa" +##include "vector_conf.isa" +##include "vector_arith.isa" +##include "vector_mem.isa" // Include formats for nonstandard extensions ##include "compressed.isa" diff --git a/src/arch/riscv/isa/formats/vector_arith.isa b/src/arch/riscv/isa/formats/vector_arith.isa new file mode 100644 index 0000000000..62982ded54 --- /dev/null +++ b/src/arch/riscv/isa/formats/vector_arith.isa @@ -0,0 +1,1319 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2022 PLCT Lab +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+let {{
+    def setDestWrapper(destRegId):
+        return "setDestRegIdx(_numDestRegs++, " + destRegId + ");\n" + \
+               "_numTypedDestRegs[VecRegClass]++;\n"
+    def setSrcWrapper(srcRegId):
+        return "setSrcRegIdx(_numSrcRegs++, " + srcRegId + ");\n"
+    def setSrcVm():
+        return "if (!this->vm)\n" + \
+               "    setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);"
+    def vmDeclAndReadData():
+        return '''
+        [[maybe_unused]] RiscvISA::vreg_t tmp_v0;
+        [[maybe_unused]] uint8_t* v0;
+        if (!machInst.vm) {
+            xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0);
+            v0 = tmp_v0.as<uint8_t>();
+        }
+        '''
+    def copyOldVd(vd_idx):
+        return 'COPY_OLD_VD(%d);' % vd_idx
+    def loopWrapper(code, micro_inst = True):
+        if micro_inst:
+            upper_bound = "this->microVl"
+        else:
+            upper_bound = "(uint32_t)machInst.vl"
+        return '''
+        for (uint32_t i = 0; i < %s; i++) {
+            %s
+        }
+        ''' % (upper_bound, code)
+    def maskCondWrapper(code):
+        return "if (this->vm || elem_mask(v0, ei)) {\n" + \
+               code + "}\n"
+    def eiDeclarePrefix(code, widening = False):
+        if widening:
+            return '''
+            uint32_t ei = i + micro_vlmax * this->microIdx;
+            ''' + code
+        else:
+            return '''
+            uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx;
+            ''' + code
+
+    def wideningOpRegisterConstraintChecks(code):
+        return '''
+        const uint32_t num_microops = 1 << std::max(0, vtype_vlmul(machInst.vtype8) + 1);
+        if ((machInst.vd % alignToPowerOfTwo(num_microops)) != 0) {
+            std::string error =
+                csprintf("Unaligned Vd group in Widening op");
+            return std::make_shared<IllegalInstFault>(error, machInst);
+        }
+        if ((machInst.vs2 <= machInst.vd) && (machInst.vd < (machInst.vs2 + num_microops - 1))) {
+            // A destination vector register group can only overlap a source
+            // vector register group if the destination EEW is greater than
+            // the source EEW, the source EMUL is at least 1, and the overlap
+            // is in the highest-numbered part of the destination register
+            // group.
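+            // Illustrative example: with LMUL=1 widening to EMUL=2
+            // (num_microops == 2), vd=v2 with vs2=v2 overlaps the low half
+            // of the destination group and is rejected below, while vd=v2
+            // with vs2=v3 only overlaps the highest-numbered destination
+            // register and is accepted.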
+            std::string error =
+                csprintf("Unsupported overlap in Vs2 and Vd for Widening op");
+            return std::make_shared<IllegalInstFault>(error, machInst);
+        }
+        ''' + code
+
+    def narrowingOpRegisterConstraintChecks(code):
+        return '''
+        const uint32_t num_microops = 1 << std::max(0, vtype_vlmul(machInst.vtype8) + 1);
+        if ((machInst.vs2 % alignToPowerOfTwo(num_microops)) != 0) {
+            std::string error =
+                csprintf("Unaligned VS2 group in Narrowing op");
+            return std::make_shared<IllegalInstFault>(error, machInst);
+        }
+        if ((machInst.vs2 < machInst.vd) && (machInst.vd <= (machInst.vs2 + num_microops - 1))) {
+            // A destination vector register group can only overlap a source
+            // vector register group if the destination EEW is smaller than
+            // the source EEW and the overlap is in the lowest-numbered part
+            // of the source register group.
+            std::string error =
+                csprintf("Unsupported overlap in Vs2 and Vd for Narrowing op");
+            return std::make_shared<IllegalInstFault>(error, machInst);
+        }
+        ''' + code
+
+    def fflags_wrapper(code):
+        return '''
+        RegVal FFLAGS = xc->readMiscReg(MISCREG_FFLAGS);
+        std::feclearexcept(FE_ALL_EXCEPT);
+        ''' + code + '''
+        FFLAGS |= softfloat_exceptionFlags;
+        softfloat_exceptionFlags = 0;
+        xc->setMiscReg(MISCREG_FFLAGS, FFLAGS);
+        '''
+}};
+
+
+def format VectorIntFormat(code, category, *flags) {{
+    macroop_class_name = 'VectorArithMacroInst'
+    microop_class_name = 'VectorArithMicroInst'
+
+    if name == "vid_v":
+        macroop_class_name = 'VectorVMUNARY0MacroInst'
+        microop_class_name = 'VectorVMUNARY0MicroInst'
+
+    iop = InstObjParams(name, Name, macroop_class_name, {'code': code},
+                        flags)
+    inst_name, inst_suffix = name.split("_", maxsplit=1)
+    v0_required = inst_name not in ["vmv"]
+    mask_cond = v0_required and (inst_suffix not in ['vvm', 'vxm', 'vim'])
+    need_elem_idx = mask_cond or code.find("ei") != -1
+
+    dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]"
+
+    num_src_regs = 0
+
+    src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]"
+    num_src_regs += 1
+
+    src1_reg_id = ""
+    if category in ["OPIVV", "OPMVV"]:
+        src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]"
+        num_src_regs += 1
+    elif category in ["OPIVX", "OPMVX"]:
+        src1_reg_id = "intRegClass[_machInst.rs1]"
+        num_src_regs += 1
+    elif category == "OPIVI":
+        pass
+    else:
+        error("not supported category for VectorIntFormat: %s" % category)
+
+    old_vd_idx = num_src_regs
+    src3_reg_id = "vecRegClass[_machInst.vd + _microIdx]"
+
+    set_dest_reg_idx = setDestWrapper(dest_reg_id)
+
+    set_src_reg_idx = ""
+    if category != "OPIVI":
+        set_src_reg_idx += setSrcWrapper(src1_reg_id)
+    set_src_reg_idx += setSrcWrapper(src2_reg_id)
+    set_src_reg_idx += setSrcWrapper(src3_reg_id)
+    if v0_required:
+        set_src_reg_idx += setSrcVm()
+
+    # code
+    if mask_cond:
+        code = maskCondWrapper(code)
+    if need_elem_idx:
+        code = eiDeclarePrefix(code)
+    code = loopWrapper(code)
+
+    vm_decl_rd = ""
+    if v0_required:
+        vm_decl_rd = vmDeclAndReadData()
+
+    microiop = InstObjParams(name + "_micro",
+                             Name + "Micro",
+                             microop_class_name,
+                             {'code': code,
+                              'set_dest_reg_idx': set_dest_reg_idx,
+                              'set_src_reg_idx': set_src_reg_idx,
+                              'vm_decl_rd': vm_decl_rd,
+                              'copy_old_vd': copyOldVd(old_vd_idx)},
+                             flags)
+
+    # Because of the use of templates, we had to put all parts in header to
+    # keep the compiler happy.
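+    # For reference, once the wrappers above are applied the per-element
+    # body of e.g. vor_vx expands roughly to:
+    #     for (uint32_t i = 0; i < this->microVl; i++) {
+    #         uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx;
+    #         if (this->vm || elem_mask(v0, ei)) {
+    #             Vd_vu[i] = Vs2_vu[i] | Rs1_vu;
+    #         }
+    #     }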
+ header_output = \ + VectorIntMicroDeclare.subst(microiop) + \ + VectorIntMicroConstructor.subst(microiop) + \ + VectorIntMicroExecute.subst(microiop) + \ + VectorIntMacroDeclare.subst(iop) + \ + VectorIntMacroConstructor.subst(iop) + + decode_block = VectorIntDecodeBlock.subst(iop) +}}; + + +def format VectorIntExtFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + ext_div = int(inst_suffix[-1]) + + old_vd_idx = 1 + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx / " + \ + str(ext_div) + "]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + set_src_reg_idx += setSrcVm() + + code = maskCondWrapper(code) + code = eiDeclarePrefix(code) + code = loopWrapper(code) + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx), + 'ext_div': ext_div}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorIntExtMicroDeclare.subst(microiop) + \ + VectorIntMicroConstructor.subst(microiop) + \ + VectorIntExtMicroExecute.subst(microiop) + \ + VectorIntExtMacroDeclare.subst(iop) + \ + VectorIntMacroConstructor.subst(iop) + + decode_block = VectorIntDecodeBlock.subst(iop) +}}; + +def format VectorIntWideningFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + v0_required = True + mask_cond = v0_required + need_elem_idx = mask_cond or code.find("ei") != -1 + old_vd_idx = 2 + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src1_reg_id = "" + if category in ["OPIVV", "OPMVV"]: + src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx / 2]" + elif category in ["OPIVX", "OPMVX"]: + src1_reg_id = "intRegClass[_machInst.rs1]" + else: + error("not supported category for VectorIntFormat: %s" % category) + src2_reg_id = "" + if inst_suffix in ["vv", "vx"]: + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx / 2]" + elif inst_suffix in ["wv", "wx"]: + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + if v0_required: + set_src_reg_idx += setSrcVm() + + # code + if mask_cond: + code = maskCondWrapper(code) + if need_elem_idx: + code = eiDeclarePrefix(code, widening=True) + code = loopWrapper(code) + + code = wideningOpRegisterConstraintChecks(code) + + vm_decl_rd = "" + if v0_required: + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + + # Because of the use of templates, we had to put all parts in 
header to + # keep the compiler happy. + header_output = \ + VectorIntWideningMicroDeclare.subst(microiop) + \ + VectorIntWideningMicroConstructor.subst(microiop) + \ + VectorIntWideningMicroExecute.subst(microiop) + \ + VectorIntWideningMacroDeclare.subst(iop) + \ + VectorIntWideningMacroConstructor.subst(iop) + + decode_block = VectorIntWideningDecodeBlock.subst(iop) +}}; + +def format VectorIntNarrowingFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + mask_cond = True + need_elem_idx = True + + old_vd_idx = 2 + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx / 2]" + if category in ["OPIVV"]: + src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx / 2]" + elif category in ["OPIVX"]: + src1_reg_id = "intRegClass[_machInst.rs1]" + elif category == "OPIVI": + old_vd_idx = 1 + else: + error("not supported category for VectorIntFormat: %s" % category) + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + old_dest_reg_id = "vecRegClass[_machInst.vs3 + _microIdx / 2]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_src_reg_idx = "" + if category != "OPIVI": + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_src_reg_idx += setSrcVm() + # code + code = maskCondWrapper(code) + code = eiDeclarePrefix(code, widening=True) + code = loopWrapper(code) + code = narrowingOpRegisterConstraintChecks(code) + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx), + }, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
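+    # Note: the narrowing element bodies (vnclipu_wx / vnclip_wx in the
+    # decode block) shift the 2*SEW-wide source with int_rounding, whose
+    # rounding-mode argument is still hard-coded to 0 (see the TODOs), then
+    # clamp to the SEW-wide range; raising vxsat on clamping is likewise
+    # still a TODO there.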
+ header_output = \ + VectorIntWideningMicroDeclare.subst(microiop) + \ + VectorIntWideningMicroConstructor.subst(microiop) + \ + VectorIntNarrowingMicroExecute.subst(microiop) + \ + VectorIntWideningMacroDeclare.subst(iop) + \ + VectorIntWideningMacroConstructor.subst(iop) + + decode_block = VectorIntWideningDecodeBlock.subst(iop) +}}; + +def format VectorIntMaskFormat(code, category, *flags) {{ + iop = InstObjParams(name, + Name, + 'VectorArithMacroInst', + {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + v0_required = not (inst_name in ["vmadc", "vmsbc"] \ + and inst_suffix in ["vv", "vx", "vi"]) + mask_cond = inst_name not in ['vmadc', 'vmsbc'] + need_elem_idx = mask_cond or code.find("ei") != -1 + + old_vd_idx = 2 + dest_reg_id = "vecRegClass[VecMemInternalReg0 + _microIdx]" + src1_reg_id = "" + if category == "OPIVV": + src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]" + elif category == "OPIVX": + src1_reg_id = "intRegClass[_machInst.rs1]" + elif category == "OPIVI": + old_vd_idx = 1 + else: + error("not supported category for VectorIntFormat: %s" % category) + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + old_dest_reg_id = "vecRegClass[_machInst.vd]" + set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_src_reg_idx = "" + if category != "OPIVI": + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + if v0_required: + set_src_reg_idx += setSrcVm() + + #code + if mask_cond: + code = maskCondWrapper(code) + if need_elem_idx: + code = eiDeclarePrefix(code) + code = loopWrapper(code) + + vm_decl_rd = "" + if v0_required: + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
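+    # Mask-producing ops (vmseq/vmslt/... and vmadc/vmsbc in the decode
+    # block) pack one result bit per element: each micro-op writes its slice
+    # of the mask through ASSIGN_VD_BIT into a scratch register
+    # (VecMemInternalReg0 + _microIdx) and takes the old vd as an extra
+    # source so untouched mask bits can be preserved.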
+ header_output = \ + VectorIntMaskMicroDeclare.subst(microiop) + \ + VectorIntMaskMicroConstructor.subst(microiop) + \ + VectorIntMaskMicroExecute.subst(microiop) + \ + VectorIntMaskMacroDeclare.subst(iop) + \ + VectorIntMaskMacroConstructor.subst(iop) + decode_block = VectorIntDecodeBlock.subst(iop) +}}; + +def format VectorGatherFormat(code, category, *flags) {{ + inst_name, inst_suffix = name.split("_", maxsplit=1) + if inst_name == "vrgatherei16": + idx_type = "uint16_t" + else: + idx_type = "elem_type" + iop = InstObjParams(name, Name, 'VectorArithMacroInst', + {'idx_type': idx_type, + 'code': code}, + flags) + old_vd_idx = 2 + dest_reg_id = "vecRegClass[_machInst.vd + vd_idx]" + src1_reg_id = "" + if category in ["OPIVV"]: + src1_reg_id = "vecRegClass[_machInst.vs1 + vs1_idx]" + elif category in ["OPIVX"]: + src1_reg_id = "intRegClass[_machInst.rs1]" + elif category == "OPIVI": + old_vd_idx = 1 + else: + error("not supported category for VectorIntFormat: %s" % category) + src2_reg_id = "vecRegClass[_machInst.vs2 + vs2_idx]" + src3_reg_id = "vecRegClass[_machInst.vs3 + vd_idx]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + if category != "OPIVI": + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + set_src_reg_idx += setSrcVm() + + # code + + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx), + 'idx_type': idx_type}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
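+    # Gather semantics as implemented in the decode bodies (see vrgather_vx):
+    # an index at or beyond vlmax yields 0, an index that lands in the
+    # current vs2 micro-register reads that element, and any other index
+    # leaves the old destination value (Vs3) in place so a different
+    # (vd_idx, vs2_idx) micro-op pair can supply it.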
+ header_output = \ + VectorGatherMicroDeclare.subst(microiop) + \ + VectorGatherMicroConstructor.subst(microiop) + \ + VectorGatherMicroExecute.subst(microiop) + \ + VectorGatherMacroDeclare.subst(iop) + \ + VectorGatherMacroConstructor.subst(iop) + + decode_block = VectorGatherDecodeBlock.subst(iop) + +}}; + +def format VectorFloatFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + v0_required = inst_name not in ["vfmv"] + mask_cond = v0_required and (inst_suffix not in ['vvm', 'vfm']) + need_elem_idx = mask_cond or code.find("ei") != -1 + + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src1_reg_id = "" + if category == "OPFVV": + src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]" + elif category == "OPFVF": + src1_reg_id = "floatRegClass[_machInst.rs1]" + else: + error("not supported category for VectorFloatFormat: %s" % category) + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + if v0_required: + set_src_reg_idx += setSrcVm() + # code + if mask_cond: + code = maskCondWrapper(code) + if need_elem_idx: + code = eiDeclarePrefix(code) + code = loopWrapper(code) + code = fflags_wrapper(code) + + vm_decl_rd = "" + if v0_required: + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(2)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorFloatMicroDeclare.subst(microiop) + \ + VectorFloatMicroConstructor.subst(microiop) + \ + VectorFloatMicroExecute.subst(microiop) + \ + VectorFloatMacroDeclare.subst(iop) + \ + VectorFloatMacroConstructor.subst(iop) + + decode_block = VectorFloatDecodeBlock.subst(iop) +}}; + +def format VectorFloatCvtFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + + old_vd_idx = 1 + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + set_src_reg_idx += setSrcVm() + code = maskCondWrapper(code) + code = eiDeclarePrefix(code) + code = loopWrapper(code) + code = fflags_wrapper(code) + + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
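+    # fflags_wrapper (defined at the top of this file) brackets the element
+    # loop: it snapshots FFLAGS, clears the host FP exception state, runs
+    # the softfloat-based body, then ORs softfloat_exceptionFlags into
+    # FFLAGS once per micro-op and resets the softfloat flag word.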
+ header_output = \ + VectorFloatCvtMicroDeclare.subst(microiop) + \ + VectorFloatMicroConstructor.subst(microiop) + \ + VectorFloatMicroExecute.subst(microiop) + \ + VectorFloatCvtMacroDeclare.subst(iop) + \ + VectorFloatMacroConstructor.subst(iop) + + decode_block = VectorFloatDecodeBlock.subst(iop) +}}; + +def format VectorFloatWideningFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + v0_required = True + mask_cond = v0_required + need_elem_idx = mask_cond or code.find("ei") != -1 + + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src1_reg_id = "" + if category in ["OPFVV"]: + src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx / 2]" + elif category in ["OPFVF"]: + src1_reg_id = "floatRegClass[_machInst.rs1]" + else: + error("not supported category for VectorFloatFormat: %s" % category) + src2_reg_id = "" + if inst_suffix in ["vv", "vf"]: + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx / 2]" + elif inst_suffix in ["wv", "wf"]: + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + if v0_required: + set_src_reg_idx += setSrcVm() + + # code + if mask_cond: + code = maskCondWrapper(code) + if need_elem_idx: + code = eiDeclarePrefix(code, widening=True) + code = loopWrapper(code) + code = fflags_wrapper(code) + + code = wideningOpRegisterConstraintChecks(code) + + vm_decl_rd = "" + if v0_required: + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(2)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorIntWideningMicroDeclare.subst(microiop) + \ + VectorIntWideningMicroConstructor.subst(microiop) + \ + VectorFloatWideningMicroExecute.subst(microiop) + \ + VectorIntWideningMacroDeclare.subst(iop) + \ + VectorIntWideningMacroConstructor.subst(iop) + + decode_block = VectorFloatWideningDecodeBlock.subst(iop) +}}; + +def format VectorFloatWideningCvtFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + + old_vd_idx = 1 + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx / 2]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + set_src_reg_idx += setSrcVm() + code = maskCondWrapper(code) + code = eiDeclarePrefix(code) + code = loopWrapper(code) + code = fflags_wrapper(code) + + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
+ header_output = \ + VectorFloatCvtMicroDeclare.subst(microiop) + \ + VectorFloatMicroConstructor.subst(microiop) + \ + VectorFloatWideningMicroExecute.subst(microiop) + \ + VectorFloatCvtMacroDeclare.subst(iop) + \ + VectorIntWideningMacroConstructor.subst(iop) + + decode_block = VectorFloatWideningDecodeBlock.subst(iop) +}}; + +def format VectorFloatNarrowingCvtFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + + old_vd_idx = 1 + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx / 2]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx / 2]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + set_src_reg_idx += setSrcVm() + code = maskCondWrapper(code) + code = eiDeclarePrefix(code) + code = loopWrapper(code) + code = fflags_wrapper(code) + code = narrowingOpRegisterConstraintChecks(code) + + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorFloatCvtMicroDeclare.subst(microiop) + \ + VectorFloatMicroConstructor.subst(microiop) + \ + VectorFloatNarrowingMicroExecute.subst(microiop) + \ + VectorFloatCvtMacroDeclare.subst(iop) + \ + VectorIntWideningMacroConstructor.subst(iop) + + decode_block = VectorFloatWideningDecodeBlock.subst(iop) +}}; + +def format VectorFloatMaskFormat(code, category, *flags) {{ + iop = InstObjParams(name, + Name, + 'VectorArithMacroInst', + {'code': code}, + flags) + dest_reg_id = "vecRegClass[VecMemInternalReg0 + _microIdx]" + src1_reg_id = "" + if category == "OPFVV": + src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]" + elif category == "OPFVF": + src1_reg_id = "floatRegClass[_machInst.rs1]" + else: + error("not supported category for VectorFloatFormat: %s" % category) + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + old_dest_reg_id = "vecRegClass[_machInst.vd]" + set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_src_reg_idx += setSrcVm() + vm_decl_rd = vmDeclAndReadData() + + code = maskCondWrapper(code) + code = eiDeclarePrefix(code) + code = loopWrapper(code) + code = fflags_wrapper(code) + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(2)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
+ header_output = \ + VectorFloatMaskMicroDeclare.subst(microiop) + \ + VectorFloatMaskMicroConstructor.subst(microiop) + \ + VectorFloatMaskMicroExecute.subst(microiop) + \ + VectorFloatMaskMacroDeclare.subst(iop) + \ + VectorFloatMaskMacroConstructor.subst(iop) + decode_block = VectorFloatDecodeBlock.subst(iop) +}}; + +def format VMvWholeFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VMvWholeMacroInst', {'code': code}, flags) + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VMvWholeMicroInst', + {'code': code}, + flags) + + header_output = \ + VMvWholeMacroDeclare.subst(iop) + \ + VMvWholeMicroDeclare.subst(microiop) + decoder_output = \ + VMvWholeMacroConstructor.subst(iop) + \ + VMvWholeMicroConstructor.subst(microiop) + exec_output = VMvWholeMicroExecute.subst(microiop) + decode_block = BasicDecode.subst(iop) +}}; + +def format ViotaFormat(code, category, *flags){{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + + inst_name, inst_suffix = name.split("_", maxsplit=1) + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + # The tail of vector mask inst should be treated as tail-agnostic. + # We treat it with tail-undisturbed policy, since + # the test suits only support undisturbed policy. + old_dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_dest_reg_idx = setDestWrapper(dest_reg_id) + vm_decl_rd = vmDeclAndReadData() + set_vm_idx = setSrcVm() + + microiop = InstObjParams(name+"_micro", + Name+"Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'set_vm_idx': set_vm_idx, + 'copy_old_vd': copyOldVd(1)}, + flags) + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + ViotaMicroDeclare.subst(microiop) + \ + ViotaMicroConstructor.subst(microiop) + \ + ViotaMicroExecute.subst(microiop)+\ + ViotaMacroDeclare.subst(iop) + \ + ViotaMacroConstructor.subst(iop) + + decode_block = VectorIntDecodeBlock.subst(iop) + +}}; + +def format Vector1Vs1VdMaskFormat(code, category, *flags){{ + inst_name, inst_suffix = name.split("_", maxsplit=1) + dest_reg_id = "vecRegClass[_machInst.vd]" + src2_reg_id = "vecRegClass[_machInst.vs2]" + # The tail of vector mask inst should be treated as tail-agnostic. + # We treat it with tail-undisturbed policy, since + # the test suits only support undisturbed policy. + old_dest_reg_id = "vecRegClass[_machInst.vd]" + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_dest_reg_idx = setDestWrapper(dest_reg_id) + vm_decl_rd = vmDeclAndReadData() + set_vm_idx = setSrcVm() + iop = InstObjParams(name, + Name, + 'VectorNonSplitInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'set_vm_idx': set_vm_idx, + 'copy_old_vd': copyOldVd(1)}, + flags) + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
+ header_output = \ + Vector1Vs1RdMaskDeclare.subst(iop) + \ + Vector1Vs1VdMaskConstructor.subst(iop) + \ + Vector1Vs1VdMaskExecute.subst(iop) + + decode_block = VectorMaskDecodeBlock.subst(iop) +}}; + +def format Vector1Vs1RdMaskFormat(code, category, *flags){{ + inst_name, inst_suffix = name.split("_", maxsplit=1) + vm_decl_rd = vmDeclAndReadData() + set_vm_idx = setSrcVm() + iop = InstObjParams(name, + Name, + 'VectorNonSplitInst', + {'code': code, + 'vm_decl_rd': vm_decl_rd, + 'set_vm_idx': set_vm_idx}, + flags) + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + Vector1Vs1RdMaskDeclare.subst(iop) + \ + Vector1Vs1RdMaskConstructor.subst(iop) + \ + Vector1Vs1RdMaskExecute.subst(iop) + + decode_block = VectorMaskDecodeBlock.subst(iop) +}}; + +def format VectorNonSplitFormat(code, category, *flags) {{ + inst_name, inst_suffix = name.split("_", maxsplit=1) + vm_decl_rd = "" + + set_vm_idx = "" + + if inst_name == "vfmv" : + code = fflags_wrapper(code) + + iop = InstObjParams(name, + Name, + 'VectorNonSplitInst', + {'code': code, + 'vm_decl_rd': vm_decl_rd, + 'set_vm_idx': set_vm_idx}, + flags) + + + if inst_name == "vfmv" : + execute_block = VectorFloatNonSplitExecute.subst(iop) + decode_block = VectorFloatDecodeBlock.subst(iop) + elif inst_name == "vmv" : + execute_block = VectorIntNonSplitExecute.subst(iop) + decode_block = VectorIntDecodeBlock.subst(iop) + else : + error("Unsupported inst for VectorNonSplitFormat: %s" % inst_name) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorNonSplitDeclare.subst(iop) + \ + VectorNonSplitConstructor.subst(iop) + \ + execute_block + +}}; + +def format VectorMaskFormat(code, category, *flags) {{ + inst_name, inst_suffix = name.split("_", maxsplit=1) + old_vd_idx = 2 + if category not in ["OPMVV"]: + error("not supported category for VectorIntFormat: %s" % category) + dest_reg_id = "vecRegClass[_machInst.vd]" + src1_reg_id = "vecRegClass[_machInst.vs1]" + src2_reg_id = "vecRegClass[_machInst.vs2]" + + # The tail of vector mask inst should be treated as tail-agnostic. + # We treat it with tail-undisturbed policy, since + # the test suits only support undisturbed policy. + # TODO: remove it + old_dest_reg_id = "vecRegClass[_machInst.vd]" + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + code = loopWrapper(code, micro_inst = False) + + iop = InstObjParams(name, + Name, + 'VectorNonSplitInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
+ header_output = \ + VectorMaskDeclare.subst(iop) + \ + VectorMaskConstructor.subst(iop) + \ + VectorMaskExecute.subst(iop) + + decode_block = VectorMaskDecodeBlock.subst(iop) +}}; + +def format VectorReduceIntFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + dest_reg_id = "vecRegClass[_machInst.vd]" + src1_reg_id = "vecRegClass[_machInst.vs1]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + old_dest_reg_id = "vecRegClass[_machInst.vd]" + set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_src_reg_idx = setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + # Treat tail undisturbed/agnostic as the same + # We always need old rd as src vreg + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_src_reg_idx += setSrcVm() + vm_decl_rd = vmDeclAndReadData() + type_def = ''' + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + ''' + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'type_def': type_def, + 'copy_old_vd': copyOldVd(2)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorReduceMicroDeclare.subst(microiop) + \ + VectorReduceMicroConstructor.subst(microiop) + \ + VectorReduceIntMicroExecute.subst(microiop) + \ + VectorReduceMacroDeclare.subst(iop) + \ + VectorReduceMacroConstructor.subst(iop) + decode_block = VectorIntDecodeBlock.subst(iop) +}}; + +def format VectorReduceFloatFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + dest_reg_id = "vecRegClass[_machInst.vd]" + src1_reg_id = "vecRegClass[_machInst.vs1]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + old_dest_reg_id = "vecRegClass[_machInst.vd]" + set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_src_reg_idx = setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + # Treat tail undisturbed/agnostic as the same + # We always need old rd as src vreg + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_src_reg_idx += setSrcVm() + vm_decl_rd = vmDeclAndReadData() + type_def = ''' + using et = ElemType; + using vu = decltype(et::v); + ''' + + code = fflags_wrapper(code) + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'type_def': type_def, + 'copy_old_vd': copyOldVd(2)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
+ header_output = \ + VectorReduceMicroDeclare.subst(microiop) + \ + VectorReduceMicroConstructor.subst(microiop) + \ + VectorReduceFloatMicroExecute.subst(microiop) + \ + VectorReduceMacroDeclare.subst(iop) + \ + VectorReduceMacroConstructor.subst(iop) + decode_block = VectorFloatDecodeBlock.subst(iop) +}}; + +def format VectorReduceFloatWideningFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + dest_reg_id = "vecRegClass[_machInst.vd]" + src1_reg_id = "vecRegClass[_machInst.vs1]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + old_dest_reg_id = "vecRegClass[_machInst.vd]" + set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_src_reg_idx = setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + # Treat tail undisturbed/agnostic as the same + # We always need old rd as src vreg + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_src_reg_idx += setSrcVm() + vm_decl_rd = vmDeclAndReadData() + type_def = ''' + using et = ElemType; + using vu [[maybe_unused]] = decltype(et::v); + using ewt = typename double_width::type; + using vwu = decltype(ewt::v); + ''' + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'type_def': type_def, + 'copy_old_vd': copyOldVd(2)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorReduceMicroDeclare.subst(microiop) + \ + VectorReduceMicroConstructor.subst(microiop) + \ + VectorReduceFloatWideningMicroExecute.subst(microiop) + \ + VectorReduceMacroDeclare.subst(iop) + \ + VectorReduceMacroConstructor.subst(iop) + decode_block = VectorFloatWideningDecodeBlock.subst(iop) +}}; + +def format VectorIntVxsatFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + old_vd_idx = 2 + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src1_reg_id = "" + if category in ["OPIVV"]: + src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]" + elif category in ["OPIVX"]: + src1_reg_id = "intRegClass[_machInst.rs1]" + elif category == "OPIVI": + old_vd_idx = 1 + else: + error("not supported category for VectorIntVxsatFormat: %s" % category) + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]" + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + if category != "OPIVI": + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + set_src_reg_idx += setSrcVm() + vm_decl_rd = vmDeclAndReadData() + + code = maskCondWrapper(code) + code = eiDeclarePrefix(code) + code = loopWrapper(code) + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
+ header_output = \ + VectorIntVxsatMicroDeclare.subst(microiop) + \ + VectorIntVxsatMicroConstructor.subst(microiop) + \ + VectorIntMicroExecute.subst(microiop) + \ + VectorIntVxsatMacroDeclare.subst(iop) + \ + VectorIntVxsatMacroConstructor.subst(iop) + + decode_block = VectorIntDecodeBlock.subst(iop) +}}; + +def format VectorReduceIntWideningFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + dest_reg_id = "vecRegClass[_machInst.vd]" + src1_reg_id = "vecRegClass[_machInst.vs1]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + old_dest_reg_id = "vecRegClass[_machInst.vd]" + set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_src_reg_idx = setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + # Treat tail undisturbed/agnostic as the same + # We always need old rd as src vreg + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_src_reg_idx += setSrcVm() + vm_decl_rd = vmDeclAndReadData() + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(2)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorReduceMicroDeclare.subst(microiop) + \ + VectorReduceMicroConstructor.subst(microiop) + \ + VectorReduceIntWideningMicroExecute.subst(microiop) + \ + VectorReduceMacroDeclare.subst(iop) + \ + VectorReduceMacroConstructor.subst(iop) + decode_block = VectorIntWideningDecodeBlock.subst(iop) +}}; + +let {{ + +def VectorSlideBase(name, Name, category, code, flags, macro_construtor, + decode_template, micro_execute_template): + macroop_class_name = 'VectorSlideMacroInst' + microop_class_name = 'VectorSlideMicroInst' + # Make sure flags are in lists (convert to lists if not). + flags = makeList(flags) + iop = InstObjParams(name, Name, macroop_class_name, {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + dest_reg_id = "vecRegClass[_machInst.vd + vdIdx]" + src2_reg_id = "vecRegClass[_machInst.vs2 + vs2Idx]" + src1_ireg_id = "intRegClass[_machInst.rs1]" + src1_freg_id = "floatRegClass[_machInst.rs1]" + + # The tail of vector mask inst should be treated as tail-agnostic. + # We treat it with tail-undisturbed policy, since + # the test suits only support undisturbed policy. + num_src_regs = 0 + + old_dest_reg_id = "vecRegClass[_machInst.vd + vdIdx]" + set_src_reg_idx = "" + if category in ["OPIVX", "OPMVX"]: + set_src_reg_idx += setSrcWrapper(src1_ireg_id) + num_src_regs += 1 + elif category in ["OPFVF"]: + set_src_reg_idx += setSrcWrapper(src1_freg_id) + num_src_regs += 1 + set_src_reg_idx += setSrcWrapper(src2_reg_id) + num_src_regs += 1 + old_vd_idx = num_src_regs + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_dest_reg_idx = setDestWrapper(dest_reg_id) + vm_decl_rd = vmDeclAndReadData() + set_src_reg_idx += setSrcVm() + microiop = InstObjParams(name + "_micro", + Name + "Micro", + microop_class_name, + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
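+    # Worked example for the slide-up element code in the decoder: for
+    # vslideup.vx with offset = 5 and microVlmax = 4, the micro-op pair
+    # (vdIdx = 2, vs2Idx = 1) has vregOffset = 1 and offsetInVreg = 1, so it
+    # copies vs2 elements [0..2] of that micro-register into vd elements
+    # [1..3]; in global terms destination elements 9..11 receive source
+    # elements 4..6, which is exactly a shift up by 5.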
+ # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorSlideMicroDeclare.subst(microiop) + \ + VectorSlideMicroConstructor.subst(microiop) + \ + micro_execute_template.subst(microiop) + \ + VectorSlideMacroDeclare.subst(iop) + \ + macro_construtor.subst(iop) + + decode_block = decode_template.subst(iop) + return (header_output, decode_block) + +}}; + +def format VectorSlideUpFormat(code, category, *flags) {{ + (header_output, decode_block) = VectorSlideBase(name, Name, category, code, + flags, + macro_construtor = VectorSlideUpMacroConstructor, + decode_template = VectorIntDecodeBlock, + micro_execute_template = VectorSlideMicroExecute) +}}; + +def format VectorSlideDownFormat(code, category, *flags) {{ + (header_output, decode_block) = VectorSlideBase(name, Name, category, code, + flags, + macro_construtor = VectorSlideDownMacroConstructor, + decode_template = VectorIntDecodeBlock, + micro_execute_template = VectorSlideMicroExecute) +}}; + +def format VectorFloatSlideUpFormat(code, category, *flags) {{ + (header_output, decode_block) = VectorSlideBase(name, Name, category, code, + flags, + macro_construtor = VectorSlideUpMacroConstructor, + decode_template = VectorFloatDecodeBlock, + micro_execute_template = VectorFloatSlideMicroExecute) +}}; + +def format VectorFloatSlideDownFormat(code, category, *flags) {{ + (header_output, decode_block) = VectorSlideBase(name, Name, category, code, + flags, + macro_construtor = VectorSlideDownMacroConstructor, + decode_template = VectorFloatDecodeBlock, + micro_execute_template = VectorFloatSlideMicroExecute) +}}; \ No newline at end of file diff --git a/src/arch/riscv/isa/formats/vector_conf.isa b/src/arch/riscv/isa/formats/vector_conf.isa new file mode 100644 index 0000000000..31a489ef39 --- /dev/null +++ b/src/arch/riscv/isa/formats/vector_conf.isa @@ -0,0 +1,96 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2022 PLCT Lab +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +def format VConfOp(code, *flags) {{ + iop = InstObjParams(name, Name, 'VConfOp', code, flags) + header_output = BasicDeclare.subst(iop) + decoder_output = BasicConstructor.subst(iop) + decode_block = BasicDecode.subst(iop) + exec_output = VConfExecute.subst(iop) +}}; + +def template VConfExecute {{ + Fault + %(class_name)s::execute(ExecContext *xc, + Trace::InstRecord *traceData) const + { + auto tc = xc->tcBase(); + + %(op_decl)s; + %(op_rd)s; + %(code)s; + + tc->setMiscReg(MISCREG_VSTART, 0); + + uint32_t vlen = xc->readMiscReg(MISCREG_VLENB) * 8; + uint32_t vlmax = getVlmax(xc->readMiscReg(MISCREG_VTYPE), vlen); + + VTYPE new_vtype = requested_vtype; + if (xc->readMiscReg(MISCREG_VTYPE) != new_vtype) { + vlmax = getVlmax(new_vtype, vlen); + + float vflmul = getVflmul(new_vtype.vlmul); + + uint32_t sew = getSew(new_vtype.vsew); + + uint32_t new_vill = + !(vflmul >= 0.125 && vflmul <= 8) || + sew > std::min(vflmul, 1.0f) * ELEN || + bits(requested_vtype, 30, 8) != 0; + if (new_vill) { + vlmax = 0; + new_vtype = 0; + new_vtype.vill = 1; + } + + xc->setMiscReg(MISCREG_VTYPE, new_vtype); + } + + uint32_t current_vl = xc->readMiscReg(MISCREG_VL); + uint32_t new_vl = 0; + if (vlmax == 0) { + new_vl = 0; + } else if (rd_bits == 0 && rs1_bits == 0) { + new_vl = current_vl > vlmax ? vlmax : current_vl; + } else if (rd_bits != 0 && rs1_bits == 0) { + new_vl = vlmax; + } else if (rs1_bits != 0) { + new_vl = requested_vl > vlmax ? vlmax : requested_vl; + } + + xc->setMiscReg(MISCREG_VL, new_vl); + + tc->getDecoderPtr()->as().setVlAndVtype(new_vl, new_vtype); + + Rd = new_vl; + + %(op_wb)s; + return NoFault; + } +}}; \ No newline at end of file diff --git a/src/arch/riscv/isa/formats/vector_mem.isa b/src/arch/riscv/isa/formats/vector_mem.isa new file mode 100644 index 0000000000..113250d5cf --- /dev/null +++ b/src/arch/riscv/isa/formats/vector_mem.isa @@ -0,0 +1,205 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2022 PLCT Lab +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
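The vl update in VConfExecute above follows the usual vset{i}vl{i} AVL rules. Below is a minimal standalone sketch of that selection logic, assuming vlmax has already been recomputed from the requested vtype (and is 0 when the vtype is illegal); the function and parameter names are illustrative only, not identifiers from this patch.

#include <algorithm>
#include <cstdint>

// Sketch of the new_vl selection performed by VConfExecute (illustrative).
// rd_zero / rs1_zero stand for rd == x0 / rs1 == x0 in the vsetvli encoding.
uint32_t
selectNewVl(bool rd_zero, bool rs1_zero, uint32_t requested_vl,
            uint32_t current_vl, uint32_t vlmax)
{
    if (vlmax == 0)
        return 0;                            // illegal vtype: vl forced to 0
    if (rd_zero && rs1_zero)
        return std::min(current_vl, vlmax);  // keep vl, clamped to new vlmax
    if (rs1_zero)
        return vlmax;                        // rd != x0, rs1 == x0: ask for VLMAX
    return std::min(requested_vl, vlmax);    // AVL supplied in rs1 (or uimm)
}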
+ + +let {{ + +def VMemBase(name, Name, ea_code, memacc_code, mem_flags, + inst_flags, base_class, postacc_code='', + declare_template_base=VMemMacroDeclare, + decode_template=BasicDecode, exec_template_base='', + # If it's a macroop, the corresponding microops will be + # generated. + is_macroop=True): + # Make sure flags are in lists (convert to lists if not). + mem_flags = makeList(mem_flags) + inst_flags = makeList(inst_flags) + iop = InstObjParams(name, Name, base_class, + {'ea_code': ea_code, + 'memacc_code': memacc_code, + 'postacc_code': postacc_code }, + inst_flags) + + constructTemplate = eval(exec_template_base + 'Constructor') + + header_output = declare_template_base.subst(iop) + decoder_output = '' + if declare_template_base is not VMemTemplateMacroDeclare: + decoder_output += constructTemplate.subst(iop) + else: + header_output += constructTemplate.subst(iop) + decode_block = decode_template.subst(iop) + exec_output = '' + if not is_macroop: + return (header_output, decoder_output, decode_block, exec_output) + + microiop = InstObjParams(name + '_micro', + Name + 'Micro', + exec_template_base + 'MicroInst', + {'ea_code': ea_code, + 'memacc_code': memacc_code, + 'postacc_code': postacc_code}, + inst_flags) + + if mem_flags: + mem_flags = [ 'Request::%s' % flag for flag in mem_flags ] + s = '\n\tmemAccessFlags = ' + '|'.join(mem_flags) + ';' + microiop.constructor += s + + microDeclTemplate = eval(exec_template_base + 'Micro' + 'Declare') + microExecTemplate = eval(exec_template_base + 'Micro' + 'Execute') + microInitTemplate = eval(exec_template_base + 'Micro' + 'InitiateAcc') + microCompTemplate = eval(exec_template_base + 'Micro' + 'CompleteAcc') + header_output = microDeclTemplate.subst(microiop) + header_output + micro_exec_output = (microExecTemplate.subst(microiop) + + microInitTemplate.subst(microiop) + + microCompTemplate.subst(microiop)) + if declare_template_base is not VMemTemplateMacroDeclare: + exec_output += micro_exec_output + else: + header_output += micro_exec_output + + return (header_output, decoder_output, decode_block, exec_output) + +}}; + +def format VleOp( + memacc_code, + ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VleMacroInst', exec_template_base='Vle') +}}; + +def format VseOp( + memacc_code, + ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VseMacroInst', exec_template_base='Vse') +}}; + +def format VlmOp( + memacc_code, + ea_code={{ EA = Rs1; }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VleMacroInst', exec_template_base='Vlm', is_macroop=False) +}}; + +def format VsmOp( + memacc_code, + ea_code={{ EA = Rs1; }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VseMacroInst', exec_template_base='Vsm', is_macroop=False) +}}; + +def format VlWholeOp( + memacc_code, + ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, 
mem_flags, inst_flags, + 'VlWholeMacroInst', exec_template_base='VlWhole') +}}; + +def format VsWholeOp( + memacc_code, + ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VsWholeMacroInst', exec_template_base='VsWhole') +}}; + +def format VlStrideOp( + memacc_code, + ea_code={{ EA = Rs1 + Rs2 * (regIdx * VLENB / elem_size + microIdx); }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VlStrideMacroInst', exec_template_base='VlStride') +}}; + +def format VsStrideOp( + memacc_code, + ea_code={{ EA = Rs1 + Rs2 * (regIdx * VLENB / elem_size + microIdx); }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VsStrideMacroInst', exec_template_base='VsStride') +}}; + +def format VlIndexOp( + memacc_code, + ea_code, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VlIndexMacroInst', exec_template_base='VlIndex', + declare_template_base=VMemTemplateMacroDeclare, + decode_template=VMemTemplateDecodeBlock + ) +}}; + +def format VsIndexOp( + memacc_code, + ea_code, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VsIndexMacroInst', exec_template_base='VsIndex', + declare_template_base=VMemTemplateMacroDeclare, + decode_template=VMemTemplateDecodeBlock + ) +}}; diff --git a/src/arch/riscv/isa/includes.isa b/src/arch/riscv/isa/includes.isa index a5cc5e85cc..3634d71150 100644 --- a/src/arch/riscv/isa/includes.isa +++ b/src/arch/riscv/isa/includes.isa @@ -34,6 +34,7 @@ // output header {{ +#include #include #include #include @@ -45,6 +46,8 @@ output header {{ #include #include +#include "arch/generic/memhelpers.hh" +#include "arch/riscv/decoder.hh" #include "arch/riscv/insts/amo.hh" #include "arch/riscv/insts/compressed.hh" #include "arch/riscv/insts/mem.hh" @@ -53,6 +56,7 @@ output header {{ #include "arch/riscv/insts/static_inst.hh" #include "arch/riscv/insts/unknown.hh" #include "arch/riscv/interrupts.hh" +#include "arch/riscv/insts/vector.hh" #include "cpu/static_inst.hh" #include "mem/packet.hh" #include "mem/request.hh" @@ -65,9 +69,15 @@ output decoder {{ #include #include +/* riscv softfloat library */ +#include +#include +#include + #include "arch/riscv/decoder.hh" #include "arch/riscv/faults.hh" #include "arch/riscv/mmu.hh" +#include "arch/riscv/regs/float.hh" #include "base/cprintf.hh" #include "base/loader/symtab.hh" #include "cpu/thread_context.hh" @@ -94,6 +104,7 @@ output exec {{ #include "arch/riscv/reg_abi.hh" #include "arch/riscv/regs/float.hh" #include "arch/riscv/regs/misc.hh" +#include "arch/riscv/regs/vector.hh" #include "arch/riscv/utility.hh" #include "base/condcodes.hh" #include "cpu/base.hh" diff --git a/src/arch/riscv/isa/main.isa b/src/arch/riscv/isa/main.isa index 24f366b00c..2923a965da 100644 --- a/src/arch/riscv/isa/main.isa +++ b/src/arch/riscv/isa/main.isa @@ -50,6 +50,9 @@ namespace RiscvISA; //Include the operand_types and operand definitions ##include "operands.isa" +//Include the definitions for the instruction 
templates +##include "templates/templates.isa" + //Include the definitions for the instruction formats ##include "formats/formats.isa" diff --git a/src/arch/riscv/isa/operands.isa b/src/arch/riscv/isa/operands.isa index 72d8f81bca..a81b28df57 100644 --- a/src/arch/riscv/isa/operands.isa +++ b/src/arch/riscv/isa/operands.isa @@ -38,7 +38,15 @@ def operand_types {{ 'sd' : 'int64_t', 'ud' : 'uint64_t', 'sf' : 'float', - 'df' : 'double' + 'df' : 'double', + + 'vi' : 'vi', + 'vu' : 'vu', + 'vwi' : 'vwi', + 'vwu' : 'vwu', + 'vext' : 'vext', + 'vextu' : 'vextu', + 'vc' : 'RiscvISA::VecRegContainer' }}; let {{ @@ -79,6 +87,11 @@ def operands {{ 'Fp2': FloatRegOp('df', 'FP2 + 8', 'IsFloating', 2), 'Fp2_bits': FloatRegOp('ud', 'FP2 + 8', 'IsFloating', 2), + 'Vd': VecRegOp('vc', 'VD', 'IsVector', 1), + 'Vs1': VecRegOp('vc', 'VS1', 'IsVector', 2), + 'Vs2': VecRegOp('vc', 'VS2', 'IsVector', 3), + 'Vs3': VecRegOp('vc', 'VS3', 'IsVector', 4), + #Memory Operand 'Mem': MemOp('ud', None, (None, 'IsLoad', 'IsStore'), 5), diff --git a/src/arch/riscv/isa/templates/templates.isa b/src/arch/riscv/isa/templates/templates.isa new file mode 100644 index 0000000000..2033ca9a02 --- /dev/null +++ b/src/arch/riscv/isa/templates/templates.isa @@ -0,0 +1,3 @@ +// Include +##include "vector_mem.isa" +##include "vector_arith.isa" diff --git a/src/arch/riscv/isa/templates/vector_arith.isa b/src/arch/riscv/isa/templates/vector_arith.isa new file mode 100644 index 0000000000..cf1f5b9a85 --- /dev/null +++ b/src/arch/riscv/isa/templates/vector_arith.isa @@ -0,0 +1,1961 @@ +output header {{ + +#define ASSIGN_VD_BIT(idx, bit) \ + ((Vd[(idx)/8] & ~(1 << (idx)%8)) | ((bit) << (idx)%8)) + +#define COPY_OLD_VD(idx) \ + [[maybe_unused]] RiscvISA::vreg_t old_vd; \ + [[maybe_unused]] decltype(Vd) old_Vd = nullptr; \ + xc->getRegOperand(this, (idx), &old_vd); \ + old_Vd = old_vd.as >(); \ + memcpy(Vd, old_Vd, VLENB); + +#define VRM_REQUIRED \ + uint_fast8_t frm = xc->readMiscReg(MISCREG_FRM); \ + if (frm > 4) \ + return std::make_shared("RM fault", machInst); \ + softfloat_roundingMode = frm; + +template +bool inline +carry_out(Type a, Type b, bool carry_in = false) { + using TypeU = std::make_unsigned_t; + TypeU s = *reinterpret_cast(&a) + + *reinterpret_cast(&b) + carry_in; + return carry_in + ? (s <= *reinterpret_cast(&a)) + : (s < *reinterpret_cast(&a)); +} + +template +bool inline +borrow_out(Type a, Type b, bool borrow_in = false) { + using TypeU = std::make_unsigned_t; + return borrow_in + ? 
(*reinterpret_cast(&a) <= *reinterpret_cast(&b)) + : (*reinterpret_cast(&a) < *reinterpret_cast(&b)); +} + +}}; + +def template VectorIntMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} +}}; + +def template VectorIntMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // vs1, vs2, vs3(old_vd), vm for *.vv, *.vx + // vs2, (old_vd), vm for *.vi + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + uint8_t _microIdx); + Fault execute(ExecContext* xc, Trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) + : %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorIntMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + [[maybe_unused]] constexpr size_t sew = sizeof(vu) * 8; + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + + return NoFault; +} + +}}; + +def template VectorIntExtMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + std::string generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const override + { + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << registerName(srcRegIdx(0)); + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); + } +}; + +}}; + +def template VectorIntExtMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + RegId srcRegIdxArr[3]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + uint8_t _microIdx); + Fault execute(ExecContext* xc, Trace::InstRecord* traceData)const override; + std::string generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const override + { + 
std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << registerName(srcRegIdx(0)); + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); + } +}; + +}}; + +def template VectorIntExtMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + auto SEW = vtype_SEW(vtype); + auto offset = (VLEN / SEW) * (microIdx % %(ext_div)d); + switch (SEW / %(ext_div)d) { + case 8: { + using vext [[maybe_unused]] = int8_t; + using vextu [[maybe_unused]] = uint8_t; + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + break; + } + case 16: { + using vext [[maybe_unused]] = int16_t; + using vextu [[maybe_unused]] = uint16_t; + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + break; + } + case 32: { + using vext [[maybe_unused]] = int32_t; + using vextu [[maybe_unused]] = uint32_t; + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + break; + } + default: break; + } + + return NoFault; +} + +}}; + +def template VectorIntDecodeBlock {{ + +switch(machInst.vtype8.vsew) { +case 0b000: return new %(class_name)s(machInst); +case 0b001: return new %(class_name)s(machInst); +case 0b010: return new %(class_name)s(machInst); +case 0b011: return new %(class_name)s(machInst); +default: GEM5_UNREACHABLE; +} + +}}; + +def template VectorIntWideningMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntWideningMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const int64_t vlmul = vtype_vlmul(_machInst.vtype8); + // Todo: move to Decode template + panic_if(vlmul == 3, "LMUL=8 is illegal for widening inst"); + // when LMUL setted as m1, need to split to 2 micro insts + const uint32_t num_microops = 1 << std::max(0, vlmul + 1); + + int32_t tmp_vl = this->vl; + const int32_t t_micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vlmul < 0 ? 
t_micro_vlmax : t_micro_vlmax / 2; + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VectorIntWideningMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // vs1, vs2, vs3(old_vd), vm for *.vv, *.vx + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + uint8_t _microIdx); + Fault execute(ExecContext* xc, Trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntWideningMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) + : %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorIntWideningMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + using vwu [[maybe_unused]] = typename double_width::type; + using vwi [[maybe_unused]] = typename double_width::type; + [[maybe_unused]] constexpr size_t sew = sizeof(vu) * 8; + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + const int64_t vlmul = vtype_vlmul(machInst.vtype8); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; + [[maybe_unused]] const size_t offset = + (this->microIdx % 2 == 0) ? 0 : micro_vlmax; + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorIntNarrowingMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + using vwu [[maybe_unused]] = typename double_width::type; + using vwi [[maybe_unused]] = typename double_width::type; + [[maybe_unused]] constexpr size_t sew = sizeof(vu) * 8; + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + const int64_t vlmul = vtype_vlmul(machInst.vtype8); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; + [[maybe_unused]] const size_t offset = + (this->microIdx % 2 == 0) ? 
0 : micro_vlmax; + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorIntWideningDecodeBlock {{ + +switch(machInst.vtype8.vsew) { +case 0b000: return new %(class_name)s(machInst); +case 0b001: return new %(class_name)s(machInst); +case 0b010: return new %(class_name)s(machInst); +default: GEM5_UNREACHABLE; +} + +}}; + +def template VectorFloatMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorFloatMacroConstructor {{ +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} +}}; + +def template VectorFloatMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // vs1, vs2, vs3(old_vd), vm + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx); + Fault execute(ExecContext* xc, Trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorFloatMicroConstructor {{ +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) + : %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorFloatMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + using et = ElemType; + using vu = decltype(et::v); + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + VRM_REQUIRED; + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + + return NoFault; +} + +}}; + +def template VectorFloatDecodeBlock {{ + +switch(machInst.vtype8.vsew) { +case 0b010: return new %(class_name)s(machInst); +case 0b011: return new %(class_name)s(machInst); +default: GEM5_UNREACHABLE; +} + +}}; + +def template VectorFloatCvtMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + std::string generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const override + { + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << registerName(srcRegIdx(0)); + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); + } +}; + +}}; + +def template VectorFloatCvtMicroDeclare {{ + +template +class 
%(class_name)s : public %(base_class)s +{ +private: + RegId srcRegIdxArr[3]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx); + Fault execute(ExecContext* xc, Trace::InstRecord* traceData)const override; + std::string generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const override + { + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << registerName(srcRegIdx(0)); + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); + } +}; + +}}; + + +def template VectorFloatWideningMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + using et = ElemType; + using vu [[maybe_unused]] = decltype(et::v); + using ewt = typename double_width::type; + using vwu = decltype(ewt::v); + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + VRM_REQUIRED; + + const int64_t vlmul = vtype_vlmul(machInst.vtype8); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; + [[maybe_unused]] const size_t offset = + (this->microIdx % 2 == 0) ? 0 : micro_vlmax; + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorFloatNarrowingMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + using et = ElemType; + using vu [[maybe_unused]] = decltype(et::v); + using ewt = typename double_width::type; + using vwu = decltype(ewt::v); + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + VRM_REQUIRED; + + const int64_t vlmul = vtype_vlmul(machInst.vtype8); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; + [[maybe_unused]] const size_t offset = + (this->microIdx % 2 == 0) ? 
0 : micro_vlmax; + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorFloatWideningDecodeBlock {{ + +switch(machInst.vtype8.vsew) { +case 0b010: return new %(class_name)s(machInst); +default: GEM5_UNREACHABLE; +} + +}}; + +def template ViotaMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + int cnt = 0; + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + + +def template ViotaMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + + StaticInstPtr microop; + + // Allow one empty micro op to hold IsLastMicroop flag + for (int i = 0; i < num_microops && micro_vl >= 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i, + &cnt); + microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template ViotaMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; + int* cnt; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + uint8_t _microIdx, int* cnt); + Fault execute(ExecContext* xc, Trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template ViotaMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx, int* cnt) + : %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + this->cnt = cnt; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2]); +} + +}}; + +def template ViotaMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + + +def template Vector1Vs1VdMaskConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; + %(set_vm_idx)s; +} + +}}; + +def template Vector1Vs1VdMaskExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + using vu = uint8_t; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + return NoFault; +}; + +}}; + +def template Vector1Vs1RdMaskDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + RegId srcRegIdxArr[2]; + RegId 
destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst); + Fault execute(ExecContext* xc, Trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template Vector1Vs1RdMaskConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + %(constructor)s; + %(set_vm_idx)s; +} + +}}; + +def template Vector1Vs1RdMaskExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + %(op_rd)s; + uint64_t Rd = 0; + %(vm_decl_rd)s; + %(code)s; + %(op_wb)s; + return NoFault; +}; + +}}; + +def template VectorIntMaskMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntMaskMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + microop = new VMaskMergeMicroInst(_machInst, _machInst.vd, + this->microops.size()); + this->microops.push_back(microop); + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VectorIntMaskMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // vs1(rs1), vs2, old_vd, v0 for *.vv[m] or *.vx[m] + // vs2, old_vd, v0 for *.vi[m] + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx); + Fault execute(ExecContext* xc, Trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntMaskMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) +: %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorIntMaskMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + + constexpr uint16_t bit_offset = VLENB / sizeof(ElemType); + const uint16_t offset = 
bit_offset * microIdx; + + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorFloatMaskMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorFloatMaskMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + microop = new VMaskMergeMicroInst(_machInst, _machInst.vd, + this->microops.size()); + this->microops.push_back(microop); + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VectorFloatMaskMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // vs1(rs1), vs2, old_vd, v0 for *.vv or *.vf + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx); + Fault execute(ExecContext* xc, Trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorFloatMaskMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) +: %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorFloatMaskMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + using et = ElemType; + using vu = decltype(et::v); + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + + constexpr uint16_t bit_offset = VLENB / sizeof(ElemType); + const uint16_t offset = bit_offset * microIdx; + + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VMvWholeMacroDeclare {{ + +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VMvWholeMacroConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = _machInst.simm3 + 1; + StaticInstPtr microop; + + for (int i = 0; i < num_microops; ++i) { + microop = new %(class_name)sMicro(_machInst, 0, i); + microop->setDelayedCommit(); + this->microops.push_back(microop); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VMvWholeMicroDeclare {{ + +class 
%(class_name)s : public %(base_class)s +{ +private: + RegId srcRegIdxArr[1]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + uint8_t _microIdx); + Fault execute(ExecContext* xc, Trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VMvWholeMicroConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) + : %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _microIdx]); + _numTypedDestRegs[VecRegClass]++; + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2 + _microIdx]); +} + +}}; + +def template VMvWholeMicroExecute {{ + +Fault +%(class_name)s::execute(ExecContext* xc, Trace::InstRecord* traceData) const +{ + // TODO: Check register alignment. + // TODO: If vd is equal to vs2 the instruction is an architectural NOP. + %(op_decl)s; + %(op_rd)s; + for (size_t i = 0; i < (VLEN / 64); i++) { + %(code)s; + } + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorMaskDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + RegId srcRegIdxArr[3]; + RegId destRegIdxArr[1]; +public: + %(class_name)s(ExtMachInst _machInst); + Fault execute(ExecContext* xc, Trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorMaskConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorMaskExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + using vu = uint8_t; + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + // TODO: remove it + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + + return NoFault; +}; + +}}; + +def template VectorMaskDecodeBlock {{ + +return new %(class_name)s(machInst); + +}}; + +def template VectorNonSplitDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + RegId srcRegIdxArr[2]; + RegId destRegIdxArr[1]; +public: + %(class_name)s(ExtMachInst _machInst); + Fault execute(ExecContext* xc, Trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorNonSplitConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + %(set_vm_idx)s; +} + +}}; + +def template VectorIntNonSplitExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorFloatNonSplitExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + using et = ElemType; + using vu = decltype(et::v); + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + %(op_decl)s; + %(op_rd)s; + 
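    // Non-split ops execute as a single static inst rather than being broken
    // into per-register microops; the v0 mask data is read next so the
    // generated %(code)s can honour it when the instruction is masked.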
%(vm_decl_rd)s; + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorReduceMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorReduceMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VectorReduceMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // vs2, vs1, vd, vm + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx); + Fault execute(ExecContext* xc, Trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorReduceMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) +: %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorReduceIntMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + %(type_def)s; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + + auto reduce_loop = + [&, this](const auto& f, const auto* _, const auto* vs2) { + ElemType microop_result = this->microIdx != 0 ? old_Vd[0] : Vs1[0]; + for (uint32_t i = 0; i < this->microVl; i++) { + uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + if (this->vm || elem_mask(v0, ei)) { + microop_result = f(microop_result, Vs2[i]); + } + } + return microop_result; + }; + + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorReduceFloatMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + %(type_def)s; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + + Vd[0] = this->microIdx != 0 ? 
old_Vd[0] : Vs1[0]; + + auto reduce_loop = + [&, this](const auto& f, const auto* _, const auto* vs2) { + vu tmp_val = Vd[0]; + for (uint32_t i = 0; i < this->microVl; i++) { + uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + if (this->vm || elem_mask(v0, ei)) { + tmp_val = f(tmp_val, Vs2[i]).v; + } + } + return tmp_val; + }; + + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorReduceFloatWideningMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + %(type_def)s; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + + Vd[0] = this->microIdx != 0 ? old_Vd[0] : Vs1[0]; + + auto reduce_loop = + [&, this](const auto& f, const auto* _, const auto* vs2) { + vwu tmp_val = Vd[0]; + for (uint32_t i = 0; i < this->microVl; i++) { + uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + if (this->vm || elem_mask(v0, ei)) { + tmp_val = f(tmp_val, Vs2[i]).v; + } + } + return tmp_val; + }; + + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorGatherMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s{ +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorGatherMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + constexpr uint32_t vd_eewb = sizeof(ElemType); + constexpr uint32_t vs2_eewb = sizeof(ElemType); + constexpr uint32_t vs1_eewb = sizeof(IndexType); + constexpr bool vs1_split = vd_eewb > vs1_eewb; + const int8_t lmul = vtype_vlmul(vtype); + const int8_t vs1_emul = lmul + + (vs1_split ? -(vs2_eewb / vs1_eewb) : vs1_eewb / vs2_eewb); + const uint8_t vs2_vregs = lmul < 0 ? 1 : 1 << lmul; + const uint8_t vs1_vregs = vs1_emul < 0 ? 
1 : 1 << vs1_emul; + const uint8_t vd_vregs = vs2_vregs; + const int32_t micro_vlmax = VLENB / std::max(vd_eewb, vs1_eewb); + int32_t remaining_vl = this->vl; + int32_t micro_vl = std::min(remaining_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (uint8_t i = 0; i < std::max(vs1_vregs, vd_vregs) && micro_vl > 0; + i++) { + for (uint8_t j = 0; j < vs2_vregs; j++) { + microop = new %(class_name)sMicro( + _machInst, micro_vl, i * vs2_vregs + j); + microop->setDelayedCommit(); + this->microops.push_back(microop); + } + micro_vl = std::min(remaining_vl -= micro_vlmax, micro_vlmax); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VectorGatherMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // vs2, vs1, vd, vm + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx); + Fault execute(ExecContext* xc, Trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorGatherMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) +: %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + [[maybe_unused]] constexpr uint32_t vd_eewb = sizeof(ElemType); + [[maybe_unused]] constexpr uint32_t vs2_eewb = sizeof(ElemType); + [[maybe_unused]] constexpr uint32_t vs1_eewb = sizeof(IndexType); + constexpr uint8_t vs1_split_num = (vd_eewb + vs1_eewb - 1) / vs1_eewb; + constexpr uint8_t vd_split_num = (vs1_eewb + vd_eewb - 1) / vd_eewb; + const int8_t lmul = vtype_vlmul(vtype); + const uint8_t vs2_vregs = lmul < 0 ? 1 : 1 << lmul; + [[maybe_unused]] const uint8_t vs2_idx = _microIdx % vs2_vregs; + [[maybe_unused]] const uint8_t vs1_idx = + _microIdx / vs2_vregs / vs1_split_num; + [[maybe_unused]] const uint8_t vd_idx = + _microIdx / vs2_vregs / vd_split_num; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorGatherMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + [[maybe_unused]] constexpr size_t sew = sizeof(vu) * 8; + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + const uint32_t vlmax = vtype_VLMAX(vtype); + constexpr uint8_t vd_eewb = sizeof(ElemType); + constexpr uint8_t vs1_eewb = sizeof(IndexType); + constexpr uint8_t vs2_eewb = sizeof(ElemType); + constexpr uint8_t vs1_split_num = (vd_eewb + vs1_eewb - 1) / vs1_eewb; + constexpr uint8_t vd_split_num = (vs1_eewb + vd_eewb - 1) / vd_eewb; + [[maybe_unused]] constexpr uint16_t vd_elems = VLENB / vd_eewb; + [[maybe_unused]] constexpr uint16_t vs1_elems = VLENB / vs1_eewb; + [[maybe_unused]] constexpr uint16_t vs2_elems = VLENB / vs2_eewb; + [[maybe_unused]] const int8_t lmul = vtype_vlmul(vtype); + [[maybe_unused]] const uint8_t vs2_vregs = lmul < 0 ? 
1 : 1 << lmul; + [[maybe_unused]] const uint8_t vs2_idx = microIdx % vs2_vregs; + [[maybe_unused]] const uint8_t vs1_idx = + microIdx / vs2_vregs / vs1_split_num; + [[maybe_unused]] const uint8_t vd_idx = + microIdx / vs2_vregs / vd_split_num; + [[maybe_unused]] const uint16_t vs1_bias = + vs1_elems * (vd_idx % vs1_split_num) / vs1_split_num; + [[maybe_unused]] const uint16_t vd_bias = + vd_elems * (vs1_idx % vd_split_num) / vd_split_num; + + %(code)s; + %(op_wb)s; + + return NoFault; +} + +}}; + +def template VectorGatherDecodeBlock {{ + +switch(machInst.vtype8.vsew) { + case 0b000: { + using elem_type [[maybe_unused]] = uint8_t; + return new %(class_name)s(machInst); + } + case 0b001: { + using elem_type [[maybe_unused]] = uint16_t; + return new %(class_name)s(machInst); + } + case 0b010: { + using elem_type [[maybe_unused]] = uint32_t; + return new %(class_name)s(machInst); + } + case 0b011: { + using elem_type [[maybe_unused]] = uint64_t; + return new %(class_name)s(machInst); + } + default: GEM5_UNREACHABLE; +} + +}}; + +def template VectorIntVxsatMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s{ +private: + %(reg_idx_arr_decl)s; + bool vxsat = false; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntVxsatMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, + micro_vl, i, &vxsat); + microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + + microop = new VxsatMicroInst(&vxsat, _machInst); + microop->setFlag(StaticInst::IsSerializeAfter); + microop->setFlag(StaticInst::IsNonSpeculative); + this->microops.push_back(microop); + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} +}}; + +def template VectorIntVxsatMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; + bool* vxsatptr; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + uint8_t _microIdx, bool* vxsatptr); + Fault execute(ExecContext* xc, Trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntVxsatMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx, bool* vxsatptr) + : %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + this->vxsatptr = vxsatptr; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorReduceIntWideningMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + 
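    // Widening reduction: the accumulator is double-SEW wide (vwu/vwi below).
    // The first microop seeds Vd[0] from Vs1[0]; later microops carry the
    // running value forward through old_Vd[0], and reduce_loop folds in the
    // active (unmasked) elements of this slice of vs2.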
    using vwu [[maybe_unused]] = typename double_width::type;
+    using vwi [[maybe_unused]] = typename double_width::type;
+
+    if (machInst.vill)
+        return std::make_shared("VILL is set", machInst);
+
+    %(op_decl)s;
+    %(op_rd)s;
+    %(vm_decl_rd)s;
+    %(copy_old_vd)s;
+
+    Vd[0] = this->microIdx != 0 ? old_Vd[0] : Vs1[0];
+
+    auto reduce_loop =
+        [&, this](const auto& f, const auto* _, const auto* vs2) {
+        vwu tmp_val = Vd[0];
+        for (uint32_t i = 0; i < this->microVl; i++) {
+            uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx;
+            if (this->vm || elem_mask(v0, ei)) {
+                tmp_val = f(tmp_val, Vs2[i]);
+            }
+        }
+        return tmp_val;
+    };
+
+    %(code)s;
+    %(op_wb)s;
+    return NoFault;
+}
+
+}};
+
+def template VectorSlideMacroDeclare {{
+
+template
+class %(class_name)s : public %(base_class)s {
+private:
+    %(reg_idx_arr_decl)s;
+public:
+    %(class_name)s(ExtMachInst _machInst);
+    using %(base_class)s::generateDisassembly;
+};
+
+}};
+
+def template VectorSlideUpMacroConstructor {{
+
+template
+%(class_name)s::%(class_name)s(ExtMachInst _machInst)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    const uint32_t num_microops = vtype_regs_per_group(vtype);
+    int32_t tmp_vl = this->vl;
+    const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true);
+    int32_t micro_vl = std::min(tmp_vl, micro_vlmax);
+    StaticInstPtr microop;
+
+    if (micro_vl == 0) {
+        microop = new VectorNopMicroInst(_machInst);
+        this->microops.push_back(microop);
+    }
+    // TODO: statically filter out useless uops
+    int micro_idx = 0;
+    for (int i = 0; i < num_microops && micro_vl > 0; ++i) {
+        for (int j = 0; j <= i; ++j) {
+            microop = new %(class_name)sMicro(
+                _machInst, micro_vl, micro_idx++, i, j);
+            microop->setDelayedCommit();
+            this->microops.push_back(microop);
+        }
+        micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax);
+    }
+    this->microops.front()->setFirstMicroop();
+    this->microops.back()->setLastMicroop();
+}
+
+}};
+
+def template VectorSlideDownMacroConstructor {{
+
+template
+%(class_name)s::%(class_name)s(ExtMachInst _machInst)
+    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s)
+{
+    %(set_reg_idx_arr)s;
+    %(constructor)s;
+    const uint32_t num_microops = vtype_regs_per_group(vtype);
+    int32_t tmp_vl = this->vl;
+    const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true);
+    int32_t micro_vl = std::min(tmp_vl, micro_vlmax);
+    StaticInstPtr microop;
+
+    if (micro_vl == 0) {
+        microop = new VectorNopMicroInst(_machInst);
+        this->microops.push_back(microop);
+    }
+    // TODO: statically filter out useless uops
+    int micro_idx = 0;
+    for (int i = 0; i < num_microops && micro_vl > 0; ++i) {
+        for (int j = i; j < num_microops; ++j) {
+            microop = new %(class_name)sMicro(
+                _machInst, micro_vl, micro_idx++, i, j);
+            microop->setDelayedCommit();
+            this->microops.push_back(microop);
+        }
+        micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax);
+    }
+    this->microops.front()->setFirstMicroop();
+    this->microops.back()->setLastMicroop();
+}
+
+}};
+
+def template VectorSlideMicroDeclare {{
+
+template
+class %(class_name)s : public %(base_class)s
+{
+private:
+    // vs2, vs1, vs3(old_vd), vm for *.vv, *.vx
+    // vs2, (old_vd), vm for *.vi
+    RegId srcRegIdxArr[4];
+    RegId destRegIdxArr[1];
+    bool vm;
+public:
+    %(class_name)s(ExtMachInst _machInst, uint8_t _microVl,
+        uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx);
+    Fault execute(ExecContext* xc, Trace::InstRecord* traceData)const override;
+    using %(base_class)s::generateDisassembly;
+};
+
+}};
+
+def template
VectorSlideMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, + _microIdx, _vdIdx, _vs2Idx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorSlideMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + [[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + + return NoFault; +}; + +}}; + +def template VectorFloatSlideMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + using et = ElemType; + using vu = decltype(et::v); + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + [[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + + return NoFault; +}; + +}}; \ No newline at end of file diff --git a/src/arch/riscv/isa/templates/vector_mem.isa b/src/arch/riscv/isa/templates/vector_mem.isa new file mode 100644 index 0000000000..ecfda4ad2d --- /dev/null +++ b/src/arch/riscv/isa/templates/vector_mem.isa @@ -0,0 +1,1349 @@ +def template VMemMacroDeclare {{ + +class %(class_name)s : public %(base_class)s +{ +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VMemTemplateMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VleConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const uint32_t num_microops = ceil((float) this->vl / (micro_vlmax)); + int32_t remaining_vl = this->vl; + int32_t micro_vl = std::min(remaining_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop->setDelayedCommit(); + microop->setFlag(IsLoad); + this->microops.push_back(microop); + micro_vl = std::min(remaining_vl -= micro_vlmax, micro_vlmax); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VleMicroDeclare {{ + +class %(class_name)s : public %(base_class)s +{ +private: + RegId srcRegIdxArr[3]; + RegId destRegIdxArr[1]; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, + _microIdx) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _microIdx]); + 
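+        // keep the per-class destination count in sync so later pipeline
+        // stages see that this micro-op produces a vector register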
_numTypedDestRegs[VecRegClass]++; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _microIdx]); + if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } + } + + Fault execute(ExecContext *, Trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, Trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + Trace::InstRecord *) const override; + using %(base_class)s::generateDisassembly; + +}; + +}}; + +def template VleMicroExecute {{ + +Fault +%(class_name)s::execute(ExecContext *xc, Trace::InstRecord *traceData) const +{ + Addr EA; + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if(!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0); + v0 = tmp_v0.as(); + } + + uint32_t mem_size = width_EEW(machInst.width) / 8 * this->microVl; + const std::vector byte_enable(mem_size, true); + Fault fault = xc->readMem(EA, Mem.as(), mem_size, memAccessFlags, + byte_enable); + if (fault != NoFault) + return fault; + + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const size_t micro_elems = VLEN / width_EEW(machInst.width); + size_t ei; + for (size_t i = 0; i < micro_elems; i++) { + ei = i + micro_vlmax * microIdx; + %(memacc_code)s; + } + + %(op_wb)s; + return fault; +} + +}}; + +def template VleMicroInitiateAcc {{ + +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + Addr EA; + + %(op_src_decl)s; + %(op_rd)s; + %(ea_code)s; + + uint32_t mem_size = width_EEW(this->machInst.width) / 8 * this->microVl; + const std::vector byte_enable(mem_size, true); + Fault fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, + byte_enable); + return fault; +} + +}}; + +def template VleMicroCompleteAcc {{ + +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc, + Trace::InstRecord *traceData) const +{ + %(op_decl)s; + %(op_rd)s; + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if(!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0); + v0 = tmp_v0.as(); + } + + memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); + + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const size_t micro_elems = VLEN / width_EEW(machInst.width); + size_t ei; + for (size_t i = 0; i < micro_elems; i++) { + ei = i + micro_vlmax * microIdx; + %(memacc_code)s; + } + + %(op_wb)s; + return NoFault; +} + +}}; + +def template VseConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const uint32_t num_microops = ceil((float) this->vl / (micro_vlmax)); + int32_t remaining_vl = this->vl; + int32_t micro_vl = std::min(remaining_vl, micro_vlmax); + + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop->setDelayedCommit(); + microop->setFlag(IsStore); + this->microops.push_back(microop); + micro_vl = std::min(remaining_vl -= micro_vlmax, micro_vlmax); + } + + this->microops.front()->setFlag(IsFirstMicroop); + this->microops.back()->setFlag(IsLastMicroop); +} + +}}; + +def template VseMicroDeclare {{ + +class %(class_name)s : public %(base_class)s +{ +private: + RegId 
srcRegIdxArr[3]; + RegId destRegIdxArr[0]; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _microVl, _microIdx) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _microIdx]); + if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } + this->flags[IsVector] = true; + this->flags[IsStore] = true; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, Trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + Trace::InstRecord *) const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VseMicroExecute {{ + +Fault +%(class_name)s::execute(ExecContext *xc, Trace::InstRecord *traceData) const +{ + Addr EA; + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if(!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0); + v0 = tmp_v0.as(); + } + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const size_t eewb = width_EEW(machInst.width) / 8; + const size_t mem_size = eewb * microVl; + std::vector byte_enable(mem_size, false); + size_t ei; + for (size_t i = 0; i < microVl; i++) { + ei = i + micro_vlmax * microIdx; + if (machInst.vm || elem_mask(v0, ei)) { + %(memacc_code)s; + auto it = byte_enable.begin() + i * eewb; + std::fill(it, it + eewb, true); + } + } + + Fault fault; + fault = xc->writeMem(Mem.as(), mem_size, EA, memAccessFlags, + nullptr, byte_enable); + return fault; +} + +}}; + +def template VseMicroInitiateAcc {{ + +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + Addr EA; + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if(!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0); + v0 = tmp_v0.as(); + } + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const size_t eewb = width_EEW(machInst.width) / 8; + const size_t mem_size = eewb * microVl; + std::vector byte_enable(mem_size, false); + size_t ei; + for (size_t i = 0; i < microVl; i++) { + ei = i + micro_vlmax * microIdx; + if (machInst.vm || elem_mask(v0, ei)) { + %(memacc_code)s; + auto it = byte_enable.begin() + i * eewb; + std::fill(it, it + eewb, true); + } + } + + Fault fault; + fault = xc->writeMem(Mem.as(), mem_size, EA, memAccessFlags, + nullptr, byte_enable); + return fault; +} + +}}; + +def template VseMicroCompleteAcc {{ + +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext* xc, + Trace::InstRecord* traceData) const +{ + return NoFault; +} + +}}; + +def template VlmConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const uint32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + int32_t micro_vl = (std::min(this->vl, micro_vlmax) + 7) / 8; + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + } else { + microop = new Vle8_vMicro(_machInst, micro_vl, 0); + microop->setDelayedCommit(); + microop->setFlag(IsLoad); + } + this->microops.push_back(microop); + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VsmConstructor {{ + 
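+// vsm.v is the unit-stride mask store: it always operates at EEW=8 and its
+// effective length is ceil(vl/8) bytes, so one Vse8_vMicro micro-op suffices.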
+%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const uint32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + int32_t micro_vl = (std::min(this->vl, micro_vlmax) + 7) / 8; + + StaticInstPtr microop; + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + } else { + microop = new Vse8_vMicro(_machInst, micro_vl, 0); + microop->setDelayedCommit(); + microop->setFlag(IsStore); + } + this->microops.push_back(microop); + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VsWholeConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + size_t NFIELDS = machInst.nf + 1; + const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + + StaticInstPtr microop; + for (int i = 0; i < NFIELDS; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vlmax, i); + microop->setDelayedCommit(); + microop->setFlag(IsStore); + this->microops.push_back(microop); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VsWholeMicroDeclare {{ + +class %(class_name)s: public %(base_class)s +{ +private: + RegId destRegIdxArr[0]; + RegId srcRegIdxArr[2]; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _microVl, _microIdx) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _microIdx]); + this->flags[IsVector] = true; + this->flags[IsStore] = true; + } + Fault execute(ExecContext *, Trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, Trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + Trace::InstRecord *) const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VsWholeMicroExecute {{ + +Fault +%(class_name)s::execute(ExecContext *xc, Trace::InstRecord *traceData) const +{ + Addr EA; + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + for (size_t i = 0; i < VLENB; i++) { + %(memacc_code)s; + } + + Fault fault = writeMemAtomicLE(xc, traceData, *(vreg_t::Container*)(&Mem), + EA, memAccessFlags, nullptr); + return fault; +} + +}}; + +def template VsWholeMicroInitiateAcc {{ + +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + Addr EA; + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + for (size_t i = 0; i < VLENB; i++) { + %(memacc_code)s; + } + + Fault fault = writeMemTimingLE(xc, traceData, *(vreg_t::Container*)(&Mem), + EA, memAccessFlags, nullptr); + return fault; +} + +}}; + +def template VsWholeMicroCompleteAcc {{ + +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext* xc, + Trace::InstRecord* traceData) const +{ + return NoFault; +} + +}}; + +def template VlWholeConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + size_t NFIELDS = machInst.nf + 1; + const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + + StaticInstPtr microop; + for (int i = 0; i < NFIELDS; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vlmax, i); + 
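+        // one micro-op per register in the group (NFIELDS); whole-register
+        // loads ignore vl, so each micro-op always moves VLEN / EEW elements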
microop->setDelayedCommit(); + microop->setFlag(IsLoad); + this->microops.push_back(microop); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VlWholeMicroDeclare {{ + +class %(class_name)s: public %(base_class)s +{ +private: + RegId destRegIdxArr[1]; + RegId srcRegIdxArr[1]; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) + : %(base_class)s("%(mnemonic)s_micro", _machInst, %(op_class)s, + _microVl, _microIdx) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _microIdx]); + _numTypedDestRegs[VecRegClass]++; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + this->flags[IsVector] = true; + this->flags[IsLoad] = true; + } + Fault execute(ExecContext *, Trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, Trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + Trace::InstRecord *) const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VlWholeMicroExecute {{ + +Fault +%(class_name)s::execute(ExecContext *xc, Trace::InstRecord *traceData) const +{ + Addr EA; + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + Fault fault = readMemAtomicLE(xc, traceData, EA, + *(vreg_t::Container*)(&Mem), memAccessFlags); + if (fault != NoFault) + return fault; + + size_t elem_per_reg = VLEN / width_EEW(machInst.width); + for (size_t i = 0; i < elem_per_reg; i++) { + %(memacc_code)s; + } + + %(op_wb)s; + return NoFault; +} + +}}; + +def template VlWholeMicroInitiateAcc {{ + +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + Addr EA; + %(op_src_decl)s; + %(op_rd)s; + %(ea_code)s; + + Fault fault = initiateMemRead(xc, traceData, EA, Mem, memAccessFlags); + return fault; +} + +}}; + +def template VlWholeMicroCompleteAcc {{ + +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext* xc, + Trace::InstRecord* traceData) const +{ + %(op_decl)s; + %(op_rd)s; + + memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); + + size_t elem_per_reg = VLEN / width_EEW(machInst.width); + for (size_t i = 0; i < elem_per_reg; ++i) { + %(memacc_code)s; + } + + %(op_wb)s; + return NoFault; +} + +}}; + +def template VlStrideConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const int32_t num_elems_per_vreg = VLEN / width_EEW(_machInst.width); + int32_t remaining_vl = this->vl; + // Num of elems in one vreg + int32_t micro_vl = std::min(remaining_vl, num_elems_per_vreg); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; micro_vl > 0; ++i) { + for (int j = 0; j < micro_vl; ++j) { + microop = new %(class_name)sMicro(machInst, i, j, micro_vl); + microop->setFlag(IsDelayedCommit); + microop->setFlag(IsLoad); + this->microops.push_back(microop); + } + remaining_vl -= num_elems_per_vreg; + micro_vl = std::min(remaining_vl, num_elems_per_vreg); + } + + this->microops.front()->setFlag(IsFirstMicroop); + this->microops.back()->setFlag(IsLastMicroop); + this->flags[IsVector] = true; +} + +}}; + +def template VlStrideMicroDeclare {{ + +class %(class_name)s : public %(base_class)s +{ +private: + // rs1, rs2, vd, vm + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; +public: + %(class_name)s(ExtMachInst 
_machInst, uint8_t _regIdx, uint8_t _microIdx, + uint8_t _microVl) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _regIdx, _microIdx, _microVl) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _regIdx]); + _numTypedDestRegs[VecRegClass]++; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs2]); + // We treat agnostic as undistrubed + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _regIdx]); + if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } + this->flags[IsLoad] = true; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, Trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + Trace::InstRecord *) const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VlStrideMicroExecute {{ + +Fault +%(class_name)s::execute(ExecContext *xc, Trace::InstRecord *traceData) const +{ + Fault fault = NoFault; + Addr EA; + + %(op_decl)s; + %(op_rd)s; + constexpr uint8_t elem_size = sizeof(Vd[0]); + %(ea_code)s; // ea_code depends on elem_size + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + uint32_t mem_size = elem_size; + const std::vector byte_enable(mem_size, true); + + size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + if (machInst.vm || elem_mask(v0, ei)) { + fault = xc->readMem(EA, Mem.as(), mem_size, + memAccessFlags, byte_enable); + if (fault != NoFault) + return fault; + %(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */ + } + + %(op_wb)s; + return fault; +} + +}}; + +def template VlStrideMicroInitiateAcc {{ + +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + Fault fault = NoFault; + Addr EA; + + %(op_src_decl)s; + %(op_rd)s; + constexpr uint8_t elem_size = sizeof(Vd[0]); + %(ea_code)s; // ea_code depends on elem_size + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + uint32_t mem_size = elem_size; + size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + bool need_load = machInst.vm || elem_mask(v0, ei); + const std::vector byte_enable(mem_size, need_load); + fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, byte_enable); + return fault; +} + +}}; + +def template VlStrideMicroCompleteAcc {{ + +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc, + Trace::InstRecord *traceData) const +{ + %(op_decl)s; + %(op_rd)s; + + constexpr uint8_t elem_size = sizeof(Vd[0]); + + RiscvISA::vreg_t old_vd; + decltype(Vd) old_Vd = nullptr; + // We treat agnostic as undistrubed + xc->getRegOperand(this, 2, &old_vd); + old_Vd = old_vd.as >(); + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + if (microIdx == 0) { + // treat vma as vmu + // if (machInst.vtype8.vma == 0) + memcpy(Vd, old_Vd, microVl * elem_size); + // treat vta as vtu + // if (machInst.vtype8.vta == 0) + memcpy(Vd + microVl, old_Vd + microVl, VLENB - microVl * elem_size); + } else { + memcpy(Vd, old_Vd, VLENB); + } + + size_t ei = this->regIdx * VLENB / sizeof(Vd[0]) + this->microIdx; + if (machInst.vm || elem_mask(v0, ei)) { + memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); + 
%(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */ + } + + %(op_wb)s; + return NoFault; +} + +}}; + +def template VsStrideConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const int32_t num_elems_per_vreg = VLEN / width_EEW(_machInst.width); + int32_t remaining_vl = this->vl; + // Num of elems in one vreg + int32_t micro_vl = std::min(remaining_vl, num_elems_per_vreg); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; micro_vl > 0; ++i) { + for (int j = 0; j < micro_vl; ++j) { + microop = new %(class_name)sMicro(machInst, i, j, micro_vl); + microop->setFlag(IsDelayedCommit); + microop->setFlag(IsStore); + this->microops.push_back(microop); + } + remaining_vl -= num_elems_per_vreg; + micro_vl = std::min(remaining_vl, num_elems_per_vreg); + } + + this->microops.front()->setFlag(IsFirstMicroop); + this->microops.back()->setFlag(IsLastMicroop); + this->flags[IsVector] = true; +} + +}}; + +def template VsStrideMicroDeclare {{ + +class %(class_name)s : public %(base_class)s +{ +private: + // rs1, rs2, vs3, vm + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[0]; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx, + uint8_t _microVl) + : %(base_class)s("%(mnemonic)s""_micro", _machInst, %(op_class)s, + _regIdx, _microIdx, _microVl) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs2]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _regIdx]); + if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } + this->flags[IsStore] = true; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, Trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + Trace::InstRecord *) const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VsStrideMicroExecute {{ + +Fault +%(class_name)s::execute(ExecContext *xc, Trace::InstRecord *traceData) const +{ + Fault fault = NoFault; + Addr EA; + + %(op_decl)s; + %(op_rd)s; + constexpr uint8_t elem_size = sizeof(Vs3[0]); + %(ea_code)s; + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if(!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0); + v0 = tmp_v0.as(); + } + + uint32_t mem_size = elem_size; + const std::vector byte_enable(mem_size, true); + + size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + if (machInst.vm || elem_mask(v0, ei)) { + %(memacc_code)s; + fault = xc->writeMem(Mem.as(), mem_size, EA, + memAccessFlags, nullptr, byte_enable); + } + return fault; +} + +}}; + +def template VsStrideMicroInitiateAcc {{ + +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + Fault fault = NoFault; + Addr EA; + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if(!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0); + v0 = tmp_v0.as(); + } + + %(op_decl)s; + %(op_rd)s; + constexpr uint8_t elem_size = sizeof(Vs3[0]); + %(ea_code)s; + + uint32_t mem_size = elem_size; + size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + bool need_store = machInst.vm || elem_mask(v0, ei); + if (need_store) { + const std::vector byte_enable(mem_size, need_store); + %(memacc_code)s; + 
fault = xc->writeMem(Mem.as(), mem_size, EA, + memAccessFlags, nullptr, byte_enable); + } + return fault; +} + +}}; + +def template VsStrideMicroCompleteAcc {{ + +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext* xc, + Trace::InstRecord* traceData) const +{ + return NoFault; +} + +}}; + +def template VlIndexConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const uint32_t vd_eewb = sizeof(ElemType); + const uint32_t vs2_eewb = width_EEW(_machInst.width) / 8; + const uint8_t vs2_split_num = (vd_eewb + vs2_eewb - 1) / vs2_eewb; + const uint8_t vd_split_num = (vs2_eewb + vd_eewb - 1) / vd_eewb; + const int32_t micro_vlmax = VLENB / std::max(vd_eewb, vs2_eewb); + int32_t remaining_vl = this->vl; + int32_t micro_vl = std::min(remaining_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (uint8_t i = 0; micro_vl > 0; i++) { + for (uint8_t j = 0; j < micro_vl; ++j) { + uint8_t vdRegIdx = i / vd_split_num; + uint8_t vs2RegIdx = i / vs2_split_num; + uint8_t vdElemIdx = j + micro_vlmax * (i % vd_split_num); + uint8_t vs2ElemIdx = j + micro_vlmax * (i % vs2_split_num); + microop = new %(class_name)sMicro(machInst, + vdRegIdx, vdElemIdx, vs2RegIdx, vs2ElemIdx); + microop->setFlag(IsDelayedCommit); + microop->setFlag(IsLoad); + this->microops.push_back(microop); + } + remaining_vl -= micro_vlmax; + micro_vl = std::min(remaining_vl, micro_vlmax); + } + + this->microops.front()->setFlag(IsFirstMicroop); + this->microops.back()->setFlag(IsLastMicroop); + this->flags[IsVector] = true; +} + +}}; + +def template VlIndexMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // rs1, vs2, vd, vm + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _vdRegIdx, uint8_t _vdElemIdx, + uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _vdRegIdx, _vdElemIdx, _vs2RegIdx, _vs2ElemIdx) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _vdRegIdx]); + _numTypedDestRegs[VecRegClass]++; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2 + _vs2RegIdx]); + // We treat agnostic as undistrubed + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _vdRegIdx]); + if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } + this->flags[IsLoad] = true; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, Trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + Trace::InstRecord *) const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VlIndexMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext *xc, + Trace::InstRecord *traceData)const +{ + using vu = std::make_unsigned_t; + Fault fault = NoFault; + Addr EA; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + constexpr uint8_t elem_size = sizeof(Vd[0]); + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + uint32_t mem_size = elem_size; + const std::vector byte_enable(mem_size, true); + + size_t ei = this->vdRegIdx * 
VLENB / elem_size + this->vdElemIdx; + if (machInst.vm || elem_mask(v0, ei)) { + fault = xc->readMem(EA, Mem.as(), mem_size, + memAccessFlags, byte_enable); + if (fault != NoFault) + return fault; + %(memacc_code)s; /* Vd[this->vdElemIdx] = Mem[0]; */ + } + + %(op_wb)s; + return fault; +} + +}}; + +def template VlIndexMicroInitiateAcc {{ + +template +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + using vu = std::make_unsigned_t; + Fault fault = NoFault; + Addr EA; + + %(op_src_decl)s; + %(op_rd)s; + constexpr uint8_t elem_size = sizeof(Vd[0]); + %(ea_code)s; // ea_code depends on elem_size + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + uint32_t mem_size = elem_size; + size_t ei = this->vdRegIdx * VLENB / elem_size + this->vdElemIdx; + bool need_load = machInst.vm || elem_mask(v0, ei); + const std::vector byte_enable(mem_size, need_load); + fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, byte_enable); + return fault; +} + +}}; + +def template VlIndexMicroCompleteAcc {{ + +template +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc, + Trace::InstRecord *traceData) const +{ + using vu = std::make_unsigned_t; + %(op_decl)s; + %(op_rd)s; + + constexpr uint8_t elem_size = sizeof(Vd[0]); + + RiscvISA::vreg_t old_vd; + decltype(Vd) old_Vd = nullptr; + // We treat agnostic as undistrubed + xc->getRegOperand(this, 2, &old_vd); + old_Vd = old_vd.as >(); + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + memcpy(Vd, old_Vd, VLENB); + + size_t ei = this->vdRegIdx * VLENB / elem_size + this->vdElemIdx; + if (machInst.vm || elem_mask(v0, ei)) { + memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); + %(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */ + } + + %(op_wb)s; + return NoFault; +} + +}}; + +def template VsIndexConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const uint32_t vs3_eewb = sizeof(ElemType); + const uint32_t vs2_eewb = width_EEW(_machInst.width) / 8; + const uint8_t vs2_split_num = (vs3_eewb + vs2_eewb - 1) / vs2_eewb; + const uint8_t vs3_split_num = (vs2_eewb + vs3_eewb - 1) / vs3_eewb; + const int32_t micro_vlmax = VLENB / std::max(vs3_eewb, vs2_eewb); + int32_t remaining_vl = this->vl; + int32_t micro_vl = std::min(remaining_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (uint8_t i = 0; micro_vl > 0; i++) { + for (uint8_t j = 0; j < micro_vl; ++j) { + uint8_t vs3RegIdx = i / vs3_split_num; + uint8_t vs2RegIdx = i / vs2_split_num; + uint8_t vs3ElemIdx = j + micro_vlmax * (i % vs3_split_num); + uint8_t vs2ElemIdx = j + micro_vlmax * (i % vs2_split_num); + microop = new %(class_name)sMicro(machInst, + vs3RegIdx, vs3ElemIdx, vs2RegIdx, vs2ElemIdx); + microop->setFlag(IsDelayedCommit); + microop->setFlag(IsStore); + this->microops.push_back(microop); + } + remaining_vl -= micro_vlmax; + micro_vl = std::min(remaining_vl, micro_vlmax); + } + + this->microops.front()->setFlag(IsFirstMicroop); + this->microops.back()->setFlag(IsLastMicroop); + this->flags[IsVector] = true; +} + +}}; + +def template VsIndexMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ 
+private: + // rs1, vs2, vs3, vm + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[0]; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _vs3RegIdx, uint8_t _vs3ElemIdx, + uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _vs3RegIdx, _vs3ElemIdx, _vs2RegIdx, _vs2ElemIdx) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2 + _vs2RegIdx]); + // We treat agnostic as undistrubed + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _vs3RegIdx]); + if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } + this->flags[IsStore] = true; + } + + Fault execute(ExecContext *, Trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, Trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + Trace::InstRecord *) const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VsIndexMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext *xc, + Trace::InstRecord *traceData)const +{ + using vu = std::make_unsigned_t; + Fault fault = NoFault; + Addr EA; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + constexpr uint8_t elem_size = sizeof(Vs3[0]); + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + uint32_t mem_size = elem_size; + const std::vector byte_enable(mem_size, true); + + size_t ei = this->vs3RegIdx * VLENB / elem_size + this->vs3ElemIdx; + if (machInst.vm || elem_mask(v0, ei)) { + %(memacc_code)s; /* Mem[0] = Vs3[this->vs3ElemIdx] */ + fault = xc->writeMem(Mem.as(), mem_size, EA, + memAccessFlags, nullptr, byte_enable); + } + return fault; +} + +}}; + +def template VsIndexMicroInitiateAcc {{ + +template +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + Trace::InstRecord* traceData) const +{ + using vu = std::make_unsigned_t; + Fault fault = NoFault; + Addr EA; + + %(op_src_decl)s; + %(op_rd)s; + %(ea_code)s; + constexpr uint8_t elem_size = sizeof(Vs3[0]); + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + constexpr uint8_t mem_size = elem_size; + const std::vector byte_enable(mem_size, true); + + size_t ei = this->vs3RegIdx * VLENB / elem_size + this->vs3ElemIdx; + if (machInst.vm || elem_mask(v0, ei)) { + %(memacc_code)s; /* Mem[0] = Vs3[this->vs3ElemIdx] */ + fault = xc->writeMem(Mem.as(), mem_size, EA, + memAccessFlags, nullptr, byte_enable); + } + return fault; +} + +}}; + +def template VsIndexMicroCompleteAcc {{ + +template +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext* xc, + Trace::InstRecord* traceData) const +{ + return NoFault; +} + +}}; + +def template VMemTemplateDecodeBlock {{ + +switch(machInst.vtype8.vsew) { + case 0b000: { + return new %(class_name)s(machInst); + } + case 0b001: { + return new %(class_name)s(machInst); + } + case 0b010: { + return new %(class_name)s(machInst); + } + case 0b011: { + return new %(class_name)s(machInst); + } + default: GEM5_UNREACHABLE; +} + +}}; \ No newline at end of file diff --git a/src/arch/riscv/regs/float.hh b/src/arch/riscv/regs/float.hh index 1654bdb627..b505cd2641 100644 --- a/src/arch/riscv/regs/float.hh +++ b/src/arch/riscv/regs/float.hh @@ -105,7 +105,10 @@ static constexpr float64_t f64(freg_t r) { return r; } static constexpr freg_t freg(float16_t f) { 
return {boxF16(f.v)}; } static constexpr freg_t freg(float32_t f) { return {boxF32(f.v)}; } static constexpr freg_t freg(float64_t f) { return f; } -static constexpr freg_t freg(uint_fast16_t f) { return {f}; } + +static constexpr freg_t freg(uint16_t f) { return {boxF16(f)}; } +static constexpr freg_t freg(uint32_t f) { return {boxF32(f)}; } +static constexpr freg_t freg(uint64_t f) { return {f}; } namespace float_reg { @@ -211,7 +214,21 @@ const std::vector RegNames = { } // namespace float_reg +inline float32_t +fsgnj32(float32_t a, float32_t b, bool n, bool x) { + if (n) b.v = ~b.v; + else if (x) b.v = a.v ^ b.v; + return f32(insertBits(b.v, 30, 0, a.v)); +} + +inline float64_t +fsgnj64(float64_t a, float64_t b, bool n, bool x) { + if (n) b.v = ~b.v; + else if (x) b.v = a.v ^ b.v; + return f64(insertBits(b.v, 62, 0, a.v)); +} + } // namespace RiscvISA } // namespace gem5 -#endif // __ARCH_RISCV_REGS_FLOAT_HH__ +#endif // __ARCH_RISCV_REGS_FLOAT_HH__ \ No newline at end of file diff --git a/src/arch/riscv/regs/misc.hh b/src/arch/riscv/regs/misc.hh index 7f6fff4e00..c0fb46679c 100644 --- a/src/arch/riscv/regs/misc.hh +++ b/src/arch/riscv/regs/misc.hh @@ -191,6 +191,14 @@ enum MiscRegIndex MISCREG_FFLAGS, MISCREG_FRM, + MISCREG_VSTART, + MISCREG_VXSAT, + MISCREG_VXRM, + MISCREG_VCSR, + MISCREG_VL, + MISCREG_VTYPE, + MISCREG_VLENB, + // These registers are not in the standard, hence does not exist in the // CSRData map. These are mainly used to provide a minimal implementation // for non-maskable-interrupt in our simple cpu. @@ -371,7 +379,15 @@ enum CSRIndex CSR_TDATA3 = 0x7A3, CSR_DCSR = 0x7B0, CSR_DPC = 0x7B1, - CSR_DSCRATCH = 0x7B2 + CSR_DSCRATCH = 0x7B2, + + CSR_VSTART = 0x008, + CSR_VXSAT = 0x009, + CSR_VXRM = 0x00A, + CSR_VCSR = 0x00F, + CSR_VL = 0xC20, + CSR_VTYPE = 0xC21, + CSR_VLENB = 0xC22 }; struct CSRMetadata @@ -541,7 +557,15 @@ const std::unordered_map CSRData = { {CSR_TDATA3, {"tdata3", MISCREG_TDATA3}}, {CSR_DCSR, {"dcsr", MISCREG_DCSR}}, {CSR_DPC, {"dpc", MISCREG_DPC}}, - {CSR_DSCRATCH, {"dscratch", MISCREG_DSCRATCH}} + {CSR_DSCRATCH, {"dscratch", MISCREG_DSCRATCH}}, + + {CSR_VSTART, {"vstart", MISCREG_VSTART}}, + {CSR_VXSAT, {"vxsat" , MISCREG_VXSAT}}, + {CSR_VXRM, {"vxrm" , MISCREG_VXRM}}, + {CSR_VCSR, {"vcsr" , MISCREG_VCSR}}, + {CSR_VL, {"vl" , MISCREG_VL}}, + {CSR_VTYPE, {"vtype" , MISCREG_VTYPE}}, + {CSR_VLENB, {"VLENB" , MISCREG_VLENB}} }; /** @@ -600,6 +624,7 @@ const off_t MXL_OFFSETS[enums::Num_RiscvType] = { const off_t SXL_OFFSET = 34; const off_t UXL_OFFSET = 32; const off_t FS_OFFSET = 13; +const off_t VS_OFFSET = 9; const off_t FRM_OFFSET = 5; const RegVal ISA_MXL_MASKS[enums::Num_RiscvType] = { diff --git a/src/arch/riscv/regs/vector.hh b/src/arch/riscv/regs/vector.hh new file mode 100644 index 0000000000..bb7e3c13b2 --- /dev/null +++ b/src/arch/riscv/regs/vector.hh @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2022 PLCT Lab + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef __ARCH_RISCV_REGS_VECTOR_HH__ +#define __ARCH_RISCV_REGS_VECTOR_HH__ + +#include +#include +#include + +#include "arch/generic/vec_pred_reg.hh" +#include "arch/generic/vec_reg.hh" +#include "cpu/reg_class.hh" +#include "base/bitunion.hh" +#include "debug/VecRegs.hh" + +namespace gem5 +{ + +namespace RiscvISA +{ + +constexpr unsigned NumVecElemPerVecReg = 4; +using VecElem = uint64_t; +constexpr size_t VLENB = NumVecElemPerVecReg * sizeof(VecElem); +constexpr size_t VLEN = VLENB * 8; +constexpr uint32_t ELEN = sizeof(VecElem) * 8; + +using VecRegContainer = + gem5::VecRegContainer; +using vreg_t = VecRegContainer; + +using VecPredReg = + gem5::VecPredRegT; +using ConstVecPredReg = + gem5::VecPredRegT; +using VecPredRegContainer = VecPredReg::Container; + +const int NumVecStandardRegs = 32; +const int NumVecInternalRegs = 8; +const int NumVecRegs = NumVecStandardRegs + NumVecInternalRegs; + +const std::vector VecRegNames = { + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", + "vtmp0", "vtmp1", "vtmp2", "vtmp3", "vtmp4", "vtmp5", "vtmp6", "vtmp7" +}; + +const int VecMemInternalReg0 = NumVecStandardRegs; + +static inline VecElemRegClassOps + vecRegElemClassOps(NumVecElemPerVecReg); +static inline TypedRegClassOps vecRegClassOps; + +inline constexpr RegClass vecRegClass = + RegClass(VecRegClass, VecRegClassName, NumVecRegs, debug::VecRegs). + ops(vecRegClassOps). + regType(); +inline constexpr RegClass vecElemClass = + RegClass(VecElemClass, VecElemClassName, NumVecRegs * NumVecElemPerVecReg, + debug::VecRegs). 
+ ops(vecRegElemClassOps); + +BitUnion32(VTYPE) + Bitfield<31> vill; + Bitfield<7, 0> vtype8; + Bitfield<7> vma; + Bitfield<6> vta; + Bitfield<5, 3> vsew; + Bitfield<2, 0> vlmul; +EndBitUnion(VTYPE) + +} // namespace RiscvISA +} // namespace gem5 + +#endif // __ARCH_RISCV_REGS_VECTOR_HH__ diff --git a/src/arch/riscv/types.hh b/src/arch/riscv/types.hh index 4aae1a027b..200e33e9a5 100644 --- a/src/arch/riscv/types.hh +++ b/src/arch/riscv/types.hh @@ -44,6 +44,7 @@ #include "arch/riscv/pcstate.hh" #include "base/bitunion.hh" +#include "base/logging.hh" namespace gem5 { @@ -51,13 +52,133 @@ namespace gem5 namespace RiscvISA { +constexpr auto XLEN = sizeof(RegVal) * 8; + typedef uint32_t MachInst; // This should be further extend someday when we start to support 64b+ inst. // For now, we should be safe using the msbs to store extra information. BitUnion64(ExtMachInst) // Decoder state - Bitfield<63, 62> rv_type; + Bitfield<63, 62> rv_type; + Bitfield<61> compressed; + // More bits for vector extension + Bitfield<52, 41> vl; + Bitfield<40> vill; + SubBitUnion(vtype8, 39, 32) // exclude vill + Bitfield<39> vma; + Bitfield<38> vta; + Bitfield<37, 35> vsew; + Bitfield<34, 32> vlmul; + EndSubBitUnion(vtype8) + // Common + uint32_t instBits; + Bitfield< 1, 0> quadRant; + Bitfield< 6, 2> opcode; + Bitfield< 6, 0> opcode7; + // R-Type + Bitfield<31, 0> all; + Bitfield<11, 7> rd; + Bitfield<14, 12> funct3; + Bitfield<19, 15> rs1; + Bitfield<24, 20> rs2; + Bitfield<31, 25> funct7; + // Bit shifts + Bitfield<30> srType; + Bitfield<24, 20> shamt5; + Bitfield<25, 20> shamt6; + // I-Type + Bitfield<31, 20> imm12; + // Sync + Bitfield<23, 20> succ; + Bitfield<27, 24> pred; + // S-Type + Bitfield<11, 7> imm5; + Bitfield<31, 25> imm7; + // U-Type + Bitfield<31, 12> imm20; + // SB-Type + Bitfield<7> bimm12bit11; + Bitfield<11, 8> bimm12bits4to1; + Bitfield<30, 25> bimm12bits10to5; + Bitfield<31> immsign; + // UJ-Type + Bitfield<30, 21> ujimmbits10to1; + Bitfield<20> ujimmbit11; + Bitfield<19, 12> ujimmbits19to12; + // System + Bitfield<31, 20> funct12; + Bitfield<19, 15> csrimm; + // Floating point + Bitfield<11, 7> fd; + Bitfield<19, 15> fs1; + Bitfield<24, 20> fs2; + Bitfield<31, 27> fs3; + Bitfield<14, 12> round_mode; + Bitfield<24, 20> conv_sgn; + Bitfield<26, 25> funct2; + // AMO + Bitfield<31, 27> amofunct; + Bitfield<26> aq; + Bitfield<25> rl; + // Compressed + Bitfield<15, 13> copcode; + Bitfield<12> cfunct1; + Bitfield<11, 10> cfunct2high; + Bitfield< 6, 5> cfunct2low; + Bitfield<11, 7> rc1; + Bitfield< 6, 2> rc2; + Bitfield< 9, 7> rp1; + Bitfield< 4, 2> rp2; + Bitfield<11, 7> fc1; + Bitfield< 6, 2> fc2; + Bitfield< 4, 2> fp2; + Bitfield<12, 2> cjumpimm; + Bitfield< 5, 3> cjumpimm3to1; + Bitfield<11, 11> cjumpimm4to4; + Bitfield< 2, 2> cjumpimm5to5; + Bitfield< 7, 7> cjumpimm6to6; + Bitfield< 6, 6> cjumpimm7to7; + Bitfield<10, 9> cjumpimm9to8; + Bitfield< 8, 8> cjumpimm10to10; + Bitfield<12> cjumpimmsign; + Bitfield<12, 5> cimm8; + Bitfield<12, 7> cimm6; + Bitfield< 6, 2> cimm5; + Bitfield<12, 10> cimm3; + Bitfield< 6, 5> cimm2; + Bitfield<12> cimm1; + // Pseudo instructions + Bitfield<31, 25> m5func; + // vector + Bitfield<31, 26> vfunct6; + Bitfield<31, 27> vfunct5; + Bitfield<27, 25> vfunct3; + Bitfield<26, 25> vfunct2; + Bitfield<31, 29> nf; + Bitfield<28> mew; + Bitfield<27, 26> mop; + Bitfield<25> vm; + Bitfield<24, 20> lumop; + Bitfield<24, 20> sumop; + Bitfield<14, 12> width; + Bitfield<24, 20> vs2; + Bitfield<19, 15> vs1; + Bitfield<11, 7> vd; + Bitfield<11, 7> vs3; + Bitfield<19, 15> vecimm; + 
Bitfield<17, 15> simm3; + // vsetvli + Bitfield<31> bit31; + Bitfield<30> bit30; + Bitfield<30, 20> zimm_vsetvli; + // vsetivli + Bitfield<31, 30> bit31_30; + Bitfield<29, 20> zimm_vsetivli; + Bitfield<19, 15> uimm_vsetivli; + // vsetvl + Bitfield<31, 25> bit31_25; + EndBitUnion(ExtMachInst) } // namespace RiscvISA diff --git a/src/arch/riscv/utility.hh b/src/arch/riscv/utility.hh index 3bd34c4801..f085863c2f 100644 --- a/src/arch/riscv/utility.hh +++ b/src/arch/riscv/utility.hh @@ -51,6 +51,7 @@ #include "arch/riscv/regs/float.hh" #include "arch/riscv/regs/int.hh" +#include "arch/riscv/regs/vector.hh" #include "base/types.hh" #include "cpu/reg_class.hh" #include "cpu/static_inst.hh" @@ -129,7 +130,14 @@ registerName(RegId reg) return str.str(); } return float_reg::RegNames[reg.index()]; - } else { + } else if (reg.is(VecRegClass)) { + if (reg.index() >= NumVecRegs) { + std::stringstream str; + str << "?? (v" << reg.index() << ')'; + return str.str(); + } + return VecRegNames[reg.index()]; + } else { /* It must be an InvalidRegClass, in RISC-V we should treat it as a * zero register for the disassembler to work correctly. */ @@ -137,6 +145,576 @@ registerName(RegId reg) } } +// Vector extension functions +inline uint64_t +vtype_SEW(const uint64_t vtype) +{ + return 8 << bits(vtype, 5, 3); +} + +/* +* Encode LMUL to lmul as follows: +* LMUL vlmul lmul +* 1 000 0 +* 2 001 1 +* 4 010 2 +* 8 011 3 +* - 100 - +* 1/8 101 -3 +* 1/4 110 -2 +* 1/2 111 -1 +* +* then, we can calculate VLMAX = vlen >> (vsew + 3 - lmul) +* e.g. vlen = 256 bits, SEW = 16, LMUL = 1/8 +* => VLMAX = vlen >> (1 + 3 - (-3)) +* = 256 >> 7 +* = 2 +* Ref: https://github.com/qemu/qemu/blob/5e9d14f2/target/riscv/cpu.h +*/ +inline uint64_t +vtype_VLMAX(const uint64_t vtype, const bool per_reg = false) +{ + int64_t lmul = (int64_t)sext<3>(bits(vtype, 2, 0)); + lmul = per_reg ? std::min(0, lmul) : lmul; + int64_t vsew = bits(vtype, 5, 3); + return gem5::RiscvISA::VLEN >> (vsew + 3 - lmul); +} + +inline int64_t +vtype_vlmul(const uint64_t vtype) +{ + return (int64_t)sext<3>(bits(vtype, 2, 0)); +} + +inline uint64_t +vtype_regs_per_group(const uint64_t vtype) +{ + int64_t lmul = (int64_t)sext<3>(bits(vtype, 2, 0)); + return 1 << std::max(0, lmul); +} + +inline void +vtype_set_vill(uint64_t& vtype) +{ + vtype = (uint64_t)0 ^ (1UL << (sizeof(RegVal) * 8 - 1)); +} + +inline uint64_t +width_EEW(uint64_t width) +{ + switch (width) { + case 0b000: return 8; + case 0b101: return 16; + case 0b110: return 32; + case 0b111: return 64; + default: GEM5_UNREACHABLE; + } +} + +/* + * Spec Section 4.5 + * Ref: + * https://github.com/qemu/qemu/blob/c7d773ae/target/riscv/vector_helper.c +*/ +template +inline int +elem_mask(const T* vs, const int index) +{ + static_assert(std::is_integral_v); + int idx = index / (sizeof(T)*8); + int pos = index % (sizeof(T)*8); + return (vs[idx] >> pos) & 1; +} + +inline uint64_t +mulhu(uint64_t a, uint64_t b) +{ + uint64_t a_lo = (uint32_t)a; + uint64_t a_hi = a >> 32; + uint64_t b_lo = (uint32_t)b; + uint64_t b_hi = b >> 32; + + uint64_t hi = a_hi * b_hi; + uint64_t mid1 = a_hi * b_lo; + uint64_t mid2 = a_lo * b_hi; + uint64_t lo = a_lo * b_lo; + uint64_t carry = ((uint64_t)(uint32_t)mid1 + + (uint64_t)(uint32_t)mid2 + (lo >> 32)) >> 32; + + return hi + (mid1 >> 32) + (mid2 >> 32) + carry; +} + +inline int64_t +mulh(int64_t a, int64_t b) +{ + int negate = (a < 0) != (b < 0); + uint64_t res = mulhu(a < 0 ? -a : a, b < 0 ? -b : b); + return negate ? 
~res + (a * b == 0) : res; +} + +inline int64_t +mulhsu(int64_t a, uint64_t b) +{ + bool negate = a < 0; + uint64_t res = mulhu(a < 0 ? -a : a, b); + return negate ? ~res + (a * b == 0) : res; +} + +template struct double_width; +template<> struct double_width { using type = uint16_t;}; +template<> struct double_width { using type = uint32_t;}; +template<> struct double_width { using type = uint64_t;}; +template<> struct double_width { using type = int16_t; }; +template<> struct double_width { using type = int32_t; }; +template<> struct double_width { using type = int64_t; }; +template<> struct double_width { using type = float64_t;}; + +template struct double_widthf; +template<> struct double_widthf { using type = float64_t;}; +template<> struct double_widthf { using type = float64_t;}; + +template auto +ftype(IntType a) -> FloatType +{ + if constexpr(std::is_same_v) + return f32(a); + else if constexpr(std::is_same_v) + return f64(a); + GEM5_UNREACHABLE; +} + +// TODO: Consolidate ftype_freg(freg_t a) and ftype(IntType a) into a +// single function +template auto +ftype_freg(freg_t a) -> FloatType +{ + if constexpr(std::is_same_v) + return f32(a); + else if constexpr(std::is_same_v) + return f64(a); + GEM5_UNREACHABLE; +} + +template FloatType +fadd(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_add(a, b); + else if constexpr(std::is_same_v) + return f64_add(a, b); + GEM5_UNREACHABLE; +} + +template FloatType +fsub(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_sub(a, b); + else if constexpr(std::is_same_v) + return f64_sub(a, b); + GEM5_UNREACHABLE; +} + +template FloatType +fmin(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_min(a, b); + else if constexpr(std::is_same_v) + return f64_min(a, b); + GEM5_UNREACHABLE; +} + +template FloatType +fmax(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_max(a, b); + else if constexpr(std::is_same_v) + return f64_max(a, b); + GEM5_UNREACHABLE; +} + +template FloatType +fdiv(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_div(a, b); + else if constexpr(std::is_same_v) + return f64_div(a, b); + GEM5_UNREACHABLE; +} + +template FloatType +fmul(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_mul(a, b); + else if constexpr(std::is_same_v) + return f64_mul(a, b); + GEM5_UNREACHABLE; +} + +template FloatType +fsqrt(FloatType a) +{ + if constexpr(std::is_same_v) + return f32_sqrt(a); + else if constexpr(std::is_same_v) + return f64_sqrt(a); + GEM5_UNREACHABLE; +} + +template FloatType +frsqrte7(FloatType a) +{ + if constexpr(std::is_same_v) + return f32_rsqrte7(a); + else if constexpr(std::is_same_v) + return f64_rsqrte7(a); + GEM5_UNREACHABLE; +} + +template FloatType +frecip7(FloatType a) +{ + if constexpr(std::is_same_v) + return f32_recip7(a); + else if constexpr(std::is_same_v) + return f64_recip7(a); + GEM5_UNREACHABLE; +} + +template FloatType +fclassify(FloatType a) +{ + if constexpr(std::is_same_v) + return f32(f32_classify(a)); + else if constexpr(std::is_same_v) + return f64(f64_classify(a)); + GEM5_UNREACHABLE; +} + +template FloatType +fsgnj(FloatType a, FloatType b, bool n, bool x) +{ + if constexpr(std::is_same_v) + return fsgnj32(a, b, n, x); + else if constexpr(std::is_same_v) + return fsgnj64(a, b, n, x); + GEM5_UNREACHABLE; +} + +template bool +fle(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_le(a, b); + else if constexpr(std::is_same_v) + return f64_le(a, b); 
+ GEM5_UNREACHABLE; +} + +template bool +feq(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_eq(a, b); + else if constexpr(std::is_same_v) + return f64_eq(a, b); + GEM5_UNREACHABLE; +} + +template bool +flt(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_lt(a, b); + else if constexpr(std::is_same_v) + return f64_lt(a, b); + GEM5_UNREACHABLE; +} + +template FloatType +fmadd(FloatType a, FloatType b, FloatType c) +{ + if constexpr(std::is_same_v) + return f32_mulAdd(a, b, c); + else if constexpr(std::is_same_v) + return f64_mulAdd(a, b, c); + GEM5_UNREACHABLE; +} + +template FloatType +fneg(FloatType a) +{ + if constexpr(std::is_same_v) + return f32(a.v ^ uint32_t(mask(31, 31))); + else if constexpr(std::is_same_v) + return f64(a.v ^ mask(63, 63)); + GEM5_UNREACHABLE; +} + +template::type> WFT +fwiden(FT a) +{ + if constexpr(std::is_same_v) + return f32_to_f64(a); + GEM5_UNREACHABLE; +} + +template IntType +f_to_ui(FloatType a, uint_fast8_t mode) +{ + if constexpr(std::is_same_v) + return f32_to_ui32(a, mode, true); + else if constexpr(std::is_same_v) + return f64_to_ui64(a, mode, true); + GEM5_UNREACHABLE; +} + +template< + typename FloatType, + typename IntType = decltype(double_width::type::v) +> IntType +f_to_wui(FloatType a, uint_fast8_t mode) +{ + if constexpr(std::is_same_v) + return f32_to_ui64(a, mode, true); + GEM5_UNREACHABLE; +} + +template< + typename IntType, + typename FloatType = typename double_widthf::type +> IntType +f_to_nui(FloatType a, uint_fast8_t mode) +{ + if constexpr(std::is_same_v) + return f64_to_ui32(a, mode, true); + GEM5_UNREACHABLE; +} + +template IntType +f_to_i(FloatType a, uint_fast8_t mode) +{ + if constexpr(std::is_same_v) + return (uint32_t)f32_to_i32(a, mode, true); + else if constexpr(std::is_same_v) + return (uint64_t)f64_to_i64(a, mode, true); + GEM5_UNREACHABLE; +} + +template< + typename FloatType, + typename IntType = decltype(double_width::type::v) +> IntType +f_to_wi(FloatType a, uint_fast8_t mode) +{ + if constexpr(std::is_same_v) + return (uint64_t)f32_to_i64(a, mode, true); + GEM5_UNREACHABLE; +} + +template< + typename IntType, + typename FloatType = typename double_widthf::type +> IntType +f_to_ni(FloatType a, uint_fast8_t mode) +{ + if constexpr(std::is_same_v) + return (uint32_t)f64_to_i32(a, mode, true); + GEM5_UNREACHABLE; +} + +template +FloatType +ui_to_f(IntType a) +{ + if constexpr(std::is_same_v) + return ui32_to_f32(a); + else if constexpr(std::is_same_v) + return ui64_to_f64(a); + GEM5_UNREACHABLE; +} + +template< + typename IntType, + typename FloatType = typename double_widthf::type +> FloatType +ui_to_wf(IntType a) +{ + if constexpr(std::is_same_v) + return ui32_to_f64(a); + GEM5_UNREACHABLE; +} + +template< + typename FloatType, + typename IntType = decltype(double_width::type::v) +> FloatType +ui_to_nf(IntType a) +{ + if constexpr(std::is_same_v) + return ui64_to_f32(a); + GEM5_UNREACHABLE; +} + +template +FloatType +i_to_f(IntType a) +{ + if constexpr(std::is_same_v) + return i32_to_f32((int32_t)a); + else if constexpr(std::is_same_v) + return i64_to_f64((int64_t)a); + GEM5_UNREACHABLE; +} + +template< + typename IntType, + typename FloatType = typename double_widthf::type +> FloatType +i_to_wf(IntType a) +{ + if constexpr(std::is_same_v) + return i32_to_f64((int32_t)a); + GEM5_UNREACHABLE; +} + +template< + typename FloatType, + typename IntType = std::make_signed_t< + decltype(double_width::type::v) + > +> FloatType +i_to_nf(IntType a) +{ + if constexpr(std::is_same_v) + 
+        return i64_to_f32(a);
+    GEM5_UNREACHABLE;
+}
+
+template<
+    typename FloatType,
+    typename FloatWType = typename double_width<FloatType>::type
+> FloatWType
+f_to_wf(FloatType a)
+{
+    if constexpr(std::is_same_v<FloatType, float32_t>)
+        return f32_to_f64(a);
+    GEM5_UNREACHABLE;
+}
+
+template<
+    typename FloatNType,
+    typename FloatType = typename double_width<FloatNType>::type
+> FloatNType
+f_to_nf(FloatType a)
+{
+    if constexpr(std::is_same_v<FloatNType, float32_t>)
+        return f64_to_f32(a);
+    GEM5_UNREACHABLE;
+}
+
+//ref: https://locklessinc.com/articles/sat_arithmetic/
+template<typename T> T
+sat_add(T x, T y, bool* sat)
+{
+    using UT = std::make_unsigned_t<T>;
+    UT ux = x;
+    UT uy = y;
+    UT res = ux + uy;
+
+    int sh = sizeof(T) * 8 - 1;
+
+    ux = (ux >> sh) + (((UT)0x1 << sh) - 1);
+
+    if ((T) ((ux ^ uy) | ~(uy ^ res)) >= 0) {
+        res = ux;
+        *sat = true;
+    }
+    return res;
+}
+
+template<typename T> T
+sat_sub(T x, T y, bool* sat)
+{
+    using UT = std::make_unsigned_t<T>;
+    UT ux = x;
+    UT uy = y;
+    UT res = ux - uy;
+
+    int sh = sizeof(T) * 8 - 1;
+
+    ux = (ux >> sh) + (((UT)0x1 << sh) - 1);
+
+    if ((T) ((ux ^ uy) & (ux ^ res)) < 0) {
+        res = ux;
+        *sat = true;
+    }
+    return res;
+}
+
+template<typename T> T
+sat_addu(T x, T y, bool* sat)
+{
+    T res = x + y;
+
+    bool t = res < x;
+    if (false == *sat){
+        *sat = t;
+    }
+    res |= -(res < x);
+
+    return res;
+}
+
+template<typename T> T
+sat_subu(T x, T y, bool* sat)
+{
+    T res = x - y;
+
+    bool t = !(res <= x);
+    if (false == *sat){
+        *sat = t;
+    }
+
+    res &= -(res <= x);
+
+    return res;
+}
+
+/**
+ * Ref:
+ * https://github.com/riscv-software-src/riscv-isa-sim
+ */
+template<typename T> T
+int_rounding(T result, uint8_t xrm, unsigned gb) {
+    const uint64_t lsb = 1UL << gb;
+    const uint64_t lsb_half = lsb >> 1;
+    switch (xrm) {
+      case 0 /* RNU */:
+        result += lsb_half;
+        break;
+      case 1 /* RNE */:
+        if ((result & lsb_half) &&
+            ((result & (lsb_half - 1)) || (result & lsb)))
+            result += lsb;
+        break;
+      case 2 /* RDN */:
+        break;
+      case 3 /* ROD */:
+        if (result & (lsb - 1))
+            result |= lsb;
+        break;
+      default:
+        panic("Invalid xrm value %d", (int)xrm);
+    }
+
+    return result;
+}
+
 } // namespace RiscvISA
 } // namespace gem5
diff --git a/src/cpu/FuncUnit.py b/src/cpu/FuncUnit.py
index c5137ac970..c22f6423fc 100644
--- a/src/cpu/FuncUnit.py
+++ b/src/cpu/FuncUnit.py
@@ -95,6 +95,25 @@ class OpClass(Enum):
         "FloatMemWrite",
         "IprAccess",
         "InstPrefetch",
+        'VectorUnitStrideLoad',
+        'VectorUnitStrideStore',
+        'VectorUnitStrideMaskLoad',
+        'VectorUnitStrideMaskStore',
+        'VectorStridedLoad',
+        'VectorStridedStore',
+        'VectorIndexedLoad',
+        'VectorIndexedStore',
+        'VectorUnitStrideFaultOnlyFirstLoad',
+        'VectorWholeRegisterLoad',
+        'VectorWholeRegisterStore',
+        'VectorIntegerArith',
+        'VectorFloatArith',
+        'VectorFloatConvert',
+        'VectorIntegerReduce',
+        'VectorFloatReduce',
+        'VectorMisc',
+        'VectorIntegerExtension',
+        'VectorConfig'
     ]
diff --git a/src/cpu/minor/BaseMinorCPU.py b/src/cpu/minor/BaseMinorCPU.py
index bcdab1bad5..9dc6b87709 100644
--- a/src/cpu/minor/BaseMinorCPU.py
+++ b/src/cpu/minor/BaseMinorCPU.py
@@ -244,6 +244,25 @@ class MinorDefaultMiscFU(MinorFU):
     opClasses = minorMakeOpClassSet(["IprAccess", "InstPrefetch"])
     opLat = 1

+class MinorDefaultVecFU(MinorFU):
+    opClasses = minorMakeOpClassSet([
+        'VectorUnitStrideLoad', 'VectorUnitStrideStore',
+        'VectorUnitStrideMaskLoad', 'VectorUnitStrideMaskStore',
+        'VectorStridedLoad', 'VectorStridedStore',
+        'VectorIndexedLoad', 'VectorIndexedStore',
+        'VectorUnitStrideFaultOnlyFirstLoad',
+        'VectorWholeRegisterLoad', 'VectorWholeRegisterStore',
+        'VectorIntegerArith', 'VectorFloatArith', 'VectorFloatConvert',
+        'VectorIntegerReduce', 'VectorFloatReduce',
+        'VectorMisc', 'VectorIntegerExtension', 'VectorConfig'
+    ])
+    opLat = 1
+
+class MinorDefaultFUPool(MinorFUPool):
+    funcUnits = [MinorDefaultIntFU(), MinorDefaultIntFU(),
+                 MinorDefaultIntMulFU(), MinorDefaultIntDivFU(),
+                 MinorDefaultFloatSimdFU(), MinorDefaultPredFU(),
+                 MinorDefaultMemFU(), MinorDefaultVecFU(), MinorDefaultMiscFU()]

 class MinorDefaultFUPool(MinorFUPool):
     funcUnits = [
diff --git a/src/cpu/minor/fetch2.cc b/src/cpu/minor/fetch2.cc
index 0ff0140518..dfac3cb76f 100644
--- a/src/cpu/minor/fetch2.cc
+++ b/src/cpu/minor/fetch2.cc
@@ -303,6 +303,7 @@ Fetch2::evaluate()

     unsigned int output_index = 0;

+    bool fetch2_stall = false;
     /* Pack instructions into the output while we can.  This may involve
      * using more than one input line.  Note that lineWidth will be 0
      * for faulting lines */
@@ -310,7 +311,8 @@ Fetch2::evaluate()
         (line_in->isFault() ||
         fetch_info.inputIndex < line_in->lineWidth) && /* More input */
         output_index < outputWidth && /* More output to fill */
-        prediction.isBubble() /* No predicted branch */)
+        prediction.isBubble() && /* No predicted branch */
+        !fetch2_stall)
     {
         ThreadContext *thread = cpu.getContext(line_in->id.threadId);
         InstDecoder *decoder = thread->getDecoderPtr();
@@ -386,6 +388,7 @@ Fetch2::evaluate()
                 line_in->lineBaseAddr + fetch_info.inputIndex);
             DPRINTF(Fetch, "Offering MachInst to decoder addr: 0x%x\n",
                 line_in->lineBaseAddr + fetch_info.inputIndex);
+            fetch2_stall = decoder->isStalled();
         }

         /* Maybe make the above a loop to accomodate ISAs with
diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc
index 49416bf754..d2c4044701 100644
--- a/src/cpu/o3/fetch.cc
+++ b/src/cpu/o3/fetch.cc
@@ -1198,11 +1198,13 @@ Fetch::fetch(bool &status_change)
     auto *dec_ptr = decoder[tid];
     const Addr pc_mask = dec_ptr->pcMask();

+    auto fetchStall = false;
+
     // Loop through instruction memory from the cache.
     // Keep issuing while fetchWidth is available and branch is not
     // predicted taken
     while (numInst < fetchWidth && fetchQueue[tid].size() < fetchQueueSize
-           && !predictedBranch && !quiesce) {
+           && !predictedBranch && !quiesce && !fetchStall) {
         // We need to process more memory if we aren't going to get a
         // StaticInst from the rom, the current macroop, or what's already
         // in the decoder.
@@ -1250,6 +1252,7 @@ Fetch::fetch(bool &status_change)
                     pcOffset = 0;
                 }
             } else {
+                fetchStall = dec_ptr->isStalled();
                 // We need more bytes for this instruction so blkOffset and
                 // pcOffset will be updated
                 break;
diff --git a/src/cpu/op_class.hh b/src/cpu/op_class.hh
index 94730f3d5d..d01f917da3 100644
--- a/src/cpu/op_class.hh
+++ b/src/cpu/op_class.hh
@@ -105,6 +105,30 @@ static const OpClass FloatMemReadOp = enums::FloatMemRead;
 static const OpClass FloatMemWriteOp = enums::FloatMemWrite;
 static const OpClass IprAccessOp = enums::IprAccess;
 static const OpClass InstPrefetchOp = enums::InstPrefetch;
+static const OpClass VectorUnitStrideLoadOp = enums::VectorUnitStrideLoad;
+static const OpClass VectorUnitStrideStoreOp = enums::VectorUnitStrideStore;
+static const OpClass VectorUnitStrideMaskLoadOp
+    = enums::VectorUnitStrideMaskLoad;
+static const OpClass VectorUnitStrideMaskStoreOp
+    = enums::VectorUnitStrideMaskStore;
+static const OpClass VectorStridedLoadOp = enums::VectorStridedLoad;
+static const OpClass VectorStridedStoreOp = enums::VectorStridedStore;
+static const OpClass VectorIndexedLoadOp = enums::VectorIndexedLoad;
+static const OpClass VectorIndexedStoreOp = enums::VectorIndexedStore;
+static const OpClass VectorUnitStrideFaultOnlyFirstLoadOp
+    = enums::VectorUnitStrideFaultOnlyFirstLoad;
+static const OpClass VectorWholeRegisterLoadOp
+    = enums::VectorWholeRegisterLoad;
+static const OpClass VectorWholeRegisterStoreOp
+    = enums::VectorWholeRegisterStore;
+static const OpClass VectorIntegerArithOp = enums::VectorIntegerArith;
+static const OpClass VectorFloatArithOp = enums::VectorFloatArith;
+static const OpClass VectorFloatConvertOp = enums::VectorFloatConvert;
+static const OpClass VectorIntegerReduceOp = enums::VectorIntegerReduce;
+static const OpClass VectorFloatReduceOp = enums::VectorFloatReduce;
+static const OpClass VectorMiscOp = enums::VectorMisc;
+static const OpClass VectorIntegerExtensionOp = enums::VectorIntegerExtension;
+static const OpClass VectorConfigOp = enums::VectorConfig;
 static const OpClass Num_OpClasses = enums::Num_OpClass;

 } // namespace gem5
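Illustration (not part of the diff above): the least self-explanatory helper added by this patch is int_rounding(), which applies the four RVV fixed-point rounding modes selected by the vxrm CSR before the caller shifts out the guard bits. The standalone sketch below restates that helper outside of gem5 and prints what each mode does to the value 11 with two guard bits, i.e. 11/4 = 2.75. The wrapper name int_rounding_demo and the use of plain uint64_t are choices made here for the example only; they do not appear in the patch.

// Standalone restatement of the int_rounding() helper for illustration.
// xrm: 0 = RNU (nearest, ties up), 1 = RNE (nearest, ties to even),
//      2 = RDN (truncate), 3 = ROD (round to odd, "jamming").
// gb: number of guard bits the caller will shift out afterwards.
#include <cstdint>
#include <cstdio>

static uint64_t
int_rounding_demo(uint64_t result, uint8_t xrm, unsigned gb)
{
    const uint64_t lsb = 1ULL << gb;
    const uint64_t lsb_half = lsb >> 1;
    switch (xrm) {
      case 0: // RNU: add half of the discarded weight
        result += lsb_half;
        break;
      case 1: // RNE: round up only if above the halfway point, or on a
              // tie when the kept lsb is already set (keeps it even)
        if ((result & lsb_half) &&
            ((result & (lsb_half - 1)) || (result & lsb)))
            result += lsb;
        break;
      case 2: // RDN: nothing to add, the later shift truncates
        break;
      case 3: // ROD: force the kept lsb whenever any guard bit is set
        if (result & (lsb - 1))
            result |= lsb;
        break;
    }
    return result;
}

int main()
{
    // Round 0b1011 (11) with 2 guard bits under each mode, then shift.
    for (unsigned xrm = 0; xrm < 4; ++xrm)
        std::printf("xrm=%u -> %llu\n", xrm,
            (unsigned long long)(int_rounding_demo(11, xrm, 2) >> 2));
    // Expected output: xrm=0 -> 3, xrm=1 -> 3, xrm=2 -> 2, xrm=3 -> 3
    return 0;
}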