Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix #105969 #106218

Merged
merged 13 commits into from
Aug 15, 2024
55 changes: 7 additions & 48 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -2249,8 +2249,7 @@ class emitter
ssize_t emitGetInsCIdisp(instrDesc* id);
unsigned emitGetInsCIargs(instrDesc* id);

inline emitAttr emitGetMemOpSize(instrDesc* id) const;
inline emitAttr emitGetBaseMemOpSize(instrDesc*) const;
inline emitAttr emitGetMemOpSize(instrDesc* id, bool ignoreEmbeddedBroadcast = false) const;

// Return the argument count for a direct call "id".
int emitGetInsCDinfo(instrDesc* id);
Expand Down Expand Up @@ -3958,51 +3957,11 @@ inline unsigned emitter::emitGetInsCIargs(instrDesc* id)
//-----------------------------------------------------------------------------
// emitGetMemOpSize: Get the memory operand size of instrDesc.
//
// Note: there are cases when embedded broadcast is enabled, so the memory operand
// size is different from the intrinsic simd size, we will check here if emitter is
// emiting a embedded broadcast enabled instruction.

// Arguments:
// id - Instruction descriptor
//
emitAttr emitter::emitGetMemOpSize(instrDesc* id) const
{
if (id->idIsEvexbContextSet())
{
// should have the assumption that Evex.b now stands for the embedded broadcast context.
// reference: Section 2.7.5 in Intel 64 and ia-32 architectures software developer's manual volume 2.
ssize_t inputSize = GetInputSizeInBytes(id);
switch (inputSize)
{
case 4:
return EA_4BYTE;
case 8:
return EA_8BYTE;

default:
unreached();
}
}
else
{
return emitGetBaseMemOpSize(id);
}
}

//-----------------------------------------------------------------------------
// emitGetMemOpSize: Get the memory operand size of instrDesc.
//
// Note: vextractf128 has a 128-bit output (register or memory) but a 256-bit input (register).
// vinsertf128 is the inverse with a 256-bit output (register), a 256-bit input(register),
// and a 128-bit input (register or memory).
// Similarly, vextractf64x4 has a 256-bit output and 128-bit input and vinsertf64x4 the inverse
// This method is mainly used for such instructions to return the appropriate memory operand
// size, otherwise returns the regular operand size of the instruction.

// Arguments:
// id - Instruction descriptor
// id - Instruction descriptor
// ignoreEmbeddedBroadcast - true to get the non-embedded operand size; otherwise false
//
emitAttr emitter::emitGetBaseMemOpSize(instrDesc* id) const
emitAttr emitter::emitGetMemOpSize(instrDesc* id, bool ignoreEmbeddedBroadcast) const
{
ssize_t memSize = 0;

Expand All @@ -4018,7 +3977,7 @@ emitAttr emitter::emitGetBaseMemOpSize(instrDesc* id) const
else if (tupleType == INS_TT_FULL)
{
// Embedded broadcast supported, so either loading scalar or full vector
if (id->idIsEvexbContextSet())
if (id->idIsEvexbContextSet() && !ignoreEmbeddedBroadcast)
{
memSize = GetInputSizeInBytes(id);
}
Expand All @@ -4040,7 +3999,7 @@ emitAttr emitter::emitGetBaseMemOpSize(instrDesc* id) const
{
memSize = 16;
}
else if (id->idIsEvexbContextSet())
else if (id->idIsEvexbContextSet() && !ignoreEmbeddedBroadcast)
{
memSize = GetInputSizeInBytes(id);
}
Expand All @@ -4052,7 +4011,7 @@ emitAttr emitter::emitGetBaseMemOpSize(instrDesc* id) const
else if (tupleType == INS_TT_HALF)
{
// Embedded broadcast supported, so either loading scalar or half vector
if (id->idIsEvexbContextSet())
if (id->idIsEvexbContextSet() && !ignoreEmbeddedBroadcast)
{
memSize = GetInputSizeInBytes(id);
}
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10987,7 +10987,7 @@ void emitter::emitDispEmbBroadcastCount(instrDesc* id) const
return;
}
ssize_t baseSize = GetInputSizeInBytes(id);
ssize_t vectorSize = (ssize_t)emitGetBaseMemOpSize(id);
ssize_t vectorSize = (ssize_t)emitGetMemOpSize(id, /* ignoreEmbeddedBroadcast */ true);
printf(" {1to%d}", vectorSize / baseSize);
}

Expand Down
16 changes: 16 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20933,6 +20933,13 @@ GenTree* Compiler::gtNewSimdBinOpNode(
std::swap(op1, op2);
#endif // TARGET_XARCH
}
#ifdef TARGET_XARCH
if (HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(intrinsic) && varTypeIsSmall(simdBaseType))
{
simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UINT : CORINFO_TYPE_INT;
simdBaseType = JitType2PreciseVarType(simdBaseJitType);
}
#endif // TARGET_XARCH
return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize);
}

Expand Down Expand Up @@ -25691,6 +25698,15 @@ GenTree* Compiler::gtNewSimdTernaryLogicNode(var_types type,
intrinsic = NI_AVX512F_VL_TernaryLogic;
}

#ifdef TARGET_XARCH
assert(HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(intrinsic));
if (varTypeIsSmall(simdBaseType))
{
simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UINT : CORINFO_TYPE_INT;
simdBaseType = JitType2PreciseVarType(simdBaseJitType);
}
#endif // TARGET_XARCH

return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, op4, intrinsic, simdBaseJitType, simdSize);
}
#endif // TARGET_XARCH
Expand Down
14 changes: 14 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -6667,6 +6667,20 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic

bool ShouldConstantProp(GenTree* operand, GenTreeVecCon* vecCon);

void NormalizeJitBaseTypeToInt(NamedIntrinsic id, var_types simdBaseType)
{
assert(varTypeIsSmall(simdBaseType));

if (varTypeIsUnsigned(simdBaseType))
{
SetSimdBaseJitType(CORINFO_TYPE_UINT);
}
else
{
SetSimdBaseJitType(CORINFO_TYPE_UINT);
}
}

private:
void SetHWIntrinsicId(NamedIntrinsic intrinsicId);

Expand Down
7 changes: 7 additions & 0 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1818,6 +1818,13 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
if (simdBaseJitType != CORINFO_TYPE_UNDEF)
{
simdBaseType = JitType2PreciseVarType(simdBaseJitType);
#ifdef TARGET_XARCH
if (HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(intrinsic) && varTypeIsSmall(simdBaseType))
{
simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UINT : CORINFO_TYPE_INT;
simdBaseType = JitType2PreciseVarType(simdBaseJitType);
}
tannergooding marked this conversation as resolved.
Show resolved Hide resolved
#endif // TARGET_XARCH
}

const unsigned simdSize = HWIntrinsicInfo::lookupSimdSize(this, intrinsic, sig);
Expand Down
9 changes: 9 additions & 0 deletions src/coreclr/jit/hwintrinsic.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,9 @@ enum HWIntrinsicFlag : unsigned int

// The intrinsic is an embedded masking compatible intrinsic
HW_Flag_EmbMaskingCompatible = 0x10000000,

// The base type of this intrinsic needs to be normalized to int/uint unless it is long/ulong.
HW_Flag_NormalizeSmallTypeToInt = 0x20000000,
#elif defined(TARGET_ARM64)

// The intrinsic has an enum operand. Using this implies HW_Flag_HasImmediateOperand.
Expand Down Expand Up @@ -755,6 +758,12 @@ struct HWIntrinsicInfo
HWIntrinsicFlag flags = lookupFlags(id);
return (flags & HW_Flag_MaybeMemoryStore) != 0;
}

static bool NeedsNormalizeSmallTypeToInt(NamedIntrinsic id)
{
HWIntrinsicFlag flags = lookupFlags(id);
return (flags & HW_Flag_NormalizeSmallTypeToInt) != 0;
}
#endif

static bool NoJmpTableImm(NamedIntrinsic id)
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
// We need to validate that other phases of the compiler haven't introduced unsupported intrinsics
assert(compiler->compIsaSupportedDebugOnly(isa));
assert(HWIntrinsicInfo::RequiresCodegen(intrinsicId));
assert(!HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(intrinsicId) || !varTypeIsSmall(node->GetSimdBaseType()));

bool isTableDriven = genIsTableDrivenHWIntrinsic(intrinsicId, category);
insOpts instOptions = INS_OPTS_NONE;
Expand Down
Loading
Loading