Skip to content

Commit f5ad9e1

Browse files
mahesh-attarde and mattarde authored
[X86][AVX10.2] Support AVX10.2-COMEF new instructions. (#108063)
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965, Chapter 8: AVX10 Compare Scalar FP with Enhanced EFLAGS Instructions. Co-authored-by: mattarde <[email protected]>
1 parent 707169a commit f5ad9e1

12 files changed

+1405
-88
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26159,22 +26159,43 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2615926159
if (CC == ISD::SETLT || CC == ISD::SETLE)
2616026160
std::swap(LHS, RHS);
2616126161

26162-
SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
26162+
// With AVX10.2, EQ and NE are handled by COMX/UCOMX.
26163+
bool HasAVX10_2_COMX =
26164+
Subtarget.hasAVX10_2() && (CC == ISD::SETEQ || CC == ISD::SETNE);
26165+
26166+
// AVX10.2 COMPARE supports only v2f64, v4f32 or v8f16.
26167+
// For the bf16 type (v8bf16) we need to fall back to the original opcode.
26168+
bool HasAVX10_2_COMX_Ty = (LHS.getSimpleValueType() != MVT::v8bf16);
26169+
26170+
auto ComiOpCode = IntrData->Opc0;
26171+
auto isUnordered = (ComiOpCode == X86ISD::UCOMI);
26172+
26173+
if (HasAVX10_2_COMX && HasAVX10_2_COMX_Ty)
26174+
ComiOpCode = isUnordered ? X86ISD::UCOMX : X86ISD::COMX;
26175+
26176+
SDValue Comi = DAG.getNode(ComiOpCode, dl, MVT::i32, LHS, RHS);
26177+
2616326178
SDValue SetCC;
2616426179
switch (CC) {
26165-
case ISD::SETEQ: { // (ZF = 0 and PF = 0)
26180+
case ISD::SETEQ: {
2616626181
SetCC = getSETCC(X86::COND_E, Comi, dl, DAG);
26182+
if (HasAVX10_2_COMX & HasAVX10_2_COMX_Ty) // ZF == 1
26183+
break;
26184+
// (ZF = 1 and PF = 0)
2616726185
SDValue SetNP = getSETCC(X86::COND_NP, Comi, dl, DAG);
2616826186
SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP);
2616926187
break;
2617026188
}
26171-
case ISD::SETNE: { // (ZF = 1 or PF = 1)
26189+
case ISD::SETNE: {
2617226190
SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG);
26191+
if (HasAVX10_2_COMX & HasAVX10_2_COMX_Ty) // ZF == 0
26192+
break;
26193+
// (ZF = 0 or PF = 1)
2617326194
SDValue SetP = getSETCC(X86::COND_P, Comi, dl, DAG);
2617426195
SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP);
2617526196
break;
2617626197
}
26177-
case ISD::SETGT: // (CF = 0 and ZF = 0)
26198+
case ISD::SETGT: // (CF = 0 and ZF = 0)
2617826199
case ISD::SETLT: { // Condition opposite to GT. Operands swapped above.
2617926200
SetCC = getSETCC(X86::COND_A, Comi, dl, DAG);
2618026201
break;
@@ -34083,6 +34104,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
3408334104
NODE_NAME_CASE(STRICT_FCMPS)
3408434105
NODE_NAME_CASE(COMI)
3408534106
NODE_NAME_CASE(UCOMI)
34107+
NODE_NAME_CASE(COMX)
34108+
NODE_NAME_CASE(UCOMX)
3408634109
NODE_NAME_CASE(CMPM)
3408734110
NODE_NAME_CASE(CMPMM)
3408834111
NODE_NAME_CASE(STRICT_CMPM)

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,10 @@ namespace llvm {
8787
COMI,
8888
UCOMI,
8989

90+
// AVX10.2 compare intrinsics, similar to COMI/UCOMI.
91+
COMX,
92+
UCOMX,
93+
9094
/// X86 bit-test instructions.
9195
BT,
9296

llvm/lib/Target/X86/X86InstrAVX10.td

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1537,3 +1537,49 @@ defm VFNMADD132NEPBF16 : avx10_fma3p_132_bf16<0x9C, "vfnmadd132nepbf16", X86any_
15371537
defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_Fnmsub,
15381538
X86Fnmsub, SchedWriteFMA>;
15391539
}
1540+
1541+
//-------------------------------------------------
1542+
// AVX10 COMEF instructions
1543+
//-------------------------------------------------
1544+
multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode,
1545+
string OpcodeStr,
1546+
Domain d,
1547+
X86FoldableSchedWrite sched = WriteFComX> {
1548+
let ExeDomain = d, mayRaiseFPException = 1 in {
1549+
def rr_Int : AVX512<Opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
1550+
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
1551+
[(set EFLAGS, (OpNode (_.VT _.RC:$src1), _.RC:$src2))]>,
1552+
EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
1553+
let mayLoad = 1 in {
1554+
def rm_Int : AVX512<Opc, MRMSrcMem, (outs), (ins _.RC:$src1, _.ScalarMemOp:$src2),
1555+
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
1556+
[(set EFLAGS, (OpNode (_.VT _.RC:$src1), (_.LdFrag addr:$src2)))]>,
1557+
EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
1558+
}
1559+
def rrb_Int : AVX512<Opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
1560+
!strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"),
1561+
[]>,
1562+
EVEX, EVEX_V128, EVEX_B, Sched<[sched]>, SIMD_EXC;
1563+
}
1564+
}
1565+
1566+
let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in {
1567+
defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512,
1568+
"vcomxsd", SSEPackedDouble>,
1569+
TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
1570+
defm VCOMXSHZ : avx10_com_ef_int<0x2f, v8f16x_info, X86comi512,
1571+
"vcomxsh", SSEPackedSingle>,
1572+
T_MAP5, XD, EVEX_CD8<16, CD8VT1>;
1573+
defm VCOMXSSZ : avx10_com_ef_int<0x2f, v4f32x_info, X86comi512,
1574+
"vcomxss", SSEPackedSingle>,
1575+
TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
1576+
defm VUCOMXSDZ : avx10_com_ef_int<0x2e, v2f64x_info, X86ucomi512,
1577+
"vucomxsd", SSEPackedDouble>,
1578+
TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
1579+
defm VUCOMXSHZ : avx10_com_ef_int<0x2e, v8f16x_info, X86ucomi512,
1580+
"vucomxsh", SSEPackedSingle>,
1581+
T_MAP5, XD, EVEX_CD8<16, CD8VT1>;
1582+
defm VUCOMXSSZ : avx10_com_ef_int<0x2e, v4f32x_info, X86ucomi512,
1583+
"vucomxss", SSEPackedSingle>,
1584+
TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
1585+
}

llvm/lib/Target/X86/X86InstrFragmentsSIMD.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,8 @@ def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
6161
def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>;
6262
def X86comi : SDNode<"X86ISD::COMI", SDTX86FCmp>;
6363
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86FCmp>;
64-
64+
def X86comi512 : SDNode<"X86ISD::COMX", SDTX86FCmp>;
65+
def X86ucomi512 : SDNode<"X86ISD::UCOMX", SDTX86FCmp>;
6566
def SDTX86Cmps : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<0, 1>,
6667
SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
6768
def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;

0 commit comments

Comments
 (0)