-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86][AVX10.2] Support AVX10.2-COMEF new instructions. #108063
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-mc — Author: Mahesh-Attarde (mahesh-attarde) — Changes: Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965 — Patch is 59.64 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/108063.diff — 12 files affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a1d466eee691c9..22d5e6a20c9d79 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26060,32 +26060,67 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
if (CC == ISD::SETLT || CC == ISD::SETLE)
std::swap(LHS, RHS);
- SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
+ // With AVX10.2, EQ and NE compares are lowered to the COMX/UCOMX nodes.
+ bool HasAVX10_2_COMX =
+ Subtarget.hasAVX10_2() && (CC == ISD::SETEQ || CC == ISD::SETNE);
+
+ // AVX10.2 COMPARE supports only v2f64, v4f32 or v8f16
+ auto SVT = LHS.getSimpleValueType();
+ bool HasAVX10_2_COMX_Ty =
+ (SVT == MVT::v2f64) || (SVT == MVT::v4f32) || (SVT == MVT::v8f16);
+
+ auto ComiOpCode = IntrData->Opc0;
+ auto isUnordered = (ComiOpCode == X86ISD::UCOMI);
+
+ if (HasAVX10_2_COMX && HasAVX10_2_COMX_Ty)
+ ComiOpCode = isUnordered ? X86ISD::UCOMX : X86ISD::COMX;
+
+ SDValue Comi = DAG.getNode(ComiOpCode, dl, MVT::i32, LHS, RHS);
+
SDValue SetCC;
- switch (CC) {
- case ISD::SETEQ: { // (ZF = 0 and PF = 0)
- SetCC = getSETCC(X86::COND_E, Comi, dl, DAG);
- SDValue SetNP = getSETCC(X86::COND_NP, Comi, dl, DAG);
- SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP);
- break;
- }
- case ISD::SETNE: { // (ZF = 1 or PF = 1)
- SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG);
- SDValue SetP = getSETCC(X86::COND_P, Comi, dl, DAG);
- SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP);
- break;
- }
- case ISD::SETGT: // (CF = 0 and ZF = 0)
- case ISD::SETLT: { // Condition opposite to GT. Operands swapped above.
- SetCC = getSETCC(X86::COND_A, Comi, dl, DAG);
- break;
- }
- case ISD::SETGE: // CF = 0
- case ISD::SETLE: // Condition opposite to GE. Operands swapped above.
- SetCC = getSETCC(X86::COND_AE, Comi, dl, DAG);
- break;
- default:
- llvm_unreachable("Unexpected illegal condition!");
+ if (HasAVX10_2_COMX & HasAVX10_2_COMX_Ty) {
+ switch (CC) {
+ case ISD::SETEQ: { // (ZF)
+ SetCC = getSETCC(X86::COND_E, Comi, dl, DAG);
+ break;
+ }
+ case ISD::SETNE: { // (!ZF)
+ SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG);
+ break;
+ }
+ case ISD::SETGT:
+ case ISD::SETLT:
+ case ISD::SETGE:
+ case ISD::SETLE:
+ default:
+ llvm_unreachable("Un-implemented condition!");
+ }
+ } else {
+ switch (CC) {
+ case ISD::SETEQ: { // (ZF = 1 and PF = 0)
+ SetCC = getSETCC(X86::COND_E, Comi, dl, DAG);
+ SDValue SetNP = getSETCC(X86::COND_NP, Comi, dl, DAG);
+ SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP);
+ break;
+ }
+ case ISD::SETNE: { // (ZF = 0 or PF = 1)
+ SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG);
+ SDValue SetP = getSETCC(X86::COND_P, Comi, dl, DAG);
+ SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP);
+ break;
+ }
+ case ISD::SETGT: // (CF = 0 and ZF = 0)
+ case ISD::SETLT: { // Condition opposite to GT. Operands swapped above.
+ SetCC = getSETCC(X86::COND_A, Comi, dl, DAG);
+ break;
+ }
+ case ISD::SETGE: // CF = 0
+ case ISD::SETLE: // Condition opposite to GE. Operands swapped above.
+ SetCC = getSETCC(X86::COND_AE, Comi, dl, DAG);
+ break;
+ default:
+ llvm_unreachable("Unexpected illegal condition!");
+ }
}
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
@@ -33845,6 +33880,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(STRICT_FCMPS)
NODE_NAME_CASE(COMI)
NODE_NAME_CASE(UCOMI)
+ NODE_NAME_CASE(COMX)
+ NODE_NAME_CASE(UCOMX)
NODE_NAME_CASE(CMPM)
NODE_NAME_CASE(CMPMM)
NODE_NAME_CASE(STRICT_CMPM)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 93d2b3e65742b2..cf9125dd9c3ccf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -87,6 +87,10 @@ namespace llvm {
COMI,
UCOMI,
+ // X86 compare nodes used when lowering compare intrinsics, similar to COMI/UCOMI.
+ COMX,
+ UCOMX,
+
/// X86 bit-test instructions.
BT,
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index b0eb210b687b19..b2c93455c95de2 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1225,3 +1225,45 @@ defm VFNMADD132NEPBF16 : avx10_fma3p_132_bf16<0x9C, "vfnmadd132nepbf16", X86any_
defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_Fnmsub,
X86Fnmsub, SchedWriteFMA>;
}
+
+//-------------------------------------------------
+// AVX10 COMEF instructions: compares whose only result is EFLAGS (rr_Int and rm_Int forms).
+//-------------------------------------------------
+multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode, // Opc: opcode byte; _: operand type info; OpNode: DAG node to match
+ string OpcodeStr, // mnemonic prepended to the asm operand string
+ Domain d, // execution domain applied through ExeDomain below
+ X86FoldableSchedWrite sched = WriteFComX> { // scheduling class; defaults to WriteFComX
+ let ExeDomain = d in {
+ def rr_Int : AVX512<Opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2), // register-register form; no explicit outputs
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), // two operand orders, one per asm syntax variant
+ [(set EFLAGS, (OpNode (_.VT _.RC:$src1), _.RC:$src2))]>, // selection pattern: OpNode's result is written to EFLAGS
+ EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
+ let mayLoad = 1 in { // the memory form reads its second operand from memory
+ def rm_Int : AVX512<Opc, MRMSrcMem, (outs), (ins _.RC:$src1, _.ScalarMemOp:$src2), // register-memory form; $src2 is a scalar memory operand
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (_.VT _.RC:$src1), (_.LdFrag addr:$src2)))]>, // same pattern with $src2 loaded via _.LdFrag
+ EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
+ }
+ }
+}
+
+let Defs = [EFLAGS], Predicates = [HasAVX10_2] in { // every definition below defines EFLAGS and is gated on HasAVX10_2
+ defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512, // opcode 0x2f, v2f64 operands, X86comi512 node
+ "vcomxsd", SSEPackedDouble>,
+ TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
+ defm VCOMXSHZ : avx10_com_ef_int<0x2f, v8f16x_info, X86comi512, // opcode 0x2f, v8f16 operands, X86comi512 node
+ "vcomxsh", SSEPackedSingle>,
+ T_MAP5, XD, EVEX_CD8<16, CD8VT1>; // NOTE(review): no VEX_LIG here, unlike the SD/SS variants — confirm this is intended
+ defm VCOMXSSZ : avx10_com_ef_int<0x2f, v4f32x_info, X86comi512, // opcode 0x2f, v4f32 operands, X86comi512 node
+ "vcomxss", SSEPackedSingle>,
+ TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ defm VUCOMXSDZ : avx10_com_ef_int<0x2e, v2f64x_info, X86ucomi512, // opcode 0x2e, v2f64 operands, X86ucomi512 node
+ "vucomxsd", SSEPackedDouble>,
+ TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
+ defm VUCOMXSHZ : avx10_com_ef_int<0x2e, v8f16x_info, X86ucomi512, // opcode 0x2e, v8f16 operands, X86ucomi512 node
+ "vucomxsh", SSEPackedSingle>,
+ T_MAP5, XD, EVEX_CD8<16, CD8VT1>; // NOTE(review): no VEX_LIG here either — confirm against the SD/SS variants
+ defm VUCOMXSSZ : avx10_com_ef_int<0x2e, v4f32x_info, X86ucomi512, // opcode 0x2e, v4f32 operands, X86ucomi512 node
+ "vucomxss", SSEPackedSingle>,
+ TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 59bfd2bcbabc26..fb6920042734a1 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -61,7 +61,8 @@ def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>;
def X86comi : SDNode<"X86ISD::COMI", SDTX86FCmp>;
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86FCmp>;
-
+def X86comi512 : SDNode<"X86ISD::COMX", SDTX86FCmp>;
+def X86ucomi512 : SDNode<"X86ISD::UCOMX", SDTX86FCmp>;
def SDTX86Cmps : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<0, 1>,
SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
diff --git a/llvm/test/CodeGen/X86/comi-flags.ll b/llvm/test/CodeGen/X86/comi-flags.ll
index 8b7a089f0ce872..6f520aa57dcd09 100644
--- a/llvm/test/CodeGen/X86/comi-flags.ll
+++ b/llvm/test/CodeGen/X86/comi-flags.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=AVX,NO-AVX10_2
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX,AVX10_2
;
; SSE
@@ -17,15 +18,22 @@ define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i3
; SSE-NEXT: cmovnel %esi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse_comieq_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %edi, %eax
-; AVX-NEXT: vcomiss %xmm1, %xmm0
-; AVX-NEXT: setnp %cl
-; AVX-NEXT: sete %dl
-; AVX-NEXT: testb %cl, %dl
-; AVX-NEXT: cmovnel %esi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse_comieq_ss:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %edi, %eax
+; NO-AVX10_2-NEXT: vcomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setnp %cl
+; NO-AVX10_2-NEXT: sete %dl
+; NO-AVX10_2-NEXT: testb %cl, %dl
+; NO-AVX10_2-NEXT: cmovnel %esi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse_comieq_ss:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vcomxss %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
%cmp = icmp eq i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -126,13 +134,20 @@ define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i
; SSE-NEXT: cmovpl %edi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse_comineq_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %esi, %eax
-; AVX-NEXT: vcomiss %xmm1, %xmm0
-; AVX-NEXT: cmovnel %edi, %eax
-; AVX-NEXT: cmovpl %edi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse_comineq_ss:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %esi, %eax
+; NO-AVX10_2-NEXT: vcomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: cmovnel %edi, %eax
+; NO-AVX10_2-NEXT: cmovpl %edi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse_comineq_ss:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vcomxss %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1)
%cmp = icmp ne i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -151,15 +166,22 @@ define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i
; SSE-NEXT: cmovnel %esi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse_ucomieq_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %edi, %eax
-; AVX-NEXT: vucomiss %xmm1, %xmm0
-; AVX-NEXT: setnp %cl
-; AVX-NEXT: sete %dl
-; AVX-NEXT: testb %cl, %dl
-; AVX-NEXT: cmovnel %esi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse_ucomieq_ss:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %edi, %eax
+; NO-AVX10_2-NEXT: vucomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setnp %cl
+; NO-AVX10_2-NEXT: sete %dl
+; NO-AVX10_2-NEXT: testb %cl, %dl
+; NO-AVX10_2-NEXT: cmovnel %esi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse_ucomieq_ss:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vucomxss %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
%cmp = icmp eq i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -260,13 +282,20 @@ define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2,
; SSE-NEXT: cmovpl %edi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse_ucomineq_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %esi, %eax
-; AVX-NEXT: vucomiss %xmm1, %xmm0
-; AVX-NEXT: cmovnel %edi, %eax
-; AVX-NEXT: cmovpl %edi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse_ucomineq_ss:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %esi, %eax
+; NO-AVX10_2-NEXT: vucomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: cmovnel %edi, %eax
+; NO-AVX10_2-NEXT: cmovpl %edi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse_ucomineq_ss:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vucomxss %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1)
%cmp = icmp ne i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -289,15 +318,22 @@ define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2,
; SSE-NEXT: cmovnel %esi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse2_comieq_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %edi, %eax
-; AVX-NEXT: vcomisd %xmm1, %xmm0
-; AVX-NEXT: setnp %cl
-; AVX-NEXT: sete %dl
-; AVX-NEXT: testb %cl, %dl
-; AVX-NEXT: cmovnel %esi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse2_comieq_sd:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %edi, %eax
+; NO-AVX10_2-NEXT: vcomisd %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setnp %cl
+; NO-AVX10_2-NEXT: sete %dl
+; NO-AVX10_2-NEXT: testb %cl, %dl
+; NO-AVX10_2-NEXT: cmovnel %esi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse2_comieq_sd:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vcomxsd %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
%cmp = icmp eq i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -398,13 +434,20 @@ define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2
; SSE-NEXT: cmovpl %edi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse2_comineq_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %esi, %eax
-; AVX-NEXT: vcomisd %xmm1, %xmm0
-; AVX-NEXT: cmovnel %edi, %eax
-; AVX-NEXT: cmovpl %edi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse2_comineq_sd:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %esi, %eax
+; NO-AVX10_2-NEXT: vcomisd %xmm1, %xmm0
+; NO-AVX10_2-NEXT: cmovnel %edi, %eax
+; NO-AVX10_2-NEXT: cmovpl %edi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse2_comineq_sd:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vcomxsd %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
%cmp = icmp ne i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -423,15 +466,22 @@ define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2
; SSE-NEXT: cmovnel %esi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse2_ucomieq_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %edi, %eax
-; AVX-NEXT: vucomisd %xmm1, %xmm0
-; AVX-NEXT: setnp %cl
-; AVX-NEXT: sete %dl
-; AVX-NEXT: testb %cl, %dl
-; AVX-NEXT: cmovnel %esi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse2_ucomieq_sd:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %edi, %eax
+; NO-AVX10_2-NEXT: vucomisd %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setnp %cl
+; NO-AVX10_2-NEXT: sete %dl
+; NO-AVX10_2-NEXT: testb %cl, %dl
+; NO-AVX10_2-NEXT: cmovnel %esi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse2_ucomieq_sd:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vucomxsd %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
%cmp = icmp eq i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -532,13 +582,20 @@ define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a
; SSE-NEXT: cmovpl %edi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse2_ucomineq_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %esi, %eax
-; AVX-NEXT: vucomisd %xmm1, %xmm0
-; AVX-NEXT: cmovnel %edi, %eax
-; AVX-NEXT: cmovpl %edi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse2_ucomineq_sd:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %esi, %eax
+; NO-AVX10_2-NEXT: vucomisd %xmm1, %xmm0
+; NO-AVX10_2-NEXT: cmovnel %edi, %eax
+; NO-AVX10_2-NEXT: cmovpl %edi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse2_ucomineq_sd:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vucomxsd %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
%cmp = icmp ne i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -557,15 +614,22 @@ define void @PR38960_eq(<4 x float> %A, <4 x float> %B) {
; SSE-NEXT: # %bb.1: # %if.end
; SSE-NEXT: retq
;
-; AVX-LABEL: PR38960_eq:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: vcomiss %xmm1, %xmm0
-; AVX-NEXT: setnp %al
-; AVX-NEXT: sete %cl
-; AVX-NEXT: testb %al, %cl
-; AVX-NEXT: jne foo@PLT # TAILCALL
-; AVX-NEXT: # %bb.1: # %if.end
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: PR38960_eq:
+; NO-AVX10_2: # %bb.0: # %entry
+; NO-AVX10_2-NEXT: vcomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setnp %al
+; NO-AVX10_2-NEXT: sete %cl
+; NO-AVX10_2-NEXT: testb %al, %cl
+; NO-AVX10_2-NEXT: jne foo@PLT # TAILCALL
+; NO-AVX10_2-NEXT: # %bb.1: # %if.end
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: PR38960_eq:
+; AVX10_2: # %bb.0: # %entry
+; AVX10_2-NEXT: vcomxss %xmm1, %xmm0
+; AVX10_2-NEXT: je foo@PLT # TAILCALL
+; AVX10_2-NEXT: # %bb.1: # %if.end
+; AVX10_2-NEXT: retq
entry:
%call = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %A, <4 x float> %B) #3
%cmp = icmp eq i32 %call, 0
@@ -590,15 +654,22 @@ define void @PR38960_neq(<4 x float> %A, <4 x float> %B) {
; SSE-NEXT: # %bb.1: # %if.end
; SSE-NEXT: retq
;
-; AVX-LABEL: PR38960_neq:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: vcomiss %xmm1, %xmm0
-; AVX-NEXT: setp %al
-; AVX-NEXT: setne %cl
-; AVX-NEXT: orb %al, %cl
-; AVX-NEXT: jne foo@PLT # TAILCALL
-; AVX-NEXT: # %bb.1: # %if.end
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: PR38960_neq:
+; NO-AVX10_2: # %bb.0: # %entry
+; NO-AVX10_2-NEXT: vcomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setp %al
+; NO-AVX10_2-NEXT: setne %cl
+; NO-AVX10_2-NEXT: orb %al, %cl
+; NO-AVX10_2-NEXT: jne foo@PLT # TAILCALL
+; NO-AVX10_2-NEXT: # %bb.1: # %if.end
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: PR38960_neq:
+; AVX10_2: # %bb.0: # %entry
+; AVX10_2-NEXT: vcomxss %xmm1, %xmm0
+; AVX10_2-NEXT: jne foo@PLT # TAILCALL
+; AVX10_2-NEXT: # %bb.1: # %if.end
+; AVX10_2-NEXT: retq
entry:
%call = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %A, <4 x float> %B) #3
%cmp = icmp eq i32 %call, 0
diff --git a/llvm/test/MC/Disassembler/X86/avx512-com-ef-32.txt b/llvm/test/MC/Disassembler/X86/avx512-com-ef-32.txt
new file mode 100644
index 00000000000000..f762601c9f6221
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx512-com-ef-32.txt
@@ -0,0 +1,174 @@
+# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vcomxsd %xmm3, %xmm2
+# INTEL: vcomxsd xmm2, xmm3
+0x62,0xf1,0xfe,0x08,0x2f,0xd3
+
+# ATT...
[truncated]
|
@llvm/pr-subscribers-backend-x86 — Author: Mahesh-Attarde (mahesh-attarde) — Changes: Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965 — Patch is 59.64 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/108063.diff — 12 files affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a1d466eee691c9..22d5e6a20c9d79 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26060,32 +26060,67 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
if (CC == ISD::SETLT || CC == ISD::SETLE)
std::swap(LHS, RHS);
- SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS);
+ // With AVX10.2, EQ and NE compares are lowered to the COMX/UCOMX nodes.
+ bool HasAVX10_2_COMX =
+ Subtarget.hasAVX10_2() && (CC == ISD::SETEQ || CC == ISD::SETNE);
+
+ // AVX10.2 COMPARE supports only v2f64, v4f32 or v8f16
+ auto SVT = LHS.getSimpleValueType();
+ bool HasAVX10_2_COMX_Ty =
+ (SVT == MVT::v2f64) || (SVT == MVT::v4f32) || (SVT == MVT::v8f16);
+
+ auto ComiOpCode = IntrData->Opc0;
+ auto isUnordered = (ComiOpCode == X86ISD::UCOMI);
+
+ if (HasAVX10_2_COMX && HasAVX10_2_COMX_Ty)
+ ComiOpCode = isUnordered ? X86ISD::UCOMX : X86ISD::COMX;
+
+ SDValue Comi = DAG.getNode(ComiOpCode, dl, MVT::i32, LHS, RHS);
+
SDValue SetCC;
- switch (CC) {
- case ISD::SETEQ: { // (ZF = 0 and PF = 0)
- SetCC = getSETCC(X86::COND_E, Comi, dl, DAG);
- SDValue SetNP = getSETCC(X86::COND_NP, Comi, dl, DAG);
- SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP);
- break;
- }
- case ISD::SETNE: { // (ZF = 1 or PF = 1)
- SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG);
- SDValue SetP = getSETCC(X86::COND_P, Comi, dl, DAG);
- SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP);
- break;
- }
- case ISD::SETGT: // (CF = 0 and ZF = 0)
- case ISD::SETLT: { // Condition opposite to GT. Operands swapped above.
- SetCC = getSETCC(X86::COND_A, Comi, dl, DAG);
- break;
- }
- case ISD::SETGE: // CF = 0
- case ISD::SETLE: // Condition opposite to GE. Operands swapped above.
- SetCC = getSETCC(X86::COND_AE, Comi, dl, DAG);
- break;
- default:
- llvm_unreachable("Unexpected illegal condition!");
+ if (HasAVX10_2_COMX & HasAVX10_2_COMX_Ty) {
+ switch (CC) {
+ case ISD::SETEQ: { // (ZF)
+ SetCC = getSETCC(X86::COND_E, Comi, dl, DAG);
+ break;
+ }
+ case ISD::SETNE: { // (!ZF)
+ SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG);
+ break;
+ }
+ case ISD::SETGT:
+ case ISD::SETLT:
+ case ISD::SETGE:
+ case ISD::SETLE:
+ default:
+ llvm_unreachable("Un-implemented condition!");
+ }
+ } else {
+ switch (CC) {
+ case ISD::SETEQ: { // (ZF = 1 and PF = 0)
+ SetCC = getSETCC(X86::COND_E, Comi, dl, DAG);
+ SDValue SetNP = getSETCC(X86::COND_NP, Comi, dl, DAG);
+ SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP);
+ break;
+ }
+ case ISD::SETNE: { // (ZF = 0 or PF = 1)
+ SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG);
+ SDValue SetP = getSETCC(X86::COND_P, Comi, dl, DAG);
+ SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP);
+ break;
+ }
+ case ISD::SETGT: // (CF = 0 and ZF = 0)
+ case ISD::SETLT: { // Condition opposite to GT. Operands swapped above.
+ SetCC = getSETCC(X86::COND_A, Comi, dl, DAG);
+ break;
+ }
+ case ISD::SETGE: // CF = 0
+ case ISD::SETLE: // Condition opposite to GE. Operands swapped above.
+ SetCC = getSETCC(X86::COND_AE, Comi, dl, DAG);
+ break;
+ default:
+ llvm_unreachable("Unexpected illegal condition!");
+ }
}
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
@@ -33845,6 +33880,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(STRICT_FCMPS)
NODE_NAME_CASE(COMI)
NODE_NAME_CASE(UCOMI)
+ NODE_NAME_CASE(COMX)
+ NODE_NAME_CASE(UCOMX)
NODE_NAME_CASE(CMPM)
NODE_NAME_CASE(CMPMM)
NODE_NAME_CASE(STRICT_CMPM)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 93d2b3e65742b2..cf9125dd9c3ccf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -87,6 +87,10 @@ namespace llvm {
COMI,
UCOMI,
+ // X86 compare nodes used when lowering compare intrinsics, similar to COMI/UCOMI.
+ COMX,
+ UCOMX,
+
/// X86 bit-test instructions.
BT,
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index b0eb210b687b19..b2c93455c95de2 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1225,3 +1225,45 @@ defm VFNMADD132NEPBF16 : avx10_fma3p_132_bf16<0x9C, "vfnmadd132nepbf16", X86any_
defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_Fnmsub,
X86Fnmsub, SchedWriteFMA>;
}
+
+//-------------------------------------------------
+// AVX10 COMEF instructions: compares whose only result is EFLAGS (rr_Int and rm_Int forms).
+//-------------------------------------------------
+multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode, // Opc: opcode byte; _: operand type info; OpNode: DAG node to match
+ string OpcodeStr, // mnemonic prepended to the asm operand string
+ Domain d, // execution domain applied through ExeDomain below
+ X86FoldableSchedWrite sched = WriteFComX> { // scheduling class; defaults to WriteFComX
+ let ExeDomain = d in {
+ def rr_Int : AVX512<Opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2), // register-register form; no explicit outputs
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), // two operand orders, one per asm syntax variant
+ [(set EFLAGS, (OpNode (_.VT _.RC:$src1), _.RC:$src2))]>, // selection pattern: OpNode's result is written to EFLAGS
+ EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
+ let mayLoad = 1 in { // the memory form reads its second operand from memory
+ def rm_Int : AVX512<Opc, MRMSrcMem, (outs), (ins _.RC:$src1, _.ScalarMemOp:$src2), // register-memory form; $src2 is a scalar memory operand
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (_.VT _.RC:$src1), (_.LdFrag addr:$src2)))]>, // same pattern with $src2 loaded via _.LdFrag
+ EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
+ }
+ }
+}
+
+let Defs = [EFLAGS], Predicates = [HasAVX10_2] in { // every definition below defines EFLAGS and is gated on HasAVX10_2
+ defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512, // opcode 0x2f, v2f64 operands, X86comi512 node
+ "vcomxsd", SSEPackedDouble>,
+ TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
+ defm VCOMXSHZ : avx10_com_ef_int<0x2f, v8f16x_info, X86comi512, // opcode 0x2f, v8f16 operands, X86comi512 node
+ "vcomxsh", SSEPackedSingle>,
+ T_MAP5, XD, EVEX_CD8<16, CD8VT1>; // NOTE(review): no VEX_LIG here, unlike the SD/SS variants — confirm this is intended
+ defm VCOMXSSZ : avx10_com_ef_int<0x2f, v4f32x_info, X86comi512, // opcode 0x2f, v4f32 operands, X86comi512 node
+ "vcomxss", SSEPackedSingle>,
+ TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ defm VUCOMXSDZ : avx10_com_ef_int<0x2e, v2f64x_info, X86ucomi512, // opcode 0x2e, v2f64 operands, X86ucomi512 node
+ "vucomxsd", SSEPackedDouble>,
+ TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
+ defm VUCOMXSHZ : avx10_com_ef_int<0x2e, v8f16x_info, X86ucomi512, // opcode 0x2e, v8f16 operands, X86ucomi512 node
+ "vucomxsh", SSEPackedSingle>,
+ T_MAP5, XD, EVEX_CD8<16, CD8VT1>; // NOTE(review): no VEX_LIG here either — confirm against the SD/SS variants
+ defm VUCOMXSSZ : avx10_com_ef_int<0x2e, v4f32x_info, X86ucomi512, // opcode 0x2e, v4f32 operands, X86ucomi512 node
+ "vucomxss", SSEPackedSingle>,
+ TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 59bfd2bcbabc26..fb6920042734a1 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -61,7 +61,8 @@ def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>;
def X86comi : SDNode<"X86ISD::COMI", SDTX86FCmp>;
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86FCmp>;
-
+def X86comi512 : SDNode<"X86ISD::COMX", SDTX86FCmp>;
+def X86ucomi512 : SDNode<"X86ISD::UCOMX", SDTX86FCmp>;
def SDTX86Cmps : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<0, 1>,
SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
diff --git a/llvm/test/CodeGen/X86/comi-flags.ll b/llvm/test/CodeGen/X86/comi-flags.ll
index 8b7a089f0ce872..6f520aa57dcd09 100644
--- a/llvm/test/CodeGen/X86/comi-flags.ll
+++ b/llvm/test/CodeGen/X86/comi-flags.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=AVX,NO-AVX10_2
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX,AVX10_2
;
; SSE
@@ -17,15 +18,22 @@ define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i3
; SSE-NEXT: cmovnel %esi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse_comieq_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %edi, %eax
-; AVX-NEXT: vcomiss %xmm1, %xmm0
-; AVX-NEXT: setnp %cl
-; AVX-NEXT: sete %dl
-; AVX-NEXT: testb %cl, %dl
-; AVX-NEXT: cmovnel %esi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse_comieq_ss:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %edi, %eax
+; NO-AVX10_2-NEXT: vcomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setnp %cl
+; NO-AVX10_2-NEXT: sete %dl
+; NO-AVX10_2-NEXT: testb %cl, %dl
+; NO-AVX10_2-NEXT: cmovnel %esi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse_comieq_ss:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vcomxss %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
%cmp = icmp eq i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -126,13 +134,20 @@ define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i
; SSE-NEXT: cmovpl %edi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse_comineq_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %esi, %eax
-; AVX-NEXT: vcomiss %xmm1, %xmm0
-; AVX-NEXT: cmovnel %edi, %eax
-; AVX-NEXT: cmovpl %edi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse_comineq_ss:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %esi, %eax
+; NO-AVX10_2-NEXT: vcomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: cmovnel %edi, %eax
+; NO-AVX10_2-NEXT: cmovpl %edi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse_comineq_ss:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vcomxss %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1)
%cmp = icmp ne i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -151,15 +166,22 @@ define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2, i
; SSE-NEXT: cmovnel %esi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse_ucomieq_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %edi, %eax
-; AVX-NEXT: vucomiss %xmm1, %xmm0
-; AVX-NEXT: setnp %cl
-; AVX-NEXT: sete %dl
-; AVX-NEXT: testb %cl, %dl
-; AVX-NEXT: cmovnel %esi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse_ucomieq_ss:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %edi, %eax
+; NO-AVX10_2-NEXT: vucomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setnp %cl
+; NO-AVX10_2-NEXT: sete %dl
+; NO-AVX10_2-NEXT: testb %cl, %dl
+; NO-AVX10_2-NEXT: cmovnel %esi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse_ucomieq_ss:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vucomxss %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
%cmp = icmp eq i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -260,13 +282,20 @@ define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1, i32 %a2,
; SSE-NEXT: cmovpl %edi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse_ucomineq_ss:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %esi, %eax
-; AVX-NEXT: vucomiss %xmm1, %xmm0
-; AVX-NEXT: cmovnel %edi, %eax
-; AVX-NEXT: cmovpl %edi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse_ucomineq_ss:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %esi, %eax
+; NO-AVX10_2-NEXT: vucomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: cmovnel %edi, %eax
+; NO-AVX10_2-NEXT: cmovpl %edi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse_ucomineq_ss:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vucomxss %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1)
%cmp = icmp ne i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -289,15 +318,22 @@ define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2,
; SSE-NEXT: cmovnel %esi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse2_comieq_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %edi, %eax
-; AVX-NEXT: vcomisd %xmm1, %xmm0
-; AVX-NEXT: setnp %cl
-; AVX-NEXT: sete %dl
-; AVX-NEXT: testb %cl, %dl
-; AVX-NEXT: cmovnel %esi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse2_comieq_sd:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %edi, %eax
+; NO-AVX10_2-NEXT: vcomisd %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setnp %cl
+; NO-AVX10_2-NEXT: sete %dl
+; NO-AVX10_2-NEXT: testb %cl, %dl
+; NO-AVX10_2-NEXT: cmovnel %esi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse2_comieq_sd:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vcomxsd %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
%cmp = icmp eq i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -398,13 +434,20 @@ define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2
; SSE-NEXT: cmovpl %edi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse2_comineq_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %esi, %eax
-; AVX-NEXT: vcomisd %xmm1, %xmm0
-; AVX-NEXT: cmovnel %edi, %eax
-; AVX-NEXT: cmovpl %edi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse2_comineq_sd:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %esi, %eax
+; NO-AVX10_2-NEXT: vcomisd %xmm1, %xmm0
+; NO-AVX10_2-NEXT: cmovnel %edi, %eax
+; NO-AVX10_2-NEXT: cmovpl %edi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse2_comineq_sd:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vcomxsd %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
%cmp = icmp ne i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -423,15 +466,22 @@ define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a2
; SSE-NEXT: cmovnel %esi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse2_ucomieq_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %edi, %eax
-; AVX-NEXT: vucomisd %xmm1, %xmm0
-; AVX-NEXT: setnp %cl
-; AVX-NEXT: sete %dl
-; AVX-NEXT: testb %cl, %dl
-; AVX-NEXT: cmovnel %esi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse2_ucomieq_sd:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %edi, %eax
+; NO-AVX10_2-NEXT: vucomisd %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setnp %cl
+; NO-AVX10_2-NEXT: sete %dl
+; NO-AVX10_2-NEXT: testb %cl, %dl
+; NO-AVX10_2-NEXT: cmovnel %esi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse2_ucomieq_sd:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vucomxsd %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
%cmp = icmp eq i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -532,13 +582,20 @@ define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1, i32 %a
; SSE-NEXT: cmovpl %edi, %eax
; SSE-NEXT: retq
;
-; AVX-LABEL: test_x86_sse2_ucomineq_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: movl %esi, %eax
-; AVX-NEXT: vucomisd %xmm1, %xmm0
-; AVX-NEXT: cmovnel %edi, %eax
-; AVX-NEXT: cmovpl %edi, %eax
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: test_x86_sse2_ucomineq_sd:
+; NO-AVX10_2: # %bb.0:
+; NO-AVX10_2-NEXT: movl %esi, %eax
+; NO-AVX10_2-NEXT: vucomisd %xmm1, %xmm0
+; NO-AVX10_2-NEXT: cmovnel %edi, %eax
+; NO-AVX10_2-NEXT: cmovpl %edi, %eax
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: test_x86_sse2_ucomineq_sd:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: movl %edi, %eax
+; AVX10_2-NEXT: vucomxsd %xmm1, %xmm0
+; AVX10_2-NEXT: cmovel %esi, %eax
+; AVX10_2-NEXT: retq
%call = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
%cmp = icmp ne i32 %call, 0
%res = select i1 %cmp, i32 %a2, i32 %a3
@@ -557,15 +614,22 @@ define void @PR38960_eq(<4 x float> %A, <4 x float> %B) {
; SSE-NEXT: # %bb.1: # %if.end
; SSE-NEXT: retq
;
-; AVX-LABEL: PR38960_eq:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: vcomiss %xmm1, %xmm0
-; AVX-NEXT: setnp %al
-; AVX-NEXT: sete %cl
-; AVX-NEXT: testb %al, %cl
-; AVX-NEXT: jne foo@PLT # TAILCALL
-; AVX-NEXT: # %bb.1: # %if.end
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: PR38960_eq:
+; NO-AVX10_2: # %bb.0: # %entry
+; NO-AVX10_2-NEXT: vcomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setnp %al
+; NO-AVX10_2-NEXT: sete %cl
+; NO-AVX10_2-NEXT: testb %al, %cl
+; NO-AVX10_2-NEXT: jne foo@PLT # TAILCALL
+; NO-AVX10_2-NEXT: # %bb.1: # %if.end
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: PR38960_eq:
+; AVX10_2: # %bb.0: # %entry
+; AVX10_2-NEXT: vcomxss %xmm1, %xmm0
+; AVX10_2-NEXT: je foo@PLT # TAILCALL
+; AVX10_2-NEXT: # %bb.1: # %if.end
+; AVX10_2-NEXT: retq
entry:
%call = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %A, <4 x float> %B) #3
%cmp = icmp eq i32 %call, 0
@@ -590,15 +654,22 @@ define void @PR38960_neq(<4 x float> %A, <4 x float> %B) {
; SSE-NEXT: # %bb.1: # %if.end
; SSE-NEXT: retq
;
-; AVX-LABEL: PR38960_neq:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: vcomiss %xmm1, %xmm0
-; AVX-NEXT: setp %al
-; AVX-NEXT: setne %cl
-; AVX-NEXT: orb %al, %cl
-; AVX-NEXT: jne foo@PLT # TAILCALL
-; AVX-NEXT: # %bb.1: # %if.end
-; AVX-NEXT: retq
+; NO-AVX10_2-LABEL: PR38960_neq:
+; NO-AVX10_2: # %bb.0: # %entry
+; NO-AVX10_2-NEXT: vcomiss %xmm1, %xmm0
+; NO-AVX10_2-NEXT: setp %al
+; NO-AVX10_2-NEXT: setne %cl
+; NO-AVX10_2-NEXT: orb %al, %cl
+; NO-AVX10_2-NEXT: jne foo@PLT # TAILCALL
+; NO-AVX10_2-NEXT: # %bb.1: # %if.end
+; NO-AVX10_2-NEXT: retq
+;
+; AVX10_2-LABEL: PR38960_neq:
+; AVX10_2: # %bb.0: # %entry
+; AVX10_2-NEXT: vcomxss %xmm1, %xmm0
+; AVX10_2-NEXT: jne foo@PLT # TAILCALL
+; AVX10_2-NEXT: # %bb.1: # %if.end
+; AVX10_2-NEXT: retq
entry:
%call = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %A, <4 x float> %B) #3
%cmp = icmp eq i32 %call, 0
diff --git a/llvm/test/MC/Disassembler/X86/avx512-com-ef-32.txt b/llvm/test/MC/Disassembler/X86/avx512-com-ef-32.txt
new file mode 100644
index 00000000000000..f762601c9f6221
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx512-com-ef-32.txt
@@ -0,0 +1,174 @@
+# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vcomxsd %xmm3, %xmm2
+# INTEL: vcomxsd xmm2, xmm3
+0x62,0xf1,0xfe,0x08,0x2f,0xd3
+
+# ATT...
[truncated]
|
@phoebewang @FreddyLeaf @KanRobert |
bool HasAVX10_2_COMX_Ty = | ||
(SVT == MVT::v2f64) || (SVT == MVT::v4f32) || (SVT == MVT::v8f16); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The intrinsic doesn't support other types, you can assert for them though.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is part of an existing test case in comi-flags.ll. It works without AVX10_2. An assertion would lead to a crash.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Only `<4 x float>` and `<2 x double>` are declared in comi-flags.ll, so it won't crash.
I mean the check is not necessary. Removing it without adding an assertion is also fine.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This check serves two purposes:
- It addresses the limitation of VCOMX to SS, SH and SD.
- For other data types, it falls back to the original VCOMI sequence.
In the absence of this check, test_int_x86_avx10_maskz_max_nepbf16_128 from comi-flags.ll fails to select:
LLVM ERROR: Cannot select: t13: i32 = X86ISD::COMX t5, t6
t5: v8bf16 = bitcast t2
t2: v8f16,ch = CopyFromReg t0, Register:v8f16 %0
t1: v8f16 = Register %0
t6: v8bf16 = bitcast t4
t4: v8f16,ch = CopyFromReg t0, Register:v8f16 %1
t3: v8f16 = Register %1
In function: test_x86_avx10_com_nesbf16_eq
So we need to keep this check and not use an assertion.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right, I forgot it. Given this, we just need to exclude `v8bf16`.
case ISD::SETEQ: { // (ZF = 0 and PF = 0) | ||
SetCC = getSETCC(X86::COND_E, Comi, dl, DAG); | ||
SDValue SetNP = getSETCC(X86::COND_NP, Comi, dl, DAG); | ||
SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP); | ||
break; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about changing it like this:
case ISD::SETEQ: {
SetCC = getSETCC(X86::COND_E, Comi, dl, DAG);
if (Subtarget.hasAVX10_2()) // ZF == 1
break;
// (ZF = 1 and PF = 0)
SDValue SetNP = getSETCC(X86::COND_NP, Comi, dl, DAG);
SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP);
break;
}
Note: The prior comment was wrong.
case ISD::SETNE: { // (ZF = 1 or PF = 1) | ||
SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG); | ||
SDValue SetP = getSETCC(X86::COND_P, Comi, dl, DAG); | ||
SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP); | ||
break; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ditto. Leave below unchanged.
} | ||
|
||
let Defs = [EFLAGS], Predicates = [HasAVX10_2] in { | ||
defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We still need to define the SAE variants here. We just don't need to define patterns for them.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't have separate records for VCOMXSDZ and VCOMXSDZ_SAE.
We used VCOMXSDZrr_Int and VCOMXSDZrrb_Int to separate out the SAE version.
The definition of rrb_Int used mayRaiseFPException=0 and it required an SAE node pattern. The SAE node pattern was deleted since we had no way to reach/exercise it from the COMI type.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Defining `mayRaiseFPException=0` is not a problem, and we can leave the pattern blank; see avx512_ord_cmp_sae.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please also add enc/dec tests.
@@ -0,0 +1,196 @@ | |||
# REQUIRES: intel_feature_isa_avx512_com_ef |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove this. The same below.
llvm/lib/Target/X86/X86InstrAVX10.td
Outdated
} | ||
} | ||
|
||
let Defs = [EFLAGS], Predicates = [HasAVX10_2] in { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Put `let Uses = [MXCSR]` here, because all variants need it.
llvm/lib/Target/X86/X86InstrAVX10.td
Outdated
[(set EFLAGS, (OpNode (_.VT _.RC:$src1), (_.LdFrag addr:$src2)))]>, | ||
EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC; | ||
} | ||
let Uses = [MXCSR], mayRaiseFPException = 0 in { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Put `mayRaiseFPException = 1` on `rr_Int` and `rm_Int`, and remove this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done.
SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG); | ||
if (HasAVX10_2_COMX & HasAVX10_2_COMX_Ty) // ZF == 0 | ||
break; | ||
// (ZF = 1 or PF = 0) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(ZF = 0 or PF = 1)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
llvm/lib/Target/X86/X86InstrAVX10.td
Outdated
defm VUCOMXSSZ : avx10_com_ef_int<0x2e, v4f32x_info, X86ucomi512, | ||
"vucomxss", SSEPackedSingle>, | ||
TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a new line.
Domain d, | ||
X86FoldableSchedWrite sched = WriteFComX> { | ||
let ExeDomain = d, mayRaiseFPException = 1 in { | ||
def rr_Int : AVX512<Opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We also need the `rr` and `rm` definitions for optimizations like https://godbolt.org/z/d3sGofdTs, but I think we can leave it for a second patch. Just adding a TODO is OK with me.
@@ -26060,22 +26060,43 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, | |||
if (CC == ISD::SETLT || CC == ISD::SETLE) | |||
std::swap(LHS, RHS); | |||
|
|||
SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS); | |||
// For AVX10.2, Support EQ and NE |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add the ending `.` for comments. The same below.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
@FreddyLeaf @KanRobert Can you review please? |
0af3b5a
to
34efa73
Compare
@phoebewang can you merge please? |
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965 Chapter 8 AVX10 COMPARE SCALAR FP WITH ENHANCED EFLAGS INSTRUCTIONS --------- Co-authored-by: mattarde <[email protected]>
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965
Chapter 8 AVX10 COMPARE SCALAR FP WITH ENHANCED EFLAGS INSTRUCTIONS