-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86] SimplifyDemandedVectorEltsForTargetNode - replace packed fcmp node with scalar fcmp node if only element0 is demanded #140563
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…ode with scalar fcmp node if only element0 is demanded This unnecessary vectorisation can appear due to fplogic opcodes only being available for 128-bit types - which can prevent folds that only work on the scalar source types and also lead to fcmp of garbage data in the upper elements. Fixes llvm#140534
@llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon) Changes: This unnecessary vectorisation can appear due to fplogic opcodes only being available for 128-bit types - which can prevent folds that only work on the scalar source types and also lead to fcmp of garbage data in the upper elements. Fixes #140534 Patch is 24.09 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140563.diff 9 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3b6b0d7b86c9c..012319197a65e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43450,6 +43450,28 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
KnownZero = LHSZero;
break;
}
+ case X86ISD::CMPM:
+ case X86ISD::CMPP: {
+ // Scalarize packed fp comparison if we only require element 0.
+ if (DemandedElts == 1) {
+ SDLoc dl(Op);
+ MVT VT = Op.getSimpleValueType();
+ MVT OpSVT = Op.getOperand(0).getSimpleValueType().getScalarType();
+ SDValue LHS = TLO.DAG.getExtractVectorElt(dl, OpSVT, Op.getOperand(0), 0);
+ SDValue RHS = TLO.DAG.getExtractVectorElt(dl, OpSVT, Op.getOperand(1), 0);
+ SDValue CC = Op.getOperand(2);
+ if (Opc == X86ISD::CMPM) {
+ SDValue Cmp =
+ TLO.DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS, CC);
+ return TLO.CombineTo(
+ Op, TLO.DAG.getInsertSubvector(dl, TLO.DAG.getUNDEF(VT), Cmp, 0));
+ }
+ SDValue Cmp = TLO.DAG.getNode(X86ISD::FSETCC, dl, OpSVT, LHS, RHS, CC);
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Cmp));
+ }
+ break;
+ }
case X86ISD::PCMPEQ:
case X86ISD::PCMPGT: {
APInt LHSUndef, LHSZero;
diff --git a/llvm/test/CodeGen/X86/and-or-setcc.ll b/llvm/test/CodeGen/X86/and-or-setcc.ll
index a6a9362908811..4484f23bbda36 100644
--- a/llvm/test/CodeGen/X86/and-or-setcc.ll
+++ b/llvm/test/CodeGen/X86/and-or-setcc.ll
@@ -17,8 +17,8 @@ define i1 @and_ord(float %a, float %b) {
; X64-LABEL: and_ord:
; X64: # %bb.0:
; X64-NEXT: xorps %xmm2, %xmm2
-; X64-NEXT: cmpordps %xmm2, %xmm1
-; X64-NEXT: cmpordps %xmm2, %xmm0
+; X64-NEXT: cmpordss %xmm2, %xmm1
+; X64-NEXT: cmpordss %xmm2, %xmm0
; X64-NEXT: andps %xmm1, %xmm0
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
@@ -44,8 +44,8 @@ define i1 @or_uno(float %a, float %b) {
; X64-LABEL: or_uno:
; X64: # %bb.0:
; X64-NEXT: xorps %xmm2, %xmm2
-; X64-NEXT: cmpunordps %xmm2, %xmm1
-; X64-NEXT: cmpunordps %xmm2, %xmm0
+; X64-NEXT: cmpunordss %xmm2, %xmm1
+; X64-NEXT: cmpunordss %xmm2, %xmm0
; X64-NEXT: orps %xmm1, %xmm0
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
diff --git a/llvm/test/CodeGen/X86/extract-vselect-setcc.ll b/llvm/test/CodeGen/X86/extract-vselect-setcc.ll
index 81ab104cab283..96c8e773d5edd 100644
--- a/llvm/test/CodeGen/X86/extract-vselect-setcc.ll
+++ b/llvm/test/CodeGen/X86/extract-vselect-setcc.ll
@@ -5,10 +5,9 @@ define void @PR117684(i1 %cond, <8 x float> %vec, ptr %ptr1, ptr %ptr2) #0 {
; CHECK-LABEL: PR117684:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vcmpnltps %ymm1, %ymm0, %k1
+; CHECK-NEXT: vcmpnltss %xmm1, %xmm0, %k1
; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN]
; CHECK-NEXT: vinsertf32x4 $0, %xmm0, %ymm0, %ymm0 {%k1} {z}
-; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; CHECK-NEXT: vbroadcastss %xmm2, %ymm2
diff --git a/llvm/test/CodeGen/X86/extractelement-fp.ll b/llvm/test/CodeGen/X86/extractelement-fp.ll
index 944f6bbfd0bfb..1706f17eac165 100644
--- a/llvm/test/CodeGen/X86/extractelement-fp.ll
+++ b/llvm/test/CodeGen/X86/extractelement-fp.ll
@@ -319,7 +319,7 @@ define void @extsetcc(<4 x float> %x) {
define <3 x double> @extvselectsetcc_crash(<2 x double> %x) {
; X64-LABEL: extvselectsetcc_crash:
; X64: # %bb.0:
-; X64-NEXT: vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; X64-NEXT: vcmpeqsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; X64-NEXT: vmovsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
; X64-NEXT: vandpd %xmm2, %xmm1, %xmm1
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
@@ -328,7 +328,7 @@ define <3 x double> @extvselectsetcc_crash(<2 x double> %x) {
;
; X86-LABEL: extvselectsetcc_crash:
; X86: # %bb.0:
-; X86-NEXT: vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
+; X86-NEXT: vcmpeqsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
; X86-NEXT: vmovsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
; X86-NEXT: vandpd %xmm2, %xmm1, %xmm1
; X86-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
diff --git a/llvm/test/CodeGen/X86/fcmp-logic.ll b/llvm/test/CodeGen/X86/fcmp-logic.ll
index 794b0ad92aef6..7b806bca43c2e 100644
--- a/llvm/test/CodeGen/X86/fcmp-logic.ll
+++ b/llvm/test/CodeGen/X86/fcmp-logic.ll
@@ -6,8 +6,8 @@
define i1 @olt_ole_and_f32(float %w, float %x, float %y, float %z) {
; SSE2-LABEL: olt_ole_and_f32:
; SSE2: # %bb.0:
-; SSE2-NEXT: cmpleps %xmm3, %xmm2
-; SSE2-NEXT: cmpltps %xmm1, %xmm0
+; SSE2-NEXT: cmpless %xmm3, %xmm2
+; SSE2-NEXT: cmpltss %xmm1, %xmm0
; SSE2-NEXT: andps %xmm2, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
@@ -15,8 +15,8 @@ define i1 @olt_ole_and_f32(float %w, float %x, float %y, float %z) {
;
; AVX1-LABEL: olt_ole_and_f32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpleps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vcmpltps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vcmpless %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcmpltss %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
@@ -24,15 +24,11 @@ define i1 @olt_ole_and_f32(float %w, float %x, float %y, float %z) {
;
; AVX512-LABEL: olt_ole_and_f32:
; AVX512: # %bb.0:
-; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
-; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vcmpltps %zmm1, %zmm0, %k1
-; AVX512-NEXT: vcmpleps %zmm3, %zmm2, %k0 {%k1}
+; AVX512-NEXT: vcmpless %xmm3, %xmm2, %k0
+; AVX512-NEXT: vcmpltss %xmm1, %xmm0, %k1
+; AVX512-NEXT: kandw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%f1 = fcmp olt float %w, %x
%f2 = fcmp ole float %y, %z
@@ -43,8 +39,8 @@ define i1 @olt_ole_and_f32(float %w, float %x, float %y, float %z) {
define i1 @oge_oeq_or_f32(float %w, float %x, float %y, float %z) {
; SSE2-LABEL: oge_oeq_or_f32:
; SSE2: # %bb.0:
-; SSE2-NEXT: cmpeqps %xmm3, %xmm2
-; SSE2-NEXT: cmpleps %xmm0, %xmm1
+; SSE2-NEXT: cmpeqss %xmm3, %xmm2
+; SSE2-NEXT: cmpless %xmm0, %xmm1
; SSE2-NEXT: orps %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
@@ -52,8 +48,8 @@ define i1 @oge_oeq_or_f32(float %w, float %x, float %y, float %z) {
;
; AVX1-LABEL: oge_oeq_or_f32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpeqps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vcmpleps %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vcmpeqss %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcmpless %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vorps %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
@@ -61,16 +57,11 @@ define i1 @oge_oeq_or_f32(float %w, float %x, float %y, float %z) {
;
; AVX512-LABEL: oge_oeq_or_f32:
; AVX512: # %bb.0:
-; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
-; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vcmpeqps %zmm3, %zmm2, %k0
-; AVX512-NEXT: vcmpleps %zmm0, %zmm1, %k1
+; AVX512-NEXT: vcmpeqss %xmm3, %xmm2, %k0
+; AVX512-NEXT: vcmpless %xmm0, %xmm1, %k1
; AVX512-NEXT: korw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%f1 = fcmp oge float %w, %x
%f2 = fcmp oeq float %y, %z
@@ -90,8 +81,8 @@ define i1 @ord_one_xor_f32(float %w, float %x, float %y, float %z) {
;
; AVX1-LABEL: ord_one_xor_f32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpneq_oqps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vcmpordps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vcmpneq_oqss %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcmpordss %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
@@ -99,16 +90,11 @@ define i1 @ord_one_xor_f32(float %w, float %x, float %y, float %z) {
;
; AVX512-LABEL: ord_one_xor_f32:
; AVX512: # %bb.0:
-; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
-; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vcmpneq_oqps %zmm3, %zmm2, %k0
-; AVX512-NEXT: vcmpordps %zmm1, %zmm0, %k1
+; AVX512-NEXT: vcmpneq_oqss %xmm3, %xmm2, %k0
+; AVX512-NEXT: vcmpordss %xmm1, %xmm0, %k1
; AVX512-NEXT: kxorw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%f1 = fcmp ord float %w, %x
%f2 = fcmp one float %y, %z
@@ -120,8 +106,8 @@ define i1 @ord_one_xor_f32(float %w, float %x, float %y, float %z) {
define i1 @une_oeq_xor_f32(float %w, float %x, float %y, float %z) {
; SSE2-LABEL: une_oeq_xor_f32:
; SSE2: # %bb.0:
-; SSE2-NEXT: cmpeqps %xmm3, %xmm2
-; SSE2-NEXT: cmpneqps %xmm1, %xmm0
+; SSE2-NEXT: cmpeqss %xmm3, %xmm2
+; SSE2-NEXT: cmpneqss %xmm1, %xmm0
; SSE2-NEXT: xorps %xmm2, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
@@ -129,8 +115,8 @@ define i1 @une_oeq_xor_f32(float %w, float %x, float %y, float %z) {
;
; AVX1-LABEL: une_oeq_xor_f32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpeqps %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vcmpneqps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vcmpeqss %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcmpneqss %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
@@ -138,16 +124,11 @@ define i1 @une_oeq_xor_f32(float %w, float %x, float %y, float %z) {
;
; AVX512-LABEL: une_oeq_xor_f32:
; AVX512: # %bb.0:
-; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
-; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vcmpeqps %zmm3, %zmm2, %k0
-; AVX512-NEXT: vcmpneqps %zmm1, %zmm0, %k1
+; AVX512-NEXT: vcmpeqss %xmm3, %xmm2, %k0
+; AVX512-NEXT: vcmpneqss %xmm1, %xmm0, %k1
; AVX512-NEXT: kxorw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%f1 = fcmp une float %w, %x
%f2 = fcmp oeq float %y, %z
@@ -158,8 +139,8 @@ define i1 @une_oeq_xor_f32(float %w, float %x, float %y, float %z) {
define i1 @une_ugt_and_f64(double %w, double %x, double %y, double %z) {
; SSE2-LABEL: une_ugt_and_f64:
; SSE2: # %bb.0:
-; SSE2-NEXT: cmpnlepd %xmm3, %xmm2
-; SSE2-NEXT: cmpneqpd %xmm1, %xmm0
+; SSE2-NEXT: cmpnlesd %xmm3, %xmm2
+; SSE2-NEXT: cmpneqsd %xmm1, %xmm0
; SSE2-NEXT: andpd %xmm2, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
@@ -167,8 +148,8 @@ define i1 @une_ugt_and_f64(double %w, double %x, double %y, double %z) {
;
; AVX1-LABEL: une_ugt_and_f64:
; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpnlepd %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vcmpneqpd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vcmpnlesd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcmpneqsd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vandpd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
@@ -176,15 +157,11 @@ define i1 @une_ugt_and_f64(double %w, double %x, double %y, double %z) {
;
; AVX512-LABEL: une_ugt_and_f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
-; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vcmpneqpd %zmm1, %zmm0, %k1
-; AVX512-NEXT: vcmpnlepd %zmm3, %zmm2, %k0 {%k1}
+; AVX512-NEXT: vcmpnlesd %xmm3, %xmm2, %k0
+; AVX512-NEXT: vcmpneqsd %xmm1, %xmm0, %k1
+; AVX512-NEXT: kandw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%f1 = fcmp une double %w, %x
%f2 = fcmp ugt double %y, %z
@@ -195,8 +172,8 @@ define i1 @une_ugt_and_f64(double %w, double %x, double %y, double %z) {
define i1 @ult_uge_or_f64(double %w, double %x, double %y, double %z) {
; SSE2-LABEL: ult_uge_or_f64:
; SSE2: # %bb.0:
-; SSE2-NEXT: cmpnltpd %xmm3, %xmm2
-; SSE2-NEXT: cmpnlepd %xmm0, %xmm1
+; SSE2-NEXT: cmpnltsd %xmm3, %xmm2
+; SSE2-NEXT: cmpnlesd %xmm0, %xmm1
; SSE2-NEXT: orpd %xmm2, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
@@ -204,8 +181,8 @@ define i1 @ult_uge_or_f64(double %w, double %x, double %y, double %z) {
;
; AVX1-LABEL: ult_uge_or_f64:
; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpnltpd %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vcmpnlepd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vcmpnltsd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcmpnlesd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vorpd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
@@ -213,16 +190,11 @@ define i1 @ult_uge_or_f64(double %w, double %x, double %y, double %z) {
;
; AVX512-LABEL: ult_uge_or_f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
-; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vcmpnltpd %zmm3, %zmm2, %k0
-; AVX512-NEXT: vcmpnlepd %zmm0, %zmm1, %k1
+; AVX512-NEXT: vcmpnltsd %xmm3, %xmm2, %k0
+; AVX512-NEXT: vcmpnlesd %xmm0, %xmm1, %k1
; AVX512-NEXT: korw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%f1 = fcmp ult double %w, %x
%f2 = fcmp uge double %y, %z
@@ -233,8 +205,8 @@ define i1 @ult_uge_or_f64(double %w, double %x, double %y, double %z) {
define i1 @une_uno_xor_f64(double %w, double %x, double %y, double %z) {
; SSE2-LABEL: une_uno_xor_f64:
; SSE2: # %bb.0:
-; SSE2-NEXT: cmpunordpd %xmm3, %xmm2
-; SSE2-NEXT: cmpneqpd %xmm1, %xmm0
+; SSE2-NEXT: cmpunordsd %xmm3, %xmm2
+; SSE2-NEXT: cmpneqsd %xmm1, %xmm0
; SSE2-NEXT: xorpd %xmm2, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
@@ -242,8 +214,8 @@ define i1 @une_uno_xor_f64(double %w, double %x, double %y, double %z) {
;
; AVX1-LABEL: une_uno_xor_f64:
; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpunordpd %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vcmpneqpd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vcmpunordsd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcmpneqsd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
@@ -251,16 +223,11 @@ define i1 @une_uno_xor_f64(double %w, double %x, double %y, double %z) {
;
; AVX512-LABEL: une_uno_xor_f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3
-; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
-; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vcmpunordpd %zmm3, %zmm2, %k0
-; AVX512-NEXT: vcmpneqpd %zmm1, %zmm0, %k1
+; AVX512-NEXT: vcmpunordsd %xmm3, %xmm2, %k0
+; AVX512-NEXT: vcmpneqsd %xmm1, %xmm0, %k1
; AVX512-NEXT: kxorw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%f1 = fcmp une double %w, %x
%f2 = fcmp uno double %y, %z
@@ -371,8 +338,8 @@ define i1 @f32cmp3(float %x, float %y, float %z, float %w) {
; SSE2: # %bb.0:
; SSE2-NEXT: xorps %xmm4, %xmm4
; SSE2-NEXT: xorps %xmm5, %xmm5
-; SSE2-NEXT: cmpltps %xmm1, %xmm5
-; SSE2-NEXT: cmpltps %xmm0, %xmm4
+; SSE2-NEXT: cmpltss %xmm1, %xmm5
+; SSE2-NEXT: cmpltss %xmm0, %xmm4
; SSE2-NEXT: orps %xmm5, %xmm4
; SSE2-NEXT: movd %xmm4, %ecx
; SSE2-NEXT: ucomiss %xmm2, %xmm3
@@ -383,8 +350,8 @@ define i1 @f32cmp3(float %x, float %y, float %z, float %w) {
; AVX1-LABEL: f32cmp3:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vcmpltps %xmm1, %xmm4, %xmm1
-; AVX1-NEXT: vcmpltps %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vcmpltss %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vcmpltss %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %ecx
; AVX1-NEXT: vucomiss %xmm2, %xmm3
@@ -394,17 +361,14 @@ define i1 @f32cmp3(float %x, float %y, float %z, float %w) {
;
; AVX512-LABEL: f32cmp3:
; AVX512: # %bb.0:
-; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512-NEXT: vxorps %xmm4, %xmm4, %xmm4
-; AVX512-NEXT: vcmpltps %zmm1, %zmm4, %k0
-; AVX512-NEXT: vcmpltps %zmm0, %zmm4, %k1
+; AVX512-NEXT: vcmpltss %xmm1, %xmm4, %k0
+; AVX512-NEXT: vcmpltss %xmm0, %xmm4, %k1
; AVX512-NEXT: korw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %ecx
; AVX512-NEXT: vucomiss %xmm2, %xmm3
; AVX512-NEXT: seta %al
; AVX512-NEXT: xorb %cl, %al
-; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%cmpx = fcmp ogt float %x, 0.0
%cmpy = fcmp ogt float %y, 0.0
@@ -425,8 +389,8 @@ define i1 @PR140534(i32 %a0, i32 %a1, i32 %a2) {
; SSE2-NEXT: cvtsi2sd %rax, %xmm2
; SSE2-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm3
-; SSE2-NEXT: cmpltpd %xmm2, %xmm3
-; SSE2-NEXT: cmpltpd %xmm0, %xmm1
+; SSE2-NEXT: cmpltsd %xmm2, %xmm3
+; SSE2-NEXT: cmpltsd %xmm0, %xmm1
; SSE2-NEXT: orpd %xmm3, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: # kill: def $al killed $al killed $eax
@@ -441,8 +405,8 @@ define i1 @PR140534(i32 %a0, i32 %a1, i32 %a2) {
; AVX1-NEXT: movl %edx, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vcmpltpd %xmm2, %xmm1, %xmm2
-; AVX1-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vcmpltsd %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vcmpltsd %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vorpd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: # kill: def $al killed $al killed $eax
@@ -454,12 +418,11 @@ define i1 @PR140534(i32 %a0, i32 %a1, i32 %a2) {
; AVX512-NEXT: vcvtusi2sd %esi, %xmm1, %xmm1
; AVX512-NEXT: vcvtusi2sd %edx, %xmm2, %xmm2
; AVX512-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512-NEXT: vcmpltpd %zmm2, %zmm1, %k0
-; AVX512-NEXT: vcmpltpd %zmm0, %zmm1, %k1
+; AVX512-NEXT: vcmpltsd %xmm2, %xmm1, %k0
+; AVX512-NEXT: vcmpltsd %xmm0, %xmm1, %k1
; AVX512-NEXT: korw %k0, %k1, %k0
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
-; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%conv0 = uitofp i32 %a0 to double
%conv1 = uitofp i32 %a1 to double
diff --git a/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll b/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
index d9d5e2846ed0f..43bac05988e29 100644
--- a/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
+++ b/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
@@ -322,8 +322,8 @@ define i32 @test_zext_cmp11(double %a, double %b) "no-nans-fp-math"="true" {
; ALL-LABEL: test_zext_cmp11:
; ALL: #...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/174/builds/18033 Here is the relevant piece of the build log for reference
|
…ode with scalar fcmp node if only element0 is demanded (llvm#140563) This unnecessary vectorisation can appear due to fplogic opcodes only being available for 128-bit types - which can prevent folds that only work on the scalar source types and also lead to fcmp of garbage data in the upper elements. Fixes llvm#140534
…ode with scalar fcmp node if only element0 is demanded (llvm#140563) This unnecessary vectorisation can appear due to fplogic opcodes only being available for 128-bit types - which can prevent folds that only work on the scalar source types and also lead to fcmp of garbage data in the upper elements. Fixes llvm#140534
This unnecessary vectorisation can appear due to fplogic opcodes only being available for 128-bit types - which can prevent folds that only work on the scalar source types and also lead to fcmp of garbage data in the upper elements.
Fixes #140534