Skip to content

Commit a508e9e

Browse files
committed
[X86] Try Folding icmp of v8i32 -> fcmp of v8f32 on AVX
Fixes: #82242. The idea is that AVX doesn't support comparisons for `v8i32`, so it splits the comparison into 2x `v4i32` comparisons plus reconstruction of the `v8i32`. By converting to float, we can handle the comparison with 1 or 2 instructions (1 if we can `bitcast`, 2 if we need to convert with `sitofp`). Proofs: https://alive2.llvm.org/ce/z/AJDdQ8 (they time out online, but can be reproduced locally).
1 parent ff91308 commit a508e9e

30 files changed

+2135
-2288
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23299,6 +23299,110 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
2329923299
}
2330023300
}
2330123301

23302+
// We get bad codegen for v8i32 compares on avx targets (without avx2) so if
23303+
// possible convert to a v8f32 compare.
23304+
if (VTOp0.getVectorElementType() == MVT::i32 && VTOp0 == MVT::v8i32 &&
23305+
Subtarget.hasAVX() && !Subtarget.hasAVX2()) {
23306+
std::optional<KnownBits> KnownOps[2];
23307+
// Check if an op is known to be in a certain range.
23308+
auto OpInRange = [&DAG, Op, &KnownOps](unsigned OpNo, bool CmpLT,
23309+
const APInt Bound) {
23310+
if (!KnownOps[OpNo].has_value())
23311+
KnownOps[OpNo] = DAG.computeKnownBits(Op.getOperand(OpNo));
23312+
23313+
if (KnownOps[OpNo]->isUnknown())
23314+
return false;
23315+
23316+
std::optional<bool> Res;
23317+
if (CmpLT)
23318+
Res = KnownBits::ult(*KnownOps[OpNo], KnownBits::makeConstant(Bound));
23319+
else
23320+
Res = KnownBits::ugt(*KnownOps[OpNo], KnownBits::makeConstant(Bound));
23321+
return Res.has_value() && *Res;
23322+
};
23323+
23324+
bool OkayCvt = false;
23325+
bool OkayBitcast = false;
23326+
23327+
// For cvt up to 1 << (Significand Precision)
23328+
const APInt MaxConvertableCvt = APInt(32, (1U << 24));
23329+
// For bitcast up to (and including) first inf representation (0x7f800000)
23330+
const APInt MaxConvertableBitcast = APInt(32, 0x7f800001);
23331+
23332+
// For bitcast we need both lhs/rhs u< MaxConvertableBitcast
23333+
// NB: It might be worth it to enable the bitcast version for unsigned avx2
23334+
// comparisons as they typically require multiple instructions to lower
23335+
// (they don't fit `vpcmpeq`/`vpcmpgt` well).
23336+
if (OpInRange(1, /*CmpLT*/ true, MaxConvertableBitcast) &&
23337+
OpInRange(0, /*CmpLT*/ true, MaxConvertableBitcast)) {
23338+
OkayBitcast = true;
23339+
}
23340+
// We want to convert icmp -> fcmp using `sitofp` iff one of the converts
23341+
// will be constant folded.
23342+
else if ((DAG.isConstantValueOfAnyType(peekThroughBitcasts(Op1)) ||
23343+
DAG.isConstantValueOfAnyType(peekThroughBitcasts(Op0)))) {
23344+
if (isUnsignedIntSetCC(Cond)) {
23345+
// For cvt + unsigned compare we need both lhs/rhs >= 0 and either lhs
23346+
// or rhs < MaxConvertableCvt
23347+
23348+
if (OpInRange(1, /*CmpLT*/ true, APInt::getSignedMinValue(32)) &&
23349+
OpInRange(0, /*CmpLT*/ true, APInt::getSignedMinValue(32)) &&
23350+
(OpInRange(1, /*CmpLT*/ true, MaxConvertableCvt) ||
23351+
OpInRange(0, /*CmpLT*/ true, MaxConvertableCvt)))
23352+
OkayCvt = true;
23353+
} else {
23354+
// For cvt + signed compare we need abs(lhs) or abs(rhs) <
23355+
// MaxConvertableCvt
23356+
if (OpInRange(1, /*CmpLT*/ true, MaxConvertableCvt) ||
23357+
OpInRange(1, /*CmpLT*/ false, -MaxConvertableCvt) ||
23358+
OpInRange(0, /*CmpLT*/ true, MaxConvertableCvt) ||
23359+
OpInRange(0, /*CmpLT*/ false, -MaxConvertableCvt))
23360+
OkayCvt = true;
23361+
}
23362+
}
23363+
23364+
if (OkayBitcast || OkayCvt) {
23365+
switch (Cond) {
23366+
default:
23367+
llvm_unreachable("Unexpected SETCC condition");
23368+
// Get the new FP condition. Note for the unsigned conditions we have
23369+
// verified it's okay to convert to the signed version.
23370+
case ISD::SETULT:
23371+
case ISD::SETLT:
23372+
Cond = ISD::SETOLT;
23373+
break;
23374+
case ISD::SETUGT:
23375+
case ISD::SETGT:
23376+
Cond = ISD::SETOGT;
23377+
break;
23378+
case ISD::SETULE:
23379+
case ISD::SETLE:
23380+
Cond = ISD::SETOLE;
23381+
break;
23382+
case ISD::SETUGE:
23383+
case ISD::SETGE:
23384+
Cond = ISD::SETOGE;
23385+
break;
23386+
case ISD::SETEQ:
23387+
Cond = ISD::SETOEQ;
23388+
break;
23389+
case ISD::SETNE:
23390+
Cond = ISD::SETONE;
23391+
break;
23392+
}
23393+
23394+
MVT FpVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
23395+
if (OkayBitcast) {
23396+
Op0 = DAG.getBitcast(FpVT, Op0);
23397+
Op1 = DAG.getBitcast(FpVT, Op1);
23398+
} else {
23399+
Op0 = DAG.getNode(ISD::SINT_TO_FP, dl, FpVT, Op0);
23400+
Op1 = DAG.getNode(ISD::SINT_TO_FP, dl, FpVT, Op1);
23401+
}
23402+
return DAG.getSetCC(dl, VT, Op0, Op1, Cond);
23403+
}
23404+
}
23405+
2330223406
// Break 256-bit integer vector compare into smaller ones.
2330323407
if (VT.is256BitVector() && !Subtarget.hasInt256())
2330423408
return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl);

llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -258,10 +258,7 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) {
258258
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
259259
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
260260
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
261-
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
262-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
263-
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
264-
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
261+
; AVX1-NEXT: vcmpeqps %ymm1, %ymm0, %ymm0
265262
; AVX1-NEXT: retq
266263
;
267264
; AVX2-LABEL: ext_i8_8i32:
@@ -489,16 +486,10 @@ define <16 x i32> @ext_i16_16i32(i16 %a0) {
489486
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
490487
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
491488
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm2
492-
; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
493-
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
494-
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
495-
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
489+
; AVX1-NEXT: vcmpeqps %ymm0, %ymm2, %ymm0
496490
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
497491
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
498-
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm2
499-
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
500-
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
501-
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
492+
; AVX1-NEXT: vcmpeqps %ymm2, %ymm1, %ymm1
502493
; AVX1-NEXT: retq
503494
;
504495
; AVX2-LABEL: ext_i16_16i32:

llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -327,10 +327,9 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) {
327327
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
328328
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
329329
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
330-
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
331-
; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
330+
; AVX1-NEXT: vcmpeqps %ymm1, %ymm0, %ymm0
331+
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm1
332332
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
333-
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
334333
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
335334
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
336335
; AVX1-NEXT: retq
@@ -630,18 +629,16 @@ define <16 x i32> @ext_i16_16i32(i16 %a0) {
630629
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
631630
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
632631
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm2
633-
; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
632+
; AVX1-NEXT: vcmpeqps %ymm0, %ymm2, %ymm0
633+
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm2
634+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
634635
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
635-
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
636-
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
637-
; AVX1-NEXT: vpsrld $31, %xmm2, %xmm2
638-
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
636+
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
639637
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
640638
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
641-
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm2
642-
; AVX1-NEXT: vpsrld $31, %xmm2, %xmm2
639+
; AVX1-NEXT: vcmpeqps %ymm2, %ymm1, %ymm1
640+
; AVX1-NEXT: vpsrld $31, %xmm1, %xmm2
643641
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
644-
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
645642
; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
646643
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
647644
; AVX1-NEXT: retq

llvm/test/CodeGen/X86/cmpf-avx.ll

Lines changed: 33 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,13 @@ define <8 x i32> @cmp_eq_bitcast(<8 x i32> %x) {
66
; X86-LABEL: cmp_eq_bitcast:
77
; X86: # %bb.0:
88
; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
9-
; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
10-
; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [3,3,3,3]
11-
; X86-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
12-
; X86-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
13-
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
9+
; X86-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
1410
; X86-NEXT: retl
1511
;
1612
; X64-LABEL: cmp_eq_bitcast:
1713
; X64: # %bb.0:
1814
; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
19-
; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
20-
; X64-NEXT: vbroadcastss {{.*#+}} xmm2 = [3,3,3,3]
21-
; X64-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
22-
; X64-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
23-
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
15+
; X64-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
2416
; X64-NEXT: retq
2517
%and = and <8 x i32> %x, <i32 7, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
2618
%cmp = icmp eq <8 x i32> %and, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
@@ -29,17 +21,17 @@ define <8 x i32> @cmp_eq_bitcast(<8 x i32> %x) {
2921
}
3022

3123
define <8 x i32> @cmp_ne_sitofp(<8 x i32> %x) {
32-
; CHECK-LABEL: cmp_ne_sitofp:
33-
; CHECK: # %bb.0:
34-
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
35-
; CHECK-NEXT: vbroadcastss {{.*#+}} xmm2 = [3,3,3,3]
36-
; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
37-
; CHECK-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
38-
; CHECK-NEXT: vpxor %xmm3, %xmm1, %xmm1
39-
; CHECK-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
40-
; CHECK-NEXT: vpxor %xmm3, %xmm0, %xmm0
41-
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
42-
; CHECK-NEXT: ret{{[l|q]}}
24+
; X86-LABEL: cmp_ne_sitofp:
25+
; X86: # %bb.0:
26+
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
27+
; X86-NEXT: vcmpneq_oqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
28+
; X86-NEXT: retl
29+
;
30+
; X64-LABEL: cmp_ne_sitofp:
31+
; X64: # %bb.0:
32+
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
33+
; X64-NEXT: vcmpneq_oqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
34+
; X64-NEXT: retq
4335
%cmp = icmp ne <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
4436
%sext = sext <8 x i1> %cmp to <8 x i32>
4537
ret <8 x i32> %sext
@@ -72,14 +64,17 @@ define <8 x i32> @cmp_slt_fail_no_const(<8 x i32> %x, <8 x i32> %y) {
7264
}
7365

7466
define <8 x i32> @cmp_eq_sitofp(<8 x i32> %x) {
75-
; CHECK-LABEL: cmp_eq_sitofp:
76-
; CHECK: # %bb.0:
77-
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
78-
; CHECK-NEXT: vbroadcastss {{.*#+}} xmm2 = [4294967293,4294967293,4294967293,4294967293]
79-
; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
80-
; CHECK-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
81-
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
82-
; CHECK-NEXT: ret{{[l|q]}}
67+
; X86-LABEL: cmp_eq_sitofp:
68+
; X86: # %bb.0:
69+
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
70+
; X86-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
71+
; X86-NEXT: retl
72+
;
73+
; X64-LABEL: cmp_eq_sitofp:
74+
; X64: # %bb.0:
75+
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
76+
; X64-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
77+
; X64-NEXT: retq
8378
%cmp = icmp eq <8 x i32> %x, <i32 -3, i32 -3, i32 -3, i32 -3, i32 -3, i32 -3, i32 -3, i32 -3>
8479
%sext = sext <8 x i1> %cmp to <8 x i32>
8580
ret <8 x i32> %sext
@@ -105,11 +100,7 @@ define <8 x i32> @cmp_sgt_bitcast(<8 x i32> %xx, <8 x i32> %yy) {
105100
; CHECK-NEXT: vbroadcastss {{.*#+}} ymm2 = [2139095040,2139095040,2139095040,2139095040,2139095040,2139095040,2139095040,2139095040]
106101
; CHECK-NEXT: vandps %ymm2, %ymm0, %ymm0
107102
; CHECK-NEXT: vandps %ymm2, %ymm1, %ymm1
108-
; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2
109-
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm3
110-
; CHECK-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
111-
; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
112-
; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
103+
; CHECK-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
113104
; CHECK-NEXT: ret{{[l|q]}}
114105
%x = and <8 x i32> %xx, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
115106
%y = and <8 x i32> %yy, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
@@ -214,21 +205,13 @@ define <8 x i32> @cmp_ule_bitcast(<8 x i32> %xx) {
214205
; X86-LABEL: cmp_ule_bitcast:
215206
; X86: # %bb.0:
216207
; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
217-
; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
218-
; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [4,4,4,4]
219-
; X86-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
220-
; X86-NEXT: vpcmpgtd %xmm0, %xmm2, %xmm0
221-
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
208+
; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
222209
; X86-NEXT: retl
223210
;
224211
; X64-LABEL: cmp_ule_bitcast:
225212
; X64: # %bb.0:
226213
; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
227-
; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
228-
; X64-NEXT: vbroadcastss {{.*#+}} xmm2 = [4,4,4,4]
229-
; X64-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
230-
; X64-NEXT: vpcmpgtd %xmm0, %xmm2, %xmm0
231-
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
214+
; X64-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
232215
; X64-NEXT: retq
233216
%x = and <8 x i32> %xx, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
234217
%cmp = icmp ule <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
@@ -240,21 +223,17 @@ define <8 x i32> @cmp_ugt_sitofp(<8 x i32> %xx) {
240223
; X86-LABEL: cmp_ugt_sitofp:
241224
; X86: # %bb.0:
242225
; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
243-
; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
244-
; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [3,3,3,3]
245-
; X86-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
246-
; X86-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
247-
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
226+
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
227+
; X86-NEXT: vbroadcastss {{.*#+}} ymm1 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
228+
; X86-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
248229
; X86-NEXT: retl
249230
;
250231
; X64-LABEL: cmp_ugt_sitofp:
251232
; X64: # %bb.0:
252233
; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
253-
; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
254-
; X64-NEXT: vbroadcastss {{.*#+}} xmm2 = [3,3,3,3]
255-
; X64-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
256-
; X64-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
257-
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
234+
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
235+
; X64-NEXT: vbroadcastss {{.*#+}} ymm1 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
236+
; X64-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
258237
; X64-NEXT: retq
259238
%x = and <8 x i32> %xx, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
260239
%cmp = icmp ugt <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>

llvm/test/CodeGen/X86/combine-testps.ll

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -171,13 +171,24 @@ define i32 @testpsz_128_signbit(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b)
171171
}
172172

173173
define i32 @testpsnzc_256_signbit(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) {
174-
; CHECK-LABEL: testpsnzc_256_signbit:
175-
; CHECK: # %bb.0:
176-
; CHECK-NEXT: movl %edi, %eax
177-
; CHECK-NEXT: vtestps %ymm1, %ymm0
178-
; CHECK-NEXT: cmovnel %esi, %eax
179-
; CHECK-NEXT: vzeroupper
180-
; CHECK-NEXT: retq
174+
; AVX-LABEL: testpsnzc_256_signbit:
175+
; AVX: # %bb.0:
176+
; AVX-NEXT: movl %edi, %eax
177+
; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
178+
; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2
179+
; AVX-NEXT: vcmpltps %ymm2, %ymm0, %ymm0
180+
; AVX-NEXT: vtestps %ymm1, %ymm0
181+
; AVX-NEXT: cmovnel %esi, %eax
182+
; AVX-NEXT: vzeroupper
183+
; AVX-NEXT: retq
184+
;
185+
; AVX2-LABEL: testpsnzc_256_signbit:
186+
; AVX2: # %bb.0:
187+
; AVX2-NEXT: movl %edi, %eax
188+
; AVX2-NEXT: vtestps %ymm1, %ymm0
189+
; AVX2-NEXT: cmovnel %esi, %eax
190+
; AVX2-NEXT: vzeroupper
191+
; AVX2-NEXT: retq
181192
%t0 = bitcast <8 x float> %c to <8 x i32>
182193
%t1 = icmp sgt <8 x i32> zeroinitializer, %t0
183194
%t2 = sext <8 x i1> %t1 to <8 x i32>

llvm/test/CodeGen/X86/masked_compressstore.ll

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1844,25 +1844,25 @@ define void @compressstore_v32f32_v32i32(ptr %base, <32 x float> %V, <32 x i32>
18441844
;
18451845
; AVX1-LABEL: compressstore_v32f32_v32i32:
18461846
; AVX1: ## %bb.0:
1847-
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm8
1848-
; AVX1-NEXT: vpxor %xmm9, %xmm9, %xmm9
1849-
; AVX1-NEXT: vpcmpeqd %xmm9, %xmm8, %xmm8
1850-
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm9, %xmm5
1851-
; AVX1-NEXT: vpackssdw %xmm8, %xmm5, %xmm5
1852-
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm8
1853-
; AVX1-NEXT: vpcmpeqd %xmm9, %xmm8, %xmm8
1854-
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm9, %xmm4
1855-
; AVX1-NEXT: vpackssdw %xmm8, %xmm4, %xmm4
1847+
; AVX1-NEXT: vcvtdq2ps %ymm5, %ymm5
1848+
; AVX1-NEXT: vxorps %xmm8, %xmm8, %xmm8
1849+
; AVX1-NEXT: vcmpeqps %ymm5, %ymm8, %ymm5
1850+
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm9
1851+
; AVX1-NEXT: vpackssdw %xmm9, %xmm5, %xmm5
1852+
; AVX1-NEXT: vcvtdq2ps %ymm4, %ymm4
1853+
; AVX1-NEXT: vcmpeqps %ymm4, %ymm8, %ymm4
1854+
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm9
1855+
; AVX1-NEXT: vpackssdw %xmm9, %xmm4, %xmm4
18561856
; AVX1-NEXT: vpacksswb %xmm5, %xmm4, %xmm4
18571857
; AVX1-NEXT: vpmovmskb %xmm4, %ecx
1858-
; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm4
1859-
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm9, %xmm4
1860-
; AVX1-NEXT: vpcmpeqd %xmm7, %xmm9, %xmm5
1861-
; AVX1-NEXT: vpackssdw %xmm4, %xmm5, %xmm4
1862-
; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm5
1863-
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm9, %xmm5
1864-
; AVX1-NEXT: vpcmpeqd %xmm6, %xmm9, %xmm6
1865-
; AVX1-NEXT: vpackssdw %xmm5, %xmm6, %xmm5
1858+
; AVX1-NEXT: vcvtdq2ps %ymm7, %ymm4
1859+
; AVX1-NEXT: vcmpeqps %ymm4, %ymm8, %ymm4
1860+
; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm5
1861+
; AVX1-NEXT: vpackssdw %xmm5, %xmm4, %xmm4
1862+
; AVX1-NEXT: vcvtdq2ps %ymm6, %ymm5
1863+
; AVX1-NEXT: vcmpeqps %ymm5, %ymm8, %ymm5
1864+
; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6
1865+
; AVX1-NEXT: vpackssdw %xmm6, %xmm5, %xmm5
18661866
; AVX1-NEXT: vpacksswb %xmm4, %xmm5, %xmm4
18671867
; AVX1-NEXT: vpmovmskb %xmm4, %eax
18681868
; AVX1-NEXT: shll $16, %eax

0 commit comments

Comments
 (0)