
Commit 0194e79

[X86] Try Folding icmp of v8i32 -> fcmp of v8f32 on AVX
Fixes: #82242

The idea is that AVX (without AVX2) doesn't support `v8i32` comparisons, so it splits the comparison into 2x `v4i32` comparisons plus a reconstruction of the `v8i32` result. By converting to a float compare, we can handle the comparison with 1/2 instructions (1 if we can `bitcast`, 2 if we need to convert with `sitofp`).

Proofs: https://alive2.llvm.org/ce/z/AJDdQ8 (the Alive2 link times out, but the proofs can be reproduced locally).
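The two bounds used by the new lowering path rest on properties of IEEE single precision. Below is a minimal, standalone C++ sketch (illustrative only, not part of the commit; requires C++20 for std::bit_cast) of the two facts the fold relies on:

// Hedged sketch: the two IEEE-single facts behind the transform.
//  1. A signed int -> float conversion is exact for |x| <= 2^24, so a compare
//     performed in float after sitofp matches the original integer compare.
//  2. i32 bit patterns at or below the first inf encoding (0x7f800000) are
//     non-negative, non-NaN floats whose ordering matches the unsigned
//     ordering of the bit patterns, so a plain bitcast also preserves compares.
#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  // Fact 1: values up to the significand precision (1 << 24) round-trip.
  const uint32_t MaxCvt = 1u << 24;
  assert(static_cast<uint32_t>(static_cast<float>(MaxCvt - 1)) == MaxCvt - 1);

  // Fact 2: finite, non-negative bit patterns order the same as their floats.
  const uint32_t A = 0x3f800000u; // bit pattern of 1.0f
  const uint32_t B = 0x40000000u; // bit pattern of 2.0f
  assert(A < B && std::bit_cast<float>(A) < std::bit_cast<float>(B));
  return 0;
}

The MaxConvertableCvt (1 << 24) and MaxConvertableBitcast (0x7f800001) bounds in the change below correspond to these two facts.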
Parent: ff91308
30 files changed: +2145 / -2288 lines

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 114 additions & 0 deletions
@@ -23299,6 +23299,120 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
     }
   }
 
+  // We get bad codegen for v8i32 compares on avx targets (without avx2) so if
+  // possible convert to a v8f32 compare.
+  if (VTOp0 == MVT::v8i32 && Subtarget.hasAVX() && !Subtarget.hasAVX2()) {
+    std::optional<KnownBits> KnownOps[2];
+    // Check if an op is known to be in a certain range.
+    auto OpInRange = [&DAG, Op, &KnownOps](unsigned OpNo, bool CmpLT,
+                                           const APInt Bound) {
+      if (!KnownOps[OpNo].has_value())
+        KnownOps[OpNo] = DAG.computeKnownBits(Op.getOperand(OpNo));
+
+      if (KnownOps[OpNo]->isUnknown())
+        return false;
+
+      std::optional<bool> Res;
+      if (CmpLT)
+        Res = KnownBits::ult(*KnownOps[OpNo], KnownBits::makeConstant(Bound));
+      else
+        Res = KnownBits::ugt(*KnownOps[OpNo], KnownBits::makeConstant(Bound));
+      return Res.has_value() && *Res;
+    };
+
+    bool OkayCvt = false;
+    bool OkayBitcast = false;
+
+    const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(MVT::f32);
+
+    // For cvt up to 1 << (Significand Precision), (1 << 24 for ieee float)
+    const APInt MaxConvertableCvt =
+        APInt(32, (1U << APFloat::semanticsPrecision(Sem)));
+    // For bitcast up to (and including) first inf representation (0x7f800000 +
+    // 1 for ieee float)
+    const APInt MaxConvertableBitcast =
+        APFloat::getInf(Sem).bitcastToAPInt() + 1;
+
+    assert(
+        MaxConvertableBitcast.getBitWidth() == 32 &&
+        MaxConvertableCvt == (1U << 24) &&
+        MaxConvertableBitcast == 0x7f800001 &&
+        "This transform has only been verified to IEEE Single Precision Float");
+
+    // For bitcast we need both lhs/op1 u< MaxConvertableBitcast
+    // NB: It might be worth it to enable to bitcast version for unsigned avx2
+    // comparisons as they typically require multiple instructions to lower
+    // (they don't fit `vpcmpeq`/`vpcmpgt` well).
+    if (OpInRange(1, /*CmpLT*/ true, MaxConvertableBitcast) &&
+        OpInRange(0, /*CmpLT*/ true, MaxConvertableBitcast)) {
+      OkayBitcast = true;
+    }
+    // We want to convert icmp -> fcmp using `sitofp` iff one of the converts
+    // will be constant folded.
+    else if ((DAG.isConstantValueOfAnyType(peekThroughBitcasts(Op1)) ||
+              DAG.isConstantValueOfAnyType(peekThroughBitcasts(Op0)))) {
+      if (isUnsignedIntSetCC(Cond)) {
+        // For cvt + unsigned compare we need both lhs/rhs >= 0 and either lhs
+        // or rhs < MaxConvertableCvt
+
+        if (OpInRange(1, /*CmpLT*/ true, APInt::getSignedMinValue(32)) &&
+            OpInRange(0, /*CmpLT*/ true, APInt::getSignedMinValue(32)) &&
+            (OpInRange(1, /*CmpLT*/ true, MaxConvertableCvt) ||
+             OpInRange(0, /*CmpLT*/ true, MaxConvertableCvt)))
+          OkayCvt = true;
+      } else {
+        // For cvt + signed compare we need abs(lhs) or abs(rhs) <
+        // MaxConvertableCvt
+        if (OpInRange(1, /*CmpLT*/ true, MaxConvertableCvt) ||
+            OpInRange(1, /*CmpLT*/ false, -MaxConvertableCvt) ||
+            OpInRange(0, /*CmpLT*/ true, MaxConvertableCvt) ||
+            OpInRange(0, /*CmpLT*/ false, -MaxConvertableCvt))
+          OkayCvt = true;
+      }
+    }
+
+    if (OkayBitcast || OkayCvt) {
+      switch (Cond) {
+      default:
+        llvm_unreachable("Unexpected SETCC condition");
+      // Get the new FP condition. Note for the unsigned conditions we have
+      // verified its okay to convert to the signed version.
+      case ISD::SETULT:
+      case ISD::SETLT:
+        Cond = ISD::SETOLT;
+        break;
+      case ISD::SETUGT:
+      case ISD::SETGT:
+        Cond = ISD::SETOGT;
+        break;
+      case ISD::SETULE:
+      case ISD::SETLE:
+        Cond = ISD::SETOLE;
+        break;
+      case ISD::SETUGE:
+      case ISD::SETGE:
+        Cond = ISD::SETOGE;
+        break;
+      case ISD::SETEQ:
+        Cond = ISD::SETOEQ;
+        break;
+      case ISD::SETNE:
+        Cond = ISD::SETONE;
+        break;
+      }
+
+      MVT FpVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+      if (OkayBitcast) {
+        Op0 = DAG.getBitcast(FpVT, Op0);
+        Op1 = DAG.getBitcast(FpVT, Op1);
+      } else {
+        Op0 = DAG.getNode(ISD::SINT_TO_FP, dl, FpVT, Op0);
+        Op1 = DAG.getNode(ISD::SINT_TO_FP, dl, FpVT, Op1);
+      }
+      return DAG.getSetCC(dl, VT, Op0, Op1, Cond);
+    }
+  }
+
   // Break 256-bit integer vector compare into smaller ones.
   if (VT.is256BitVector() && !Subtarget.hasInt256())
     return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl);
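One detail worth calling out from the hunk above: on the sitofp path, unsigned predicates are only rewritten when both operands are known non-negative (the isUnsignedIntSetCC branch). A standalone C++ illustration (hedged sketch, not part of the commit; assumes two's complement / C++20 conversion rules) of why that guard is needed:

#include <cstdint>
#include <cstdio>

int main() {
  // 0x80000000 is huge as an unsigned i32, but a *signed* int->float
  // conversion turns it into a large negative float, so the unsigned integer
  // ordering and the float ordering disagree unless the sign bit is known
  // to be clear.
  uint32_t a = 1u;
  uint32_t b = 0x80000000u;
  float fa = static_cast<float>(static_cast<int32_t>(a)); // 1.0f
  float fb = static_cast<float>(static_cast<int32_t>(b)); // -2147483648.0f
  std::printf("%d %d\n", (int)(a < b), (int)(fa < fb));    // prints "1 0"
  return 0;
}

With the sign bit known clear on both sides, the signed conversion preserves the unsigned ordering, which is why SETULT/SETUGT/SETULE/SETUGE can be mapped onto the ordered float predicates in the switch above.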

llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll

Lines changed: 3 additions & 12 deletions
@@ -258,10 +258,7 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) {
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vcmpeqps %ymm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: ext_i8_8i32:
@@ -489,16 +486,10 @@ define <16 x i32> @ext_i16_16i32(i16 %a0) {
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
 ; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
 ; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm2
-; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
-; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vcmpeqps %ymm0, %ymm2, %ymm0
 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vcmpeqps %ymm2, %ymm1, %ymm1
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: ext_i16_16i32:

llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll

Lines changed: 8 additions & 11 deletions
@@ -327,10 +327,9 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) {
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
+; AVX1-NEXT: vcmpeqps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpsrld $31, %xmm0, %xmm1
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: retq
@@ -630,18 +629,16 @@ define <16 x i32> @ext_i16_16i32(i16 %a0) {
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
 ; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
 ; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm2
-; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: vcmpeqps %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: vpsrld $31, %xmm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
-; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT: vpsrld $31, %xmm2, %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm2
-; AVX1-NEXT: vpsrld $31, %xmm2, %xmm2
+; AVX1-NEXT: vcmpeqps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vpsrld $31, %xmm1, %xmm2
 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
 ; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; AVX1-NEXT: retq

llvm/test/CodeGen/X86/cmpf-avx.ll

Lines changed: 33 additions & 54 deletions
@@ -6,21 +6,13 @@ define <8 x i32> @cmp_eq_bitcast(<8 x i32> %x) {
 ; X86-LABEL: cmp_eq_bitcast:
 ; X86: # %bb.0:
 ; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [3,3,3,3]
-; X86-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
-; X86-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: cmp_eq_bitcast:
 ; X64: # %bb.0:
 ; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X64-NEXT: vbroadcastss {{.*#+}} xmm2 = [3,3,3,3]
-; X64-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
-; X64-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; X64-NEXT: retq
   %and = and <8 x i32> %x, <i32 7, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
   %cmp = icmp eq <8 x i32> %and, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
@@ -29,17 +21,17 @@ define <8 x i32> @cmp_eq_bitcast(<8 x i32> %x) {
 }
 
 define <8 x i32> @cmp_ne_sitofp(<8 x i32> %x) {
-; CHECK-LABEL: cmp_ne_sitofp:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT: vbroadcastss {{.*#+}} xmm2 = [3,3,3,3]
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vpxor %xmm3, %xmm1, %xmm1
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; CHECK-NEXT: vpxor %xmm3, %xmm0, %xmm0
-; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; CHECK-NEXT: ret{{[l|q]}}
+; X86-LABEL: cmp_ne_sitofp:
+; X86: # %bb.0:
+; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
+; X86-NEXT: vcmpneq_oqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: cmp_ne_sitofp:
+; X64: # %bb.0:
+; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
+; X64-NEXT: vcmpneq_oqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; X64-NEXT: retq
   %cmp = icmp ne <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
   %sext = sext <8 x i1> %cmp to <8 x i32>
   ret <8 x i32> %sext
@@ -72,14 +64,17 @@ define <8 x i32> @cmp_slt_fail_no_const(<8 x i32> %x, <8 x i32> %y) {
 }
 
 define <8 x i32> @cmp_eq_sitofp(<8 x i32> %x) {
-; CHECK-LABEL: cmp_eq_sitofp:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
-; CHECK-NEXT: vbroadcastss {{.*#+}} xmm2 = [4294967293,4294967293,4294967293,4294967293]
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; CHECK-NEXT: ret{{[l|q]}}
+; X86-LABEL: cmp_eq_sitofp:
+; X86: # %bb.0:
+; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
+; X86-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: cmp_eq_sitofp:
+; X64: # %bb.0:
+; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
+; X64-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; X64-NEXT: retq
   %cmp = icmp eq <8 x i32> %x, <i32 -3, i32 -3, i32 -3, i32 -3, i32 -3, i32 -3, i32 -3, i32 -3>
   %sext = sext <8 x i1> %cmp to <8 x i32>
   ret <8 x i32> %sext
@@ -105,11 +100,7 @@ define <8 x i32> @cmp_sgt_bitcast(<8 x i32> %xx, <8 x i32> %yy) {
 ; CHECK-NEXT: vbroadcastss {{.*#+}} ymm2 = [2139095040,2139095040,2139095040,2139095040,2139095040,2139095040,2139095040,2139095040]
 ; CHECK-NEXT: vandps %ymm2, %ymm0, %ymm0
 ; CHECK-NEXT: vandps %ymm2, %ymm1, %ymm1
-; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm2
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm3
-; CHECK-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
-; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; CHECK-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
 ; CHECK-NEXT: ret{{[l|q]}}
   %x = and <8 x i32> %xx, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
   %y = and <8 x i32> %yy, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
@@ -214,21 +205,13 @@ define <8 x i32> @cmp_ule_bitcast(<8 x i32> %xx) {
 ; X86-LABEL: cmp_ule_bitcast:
 ; X86: # %bb.0:
 ; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [4,4,4,4]
-; X86-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; X86-NEXT: vpcmpgtd %xmm0, %xmm2, %xmm0
-; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: cmp_ule_bitcast:
 ; X64: # %bb.0:
 ; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X64-NEXT: vbroadcastss {{.*#+}} xmm2 = [4,4,4,4]
-; X64-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
-; X64-NEXT: vpcmpgtd %xmm0, %xmm2, %xmm0
-; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; X64-NEXT: retq
   %x = and <8 x i32> %xx, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
   %cmp = icmp ule <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
@@ -240,21 +223,17 @@ define <8 x i32> @cmp_ugt_sitofp(<8 x i32> %xx) {
 ; X86-LABEL: cmp_ugt_sitofp:
 ; X86: # %bb.0:
 ; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
-; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [3,3,3,3]
-; X86-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; X86-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
-; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
+; X86-NEXT: vbroadcastss {{.*#+}} ymm1 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
+; X86-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: cmp_ugt_sitofp:
 ; X64: # %bb.0:
 ; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
-; X64-NEXT: vbroadcastss {{.*#+}} xmm2 = [3,3,3,3]
-; X64-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
-; X64-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
-; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
+; X64-NEXT: vbroadcastss {{.*#+}} ymm1 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
+; X64-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
 ; X64-NEXT: retq
   %x = and <8 x i32> %xx, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
   %cmp = icmp ugt <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>

llvm/test/CodeGen/X86/combine-testps.ll

Lines changed: 18 additions & 7 deletions
@@ -171,13 +171,24 @@ define i32 @testpsz_128_signbit(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b)
 }
 
 define i32 @testpsnzc_256_signbit(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) {
-; CHECK-LABEL: testpsnzc_256_signbit:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vtestps %ymm1, %ymm0
-; CHECK-NEXT: cmovnel %esi, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; AVX-LABEL: testpsnzc_256_signbit:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vcmpltps %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vtestps %ymm1, %ymm0
+; AVX-NEXT: cmovnel %esi, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: testpsnzc_256_signbit:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl %edi, %eax
+; AVX2-NEXT: vtestps %ymm1, %ymm0
+; AVX2-NEXT: cmovnel %esi, %eax
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
   %t0 = bitcast <8 x float> %c to <8 x i32>
   %t1 = icmp sgt <8 x i32> zeroinitializer, %t0
   %t2 = sext <8 x i1> %t1 to <8 x i32>
