Skip to content

Commit ba7d9d1

Browse files
committed
[X86] Try Folding icmp of v8i32 -> fcmp of v8f32 on AVX
Fixes: #82242 The idea is that AVX doesn't support comparisons for `v8i32`, so it splits the comparison into 2x `v4i32` comparisons + reconstruction of the `v8i32`. By converting to a float, we can handle the comparison with 1 or 2 instructions (1 if we can `bitcast`, 2 if we need to cast with `sitofp`). Proofs: https://alive2.llvm.org/ce/z/AJDdQ8 (they time out online, but they can be reproduced locally).
1 parent 8183dc5 commit ba7d9d1

30 files changed

+2231
-2296
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23390,6 +23390,136 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
2339023390
}
2339123391
}
2339223392

23393+
// We get bad codegen for v8i32 compares on avx targets (without avx2) so if
23394+
// possible convert to a v8f32 compare.
23395+
if (VTOp0 == MVT::v8i32 && Subtarget.hasAVX() && !Subtarget.hasAVX2()) {
23396+
std::optional<KnownBits> KnownOps[2];
23397+
// Check if an op is known to be in a certain range.
23398+
auto OpInRange = [&DAG, Op, &KnownOps](unsigned OpNo, bool CmpLT,
23399+
const APInt Bound) {
23400+
if (!KnownOps[OpNo].has_value())
23401+
KnownOps[OpNo] = DAG.computeKnownBits(Op.getOperand(OpNo));
23402+
23403+
if (KnownOps[OpNo]->isUnknown())
23404+
return false;
23405+
23406+
std::optional<bool> Res;
23407+
if (CmpLT)
23408+
Res = KnownBits::ult(*KnownOps[OpNo], KnownBits::makeConstant(Bound));
23409+
else
23410+
Res = KnownBits::ugt(*KnownOps[OpNo], KnownBits::makeConstant(Bound));
23411+
return Res.value_or(false);
23412+
};
23413+
23414+
bool OkayCvt = false;
23415+
bool OkayBitcast = false;
23416+
23417+
const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(MVT::f32);
23418+
23419+
// For cvt up to 1 << (Significand Precision), (1 << 24 for ieee float)
23420+
const APInt MaxConvertableCvt =
23421+
APInt::getOneBitSet(32, APFloat::semanticsPrecision(Sem));
23422+
// For bitcast up to (and including) first inf representation (0x7f800000 +
23423+
// 1 for ieee float)
23424+
const APInt MaxConvertableBitcast =
23425+
APFloat::getInf(Sem).bitcastToAPInt() + 1;
23426+
// For bitcast we also exclude de-norm values. This isn't absolutely necessary
23427+
// for strict semantic correctness, but DAZ (de-norm as zero) will break if
23428+
// we don't have this check.
23429+
const APInt MinConvertableBitcast =
23430+
APFloat::getSmallestNormalized(Sem).bitcastToAPInt() - 1;
23431+
23432+
assert(
23433+
MaxConvertableBitcast.getBitWidth() == 32 &&
23434+
MaxConvertableCvt == (1U << 24) &&
23435+
MaxConvertableBitcast == 0x7f800001 &&
23436+
MinConvertableBitcast.isNonNegative() &&
23437+
MaxConvertableBitcast.sgt(MinConvertableBitcast) &&
23438+
"This transform has only been verified to IEEE Single Precision Float");
23439+
23440+
// For bitcast we need both lhs/rhs u> MinConvertableBitcast and u< MaxConvertableBitcast
23441+
// NB: It might be worth it to enable the bitcast version for unsigned avx2
23442+
// comparisons as they typically require multiple instructions to lower
23443+
// (they don't fit `vpcmpeq`/`vpcmpgt` well).
23444+
if (OpInRange(1, /*CmpLT*/ true, MaxConvertableBitcast) &&
23445+
OpInRange(1, /*CmpLT*/ false, MinConvertableBitcast) &&
23446+
OpInRange(0, /*CmpLT*/ true, MaxConvertableBitcast) &&
23447+
OpInRange(0, /*CmpLT*/ false, MinConvertableBitcast)) {
23448+
OkayBitcast = true;
23449+
}
23450+
// We want to convert icmp -> fcmp using `sitofp` iff one of the converts
23451+
// will be constant folded.
23452+
else if ((DAG.isConstantValueOfAnyType(peekThroughBitcasts(Op1)) ||
23453+
DAG.isConstantValueOfAnyType(peekThroughBitcasts(Op0)))) {
23454+
if (isUnsignedIntSetCC(Cond)) {
23455+
// For cvt + unsigned compare we need both lhs/rhs >= 0 and either lhs
23456+
// or rhs < MaxConvertableCvt
23457+
23458+
if (OpInRange(1, /*CmpLT*/ true, APInt::getSignedMinValue(32)) &&
23459+
OpInRange(0, /*CmpLT*/ true, APInt::getSignedMinValue(32)) &&
23460+
(OpInRange(1, /*CmpLT*/ true, MaxConvertableCvt) ||
23461+
OpInRange(0, /*CmpLT*/ true, MaxConvertableCvt)))
23462+
OkayCvt = true;
23463+
} else {
23464+
// For cvt + signed compare we need abs(lhs) or abs(rhs) <
23465+
// MaxConvertableCvt
23466+
if (OpInRange(1, /*CmpLT*/ true, MaxConvertableCvt) ||
23467+
OpInRange(1, /*CmpLT*/ false, -MaxConvertableCvt) ||
23468+
OpInRange(0, /*CmpLT*/ true, MaxConvertableCvt) ||
23469+
OpInRange(0, /*CmpLT*/ false, -MaxConvertableCvt))
23470+
OkayCvt = true;
23471+
}
23472+
}
23473+
// TODO: If we can't prove any of the ranges, we could unconditionally lower
23474+
// `(icmp eq lhs, rhs)` as `(icmp eq (int_to_fp (xor lhs, rhs)), zero)`
23475+
if (OkayBitcast || OkayCvt) {
23476+
switch (Cond) {
23477+
default:
23478+
llvm_unreachable("Unexpected SETCC condition");
23479+
// Get the new FP condition. Note for the unsigned conditions we have
23480+
// verified it's okay to convert to the signed version.
23481+
case ISD::SETULT:
23482+
case ISD::SETLT:
23483+
Cond = ISD::SETOLT;
23484+
break;
23485+
case ISD::SETUGT:
23486+
case ISD::SETGT:
23487+
Cond = ISD::SETOGT;
23488+
break;
23489+
case ISD::SETULE:
23490+
case ISD::SETLE:
23491+
Cond = ISD::SETOLE;
23492+
break;
23493+
case ISD::SETUGE:
23494+
case ISD::SETGE:
23495+
Cond = ISD::SETOGE;
23496+
break;
23497+
case ISD::SETEQ:
23498+
Cond = ISD::SETOEQ;
23499+
break;
23500+
case ISD::SETNE:
23501+
Cond = ISD::SETONE;
23502+
break;
23503+
}
23504+
23505+
MVT FpVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
23506+
SDNodeFlags Flags;
23507+
Flags.setNoNaNs(true);
23508+
Flags.setNoInfs(true);
23509+
Flags.setNoSignedZeros(true);
23510+
if (OkayBitcast) {
23511+
Op0 = DAG.getBitcast(FpVT, Op0);
23512+
Op1 = DAG.getBitcast(FpVT, Op1);
23513+
} else {
23514+
Op0 = DAG.getNode(ISD::SINT_TO_FP, dl, FpVT, Op0);
23515+
Op1 = DAG.getNode(ISD::SINT_TO_FP, dl, FpVT, Op1);
23516+
}
23517+
Op0->setFlags(Flags);
23518+
Op1->setFlags(Flags);
23519+
return DAG.getSetCC(dl, VT, Op0, Op1, Cond);
23520+
}
23521+
}
23522+
2339323523
// Break 256-bit integer vector compare into smaller ones.
2339423524
if (VT.is256BitVector() && !Subtarget.hasInt256())
2339523525
return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl);

llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -256,12 +256,9 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) {
256256
; AVX1-NEXT: vmovd %edi, %xmm0
257257
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
258258
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
259-
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
260-
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
261-
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
262-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
263-
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
264-
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
259+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
260+
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
261+
; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
265262
; AVX1-NEXT: retq
266263
;
267264
; AVX2-LABEL: ext_i8_8i32:
@@ -487,18 +484,12 @@ define <16 x i32> @ext_i16_16i32(i16 %a0) {
487484
; AVX1-NEXT: vmovd %edi, %xmm0
488485
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
489486
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
490-
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
491-
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm2
492-
; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
493-
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
494-
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
495-
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
496-
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
497-
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
498-
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm2
499-
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
500-
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
501-
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
487+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
488+
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
489+
; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
490+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
491+
; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1
492+
; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
502493
; AVX1-NEXT: retq
503494
;
504495
; AVX2-LABEL: ext_i16_16i32:

llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -320,12 +320,9 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) {
320320
; AVX1-NEXT: vmovd %edi, %xmm0
321321
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
322322
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
323-
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
324-
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
325-
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
326-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
327-
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
328-
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
323+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
324+
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
325+
; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
329326
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
330327
; AVX1-NEXT: retq
331328
;
@@ -613,20 +610,14 @@ define <16 x i32> @ext_i16_16i32(i16 %a0) {
613610
; AVX1-NEXT: vmovd %edi, %xmm0
614611
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
615612
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
616-
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
617-
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm2
618-
; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
619-
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
620-
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
621-
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
613+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
614+
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
615+
; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
622616
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
623617
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
624-
; AVX1-NEXT: vmovaps {{.*#+}} ymm3 = [256,512,1024,2048,4096,8192,16384,32768]
625-
; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
626-
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm3
627-
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
628-
; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
629-
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
618+
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
619+
; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1
620+
; AVX1-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
630621
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
631622
; AVX1-NEXT: retq
632623
;

llvm/test/CodeGen/X86/cmpf-avx.ll

Lines changed: 36 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,15 @@ define <8 x i32> @cmp_eq_bitcast(<8 x i32> %x) {
66
; X86-LABEL: cmp_eq_bitcast:
77
; X86: # %bb.0:
88
; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
9-
; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
10-
; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [3,3,3,3]
11-
; X86-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
12-
; X86-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
13-
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
9+
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
10+
; X86-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
1411
; X86-NEXT: retl
1512
;
1613
; X64-LABEL: cmp_eq_bitcast:
1714
; X64: # %bb.0:
1815
; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
19-
; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
20-
; X64-NEXT: vbroadcastss {{.*#+}} xmm2 = [3,3,3,3]
21-
; X64-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
22-
; X64-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
23-
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
16+
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
17+
; X64-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
2418
; X64-NEXT: retq
2519
%and = and <8 x i32> %x, <i32 7, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
2620
%cmp = icmp eq <8 x i32> %and, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
@@ -29,17 +23,17 @@ define <8 x i32> @cmp_eq_bitcast(<8 x i32> %x) {
2923
}
3024

3125
define <8 x i32> @cmp_ne_sitofp(<8 x i32> %x) {
32-
; CHECK-LABEL: cmp_ne_sitofp:
33-
; CHECK: # %bb.0:
34-
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
35-
; CHECK-NEXT: vbroadcastss {{.*#+}} xmm2 = [3,3,3,3]
36-
; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
37-
; CHECK-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
38-
; CHECK-NEXT: vpxor %xmm3, %xmm1, %xmm1
39-
; CHECK-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
40-
; CHECK-NEXT: vpxor %xmm3, %xmm0, %xmm0
41-
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
42-
; CHECK-NEXT: ret{{[l|q]}}
26+
; X86-LABEL: cmp_ne_sitofp:
27+
; X86: # %bb.0:
28+
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
29+
; X86-NEXT: vcmpneq_oqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
30+
; X86-NEXT: retl
31+
;
32+
; X64-LABEL: cmp_ne_sitofp:
33+
; X64: # %bb.0:
34+
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
35+
; X64-NEXT: vcmpneq_oqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
36+
; X64-NEXT: retq
4337
%cmp = icmp ne <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
4438
%sext = sext <8 x i1> %cmp to <8 x i32>
4539
ret <8 x i32> %sext
@@ -72,14 +66,17 @@ define <8 x i32> @cmp_slt_fail_no_const(<8 x i32> %x, <8 x i32> %y) {
7266
}
7367

7468
define <8 x i32> @cmp_eq_sitofp(<8 x i32> %x) {
75-
; CHECK-LABEL: cmp_eq_sitofp:
76-
; CHECK: # %bb.0:
77-
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
78-
; CHECK-NEXT: vbroadcastss {{.*#+}} xmm2 = [4294967293,4294967293,4294967293,4294967293]
79-
; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
80-
; CHECK-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
81-
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
82-
; CHECK-NEXT: ret{{[l|q]}}
69+
; X86-LABEL: cmp_eq_sitofp:
70+
; X86: # %bb.0:
71+
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
72+
; X86-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
73+
; X86-NEXT: retl
74+
;
75+
; X64-LABEL: cmp_eq_sitofp:
76+
; X64: # %bb.0:
77+
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
78+
; X64-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
79+
; X64-NEXT: retq
8380
%cmp = icmp eq <8 x i32> %x, <i32 -3, i32 -3, i32 -3, i32 -3, i32 -3, i32 -3, i32 -3, i32 -3>
8481
%sext = sext <8 x i1> %cmp to <8 x i32>
8582
ret <8 x i32> %sext
@@ -214,21 +211,15 @@ define <8 x i32> @cmp_ule_bitcast(<8 x i32> %xx) {
214211
; X86-LABEL: cmp_ule_bitcast:
215212
; X86: # %bb.0:
216213
; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
217-
; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
218-
; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [4,4,4,4]
219-
; X86-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
220-
; X86-NEXT: vpcmpgtd %xmm0, %xmm2, %xmm0
221-
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
214+
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
215+
; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
222216
; X86-NEXT: retl
223217
;
224218
; X64-LABEL: cmp_ule_bitcast:
225219
; X64: # %bb.0:
226220
; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
227-
; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
228-
; X64-NEXT: vbroadcastss {{.*#+}} xmm2 = [4,4,4,4]
229-
; X64-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
230-
; X64-NEXT: vpcmpgtd %xmm0, %xmm2, %xmm0
231-
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
221+
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
222+
; X64-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
232223
; X64-NEXT: retq
233224
%x = and <8 x i32> %xx, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
234225
%cmp = icmp ule <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
@@ -240,21 +231,17 @@ define <8 x i32> @cmp_ugt_sitofp(<8 x i32> %xx) {
240231
; X86-LABEL: cmp_ugt_sitofp:
241232
; X86: # %bb.0:
242233
; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
243-
; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
244-
; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [3,3,3,3]
245-
; X86-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
246-
; X86-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
247-
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
234+
; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
235+
; X86-NEXT: vbroadcastss {{.*#+}} ymm1 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
236+
; X86-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
248237
; X86-NEXT: retl
249238
;
250239
; X64-LABEL: cmp_ugt_sitofp:
251240
; X64: # %bb.0:
252241
; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
253-
; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
254-
; X64-NEXT: vbroadcastss {{.*#+}} xmm2 = [3,3,3,3]
255-
; X64-NEXT: vpcmpgtd %xmm2, %xmm1, %xmm1
256-
; X64-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
257-
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
242+
; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
243+
; X64-NEXT: vbroadcastss {{.*#+}} ymm1 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
244+
; X64-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
258245
; X64-NEXT: retq
259246
%x = and <8 x i32> %xx, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
260247
%cmp = icmp ugt <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>

llvm/test/CodeGen/X86/combine-testps.ll

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -171,13 +171,24 @@ define i32 @testpsz_128_signbit(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b)
171171
}
172172

173173
define i32 @testpsnzc_256_signbit(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) {
174-
; CHECK-LABEL: testpsnzc_256_signbit:
175-
; CHECK: # %bb.0:
176-
; CHECK-NEXT: movl %edi, %eax
177-
; CHECK-NEXT: vtestps %ymm1, %ymm0
178-
; CHECK-NEXT: cmovnel %esi, %eax
179-
; CHECK-NEXT: vzeroupper
180-
; CHECK-NEXT: retq
174+
; AVX-LABEL: testpsnzc_256_signbit:
175+
; AVX: # %bb.0:
176+
; AVX-NEXT: movl %edi, %eax
177+
; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
178+
; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2
179+
; AVX-NEXT: vcmpltps %ymm2, %ymm0, %ymm0
180+
; AVX-NEXT: vtestps %ymm1, %ymm0
181+
; AVX-NEXT: cmovnel %esi, %eax
182+
; AVX-NEXT: vzeroupper
183+
; AVX-NEXT: retq
184+
;
185+
; AVX2-LABEL: testpsnzc_256_signbit:
186+
; AVX2: # %bb.0:
187+
; AVX2-NEXT: movl %edi, %eax
188+
; AVX2-NEXT: vtestps %ymm1, %ymm0
189+
; AVX2-NEXT: cmovnel %esi, %eax
190+
; AVX2-NEXT: vzeroupper
191+
; AVX2-NEXT: retq
181192
%t0 = bitcast <8 x float> %c to <8 x i32>
182193
%t1 = icmp sgt <8 x i32> zeroinitializer, %t0
183194
%t2 = sext <8 x i1> %t1 to <8 x i32>

0 commit comments

Comments
 (0)