Skip to content

Commit f58e051

Browse files
committed
[x86] try harder to lower to PCMPGT instead of not-of-PCMPEQ
This is motivated by the example in https://llvm.org/PR50055, but it doesn't do anything for that bug currently because we don't actually have a zero-extended setcc there.

Proof for the generic transform (inverse of what we would try to do in combining): https://alive2.llvm.org/ce/z/aBL-Mg

Differential Revision: https://reviews.llvm.org/D102275
1 parent 24d06ff commit f58e051

File tree

4 files changed

+31
-37
lines changed

4 files changed

+31
-37
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23078,17 +23078,20 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
2307823078
return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl);
2307923079
}
2308023080

23081-
// If this is a SETNE against the signed minimum value, change it to SETGT.
23082-
// If this is a SETNE against the signed maximum value, change it to SETLT.
23083-
// which will be swapped to SETGT.
23084-
// Otherwise we use PCMPEQ+invert.
23081+
// If we have a limit constant, try to form PCMPGT (signed cmp) to avoid
23082+
// not-of-PCMPEQ:
23083+
// X != INT_MIN --> X >s INT_MIN
23084+
// X != INT_MAX --> X <s INT_MAX --> INT_MAX >s X
23085+
// +X != 0 --> +X >s 0
2308523086
APInt ConstValue;
2308623087
if (Cond == ISD::SETNE &&
2308723088
ISD::isConstantSplatVector(Op1.getNode(), ConstValue)) {
2308823089
if (ConstValue.isMinSignedValue())
2308923090
Cond = ISD::SETGT;
2309023091
else if (ConstValue.isMaxSignedValue())
2309123092
Cond = ISD::SETLT;
23093+
else if (ConstValue.isNullValue() && DAG.SignBitIsZero(Op0))
23094+
Cond = ISD::SETGT;
2309223095
}
2309323096

2309423097
// If both operands are known non-negative, then an unsigned compare is the

llvm/test/CodeGen/X86/setcc-lowering.ll

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -109,18 +109,14 @@ define <4 x i32> @pcmpgt(<4 x i8> %x) {
109109
; AVX: # %bb.0:
110110
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
111111
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
112-
; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
113-
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
114-
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
112+
; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
115113
; AVX-NEXT: retq
116114
;
117115
; KNL-32-LABEL: pcmpgt:
118116
; KNL-32: # %bb.0:
119117
; KNL-32-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
120118
; KNL-32-NEXT: vpxor %xmm1, %xmm1, %xmm1
121-
; KNL-32-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
122-
; KNL-32-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
123-
; KNL-32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
119+
; KNL-32-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
124120
; KNL-32-NEXT: retl
125121
%zext = zext <4 x i8> %x to <4 x i32>
126122
%icmp = icmp ne <4 x i32> %zext, zeroinitializer

llvm/test/CodeGen/X86/vec_umulo.ll

Lines changed: 18 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2666,14 +2666,13 @@ define <4 x i32> @umulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
26662666
; SSE2-NEXT: pxor %xmm4, %xmm4
26672667
; SSE2-NEXT: pcmpeqd %xmm4, %xmm3
26682668
; SSE2-NEXT: pcmpeqd %xmm5, %xmm5
2669-
; SSE2-NEXT: pxor %xmm5, %xmm3
2669+
; SSE2-NEXT: pxor %xmm3, %xmm5
26702670
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
2671-
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,2,2,3]
2672-
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
2671+
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,2,2,3]
2672+
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
26732673
; SSE2-NEXT: psrld $24, %xmm0
2674-
; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
2675-
; SSE2-NEXT: pxor %xmm5, %xmm0
2676-
; SSE2-NEXT: por %xmm3, %xmm0
2674+
; SSE2-NEXT: pcmpgtd %xmm4, %xmm0
2675+
; SSE2-NEXT: por %xmm5, %xmm0
26772676
; SSE2-NEXT: movd %xmm2, %eax
26782677
; SSE2-NEXT: movw %ax, (%rdi)
26792678
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
@@ -2710,14 +2709,13 @@ define <4 x i32> @umulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
27102709
; SSSE3-NEXT: pxor %xmm4, %xmm4
27112710
; SSSE3-NEXT: pcmpeqd %xmm4, %xmm3
27122711
; SSSE3-NEXT: pcmpeqd %xmm5, %xmm5
2713-
; SSSE3-NEXT: pxor %xmm5, %xmm3
2712+
; SSSE3-NEXT: pxor %xmm3, %xmm5
27142713
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
2715-
; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm1[0,2,2,3]
2716-
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1]
2714+
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,2,2,3]
2715+
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
27172716
; SSSE3-NEXT: psrld $24, %xmm0
2718-
; SSSE3-NEXT: pcmpeqd %xmm4, %xmm0
2719-
; SSSE3-NEXT: pxor %xmm5, %xmm0
2720-
; SSSE3-NEXT: por %xmm3, %xmm0
2717+
; SSSE3-NEXT: pcmpgtd %xmm4, %xmm0
2718+
; SSSE3-NEXT: por %xmm5, %xmm0
27212719
; SSSE3-NEXT: movd %xmm2, %eax
27222720
; SSSE3-NEXT: movw %ax, (%rdi)
27232721
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
@@ -2751,19 +2749,18 @@ define <4 x i32> @umulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
27512749
; SSE41-NEXT: pmuludq %xmm1, %xmm0
27522750
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
27532751
; SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
2754-
; SSE41-NEXT: pxor %xmm0, %xmm0
2755-
; SSE41-NEXT: pcmpeqd %xmm0, %xmm4
2756-
; SSE41-NEXT: pcmpeqd %xmm3, %xmm3
2757-
; SSE41-NEXT: pxor %xmm3, %xmm4
2752+
; SSE41-NEXT: pxor %xmm3, %xmm3
2753+
; SSE41-NEXT: pcmpeqd %xmm3, %xmm4
2754+
; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
2755+
; SSE41-NEXT: pxor %xmm4, %xmm0
27582756
; SSE41-NEXT: pmulld %xmm2, %xmm1
27592757
; SSE41-NEXT: pextrd $3, %xmm1, %eax
27602758
; SSE41-NEXT: pextrd $2, %xmm1, %ecx
27612759
; SSE41-NEXT: pextrd $1, %xmm1, %edx
27622760
; SSE41-NEXT: movd %xmm1, %esi
27632761
; SSE41-NEXT: psrld $24, %xmm1
2764-
; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
2765-
; SSE41-NEXT: pxor %xmm3, %xmm0
2766-
; SSE41-NEXT: por %xmm4, %xmm0
2762+
; SSE41-NEXT: pcmpgtd %xmm3, %xmm1
2763+
; SSE41-NEXT: por %xmm1, %xmm0
27672764
; SSE41-NEXT: movw %ax, 9(%rdi)
27682765
; SSE41-NEXT: movw %cx, 6(%rdi)
27692766
; SSE41-NEXT: movw %dx, 3(%rdi)
@@ -2795,8 +2792,7 @@ define <4 x i32> @umulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
27952792
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
27962793
; AVX1-NEXT: vpmulld %xmm1, %xmm0, %xmm1
27972794
; AVX1-NEXT: vpsrld $24, %xmm1, %xmm0
2798-
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
2799-
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
2795+
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm0, %xmm0
28002796
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
28012797
; AVX1-NEXT: vpextrd $3, %xmm1, %eax
28022798
; AVX1-NEXT: movw %ax, 9(%rdi)
@@ -2833,8 +2829,7 @@ define <4 x i32> @umulo_v4i24(<4 x i24> %a0, <4 x i24> %a1, <4 x i24>* %p2) noun
28332829
; AVX2-NEXT: vpxor %xmm4, %xmm2, %xmm2
28342830
; AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm1
28352831
; AVX2-NEXT: vpsrld $24, %xmm1, %xmm0
2836-
; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
2837-
; AVX2-NEXT: vpxor %xmm4, %xmm0, %xmm0
2832+
; AVX2-NEXT: vpcmpgtd %xmm3, %xmm0, %xmm0
28382833
; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0
28392834
; AVX2-NEXT: vpextrd $3, %xmm1, %eax
28402835
; AVX2-NEXT: movw %ax, 9(%rdi)

llvm/test/CodeGen/X86/vsel-cmp-load.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ define <4 x i64> @ne_zero(<4 x i16>* %p, <4 x i64> %x, <4 x i64> %y) {
5757
; AVX2: # %bb.0:
5858
; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
5959
; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
60-
; AVX2-NEXT: vpcmpeqq %ymm3, %ymm2, %ymm2
61-
; AVX2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
60+
; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2
61+
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
6262
; AVX2-NEXT: retq
6363
;
6464
; AVX512-LABEL: ne_zero:
@@ -189,8 +189,8 @@ define <8 x float> @ne_zero_fp_select(<8 x i8>* %p, <8 x float> %x, <8 x float>
189189
; AVX2: # %bb.0:
190190
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
191191
; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
192-
; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2
193-
; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0
192+
; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm2
193+
; AVX2-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
194194
; AVX2-NEXT: retq
195195
;
196196
; AVX512-LABEL: ne_zero_fp_select:

0 commit comments

Comments
 (0)