Skip to content

Commit 80a328b

Browse files
committed
[X86] SimplifyDemandedVectorEltsForTargetNode - add basic PCMPEQ/PCMPGT handling
1 parent 92e5f13 commit 80a328b

10 files changed

+53
-34
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41262,6 +41262,20 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
4126241262
KnownZero = LHSZero;
4126341263
break;
4126441264
}
41265+
case X86ISD::PCMPEQ:
41266+
case X86ISD::PCMPGT: {
41267+
APInt LHSUndef, LHSZero;
41268+
APInt RHSUndef, RHSZero;
41269+
SDValue LHS = Op.getOperand(0);
41270+
SDValue RHS = Op.getOperand(1);
41271+
if (SimplifyDemandedVectorElts(LHS, DemandedElts, LHSUndef, LHSZero, TLO,
41272+
Depth + 1))
41273+
return true;
41274+
if (SimplifyDemandedVectorElts(RHS, DemandedElts, RHSUndef, RHSZero, TLO,
41275+
Depth + 1))
41276+
return true;
41277+
break;
41278+
}
4126541279
case X86ISD::KSHIFTL: {
4126641280
SDValue Src = Op.getOperand(0);
4126741281
auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));

llvm/test/CodeGen/X86/horizontal-reduce-umax.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -635,7 +635,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
635635
; X64-AVX2-LABEL: test_reduce_v4i64:
636636
; X64-AVX2: ## %bb.0:
637637
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
638-
; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
638+
; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
639639
; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
640640
; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm4
641641
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3

llvm/test/CodeGen/X86/horizontal-reduce-umin.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -581,7 +581,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
581581
; X64-AVX2-LABEL: test_reduce_v4i64:
582582
; X64-AVX2: ## %bb.0:
583583
; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
584-
; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
584+
; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
585585
; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
586586
; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm4
587587
; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3

llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -267,11 +267,13 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind {
267267
; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
268268
; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
269269
; SSE41-NEXT: pxor %xmm1, %xmm0
270-
; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
271-
; SSE41-NEXT: pxor %xmm1, %xmm2
270+
; SSE41-NEXT: movl $3, %eax
271+
; SSE41-NEXT: movq %rax, %xmm3
272+
; SSE41-NEXT: pcmpeqq %xmm2, %xmm3
273+
; SSE41-NEXT: pxor %xmm1, %xmm3
272274
; SSE41-NEXT: movd %xmm0, %eax
273275
; SSE41-NEXT: pextrb $8, %xmm0, %edx
274-
; SSE41-NEXT: pextrb $0, %xmm2, %ecx
276+
; SSE41-NEXT: pextrb $0, %xmm3, %ecx
275277
; SSE41-NEXT: # kill: def $al killed $al killed $eax
276278
; SSE41-NEXT: # kill: def $dl killed $dl killed $edx
277279
; SSE41-NEXT: # kill: def $cl killed $cl killed $ecx
@@ -318,7 +320,9 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind {
318320
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
319321
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
320322
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
321-
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
323+
; AVX1-NEXT: movl $3, %eax
324+
; AVX1-NEXT: vmovq %rax, %xmm2
325+
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
322326
; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
323327
; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
324328
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1

llvm/test/CodeGen/X86/test-shrink-bug.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ define dso_local void @fail(i16 %a, <2 x i8> %b) {
6767
; CHECK-X64-NEXT: testl $263, %edi # imm = 0x107
6868
; CHECK-X64-NEXT: je .LBB1_3
6969
; CHECK-X64-NEXT: # %bb.1:
70-
; CHECK-X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
70+
; CHECK-X64-NEXT: pslld $8, %xmm0
7171
; CHECK-X64-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
7272
; CHECK-X64-NEXT: pextrw $1, %xmm0, %eax
7373
; CHECK-X64-NEXT: testb $1, %al

llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -264,7 +264,6 @@ define <4 x i1> @t32_tautological(<4 x i32> %X) nounwind {
264264
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
265265
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
266266
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
267-
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
268267
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
269268
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
270269
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]

llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll

Lines changed: 8 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -25,13 +25,7 @@ define <4 x i1> @t0_all_tautological(<4 x i32> %X) nounwind {
2525
define <4 x i1> @t1_all_odd_eq(<4 x i32> %X) nounwind {
2626
; CHECK-SSE2-LABEL: t1_all_odd_eq:
2727
; CHECK-SSE2: # %bb.0:
28-
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
29-
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
30-
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
31-
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
32-
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
33-
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
34-
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
28+
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3529
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3630
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
3731
; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -82,13 +76,7 @@ define <4 x i1> @t1_all_odd_eq(<4 x i32> %X) nounwind {
8276
define <4 x i1> @t1_all_odd_ne(<4 x i32> %X) nounwind {
8377
; CHECK-SSE2-LABEL: t1_all_odd_ne:
8478
; CHECK-SSE2: # %bb.0:
85-
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
86-
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
87-
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
88-
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
89-
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
90-
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
91-
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
79+
; CHECK-SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9280
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
9381
; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
9482
; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -256,7 +244,9 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
256244
; CHECK-AVX1-NEXT: vpsllq $32, %xmm0, %xmm0
257245
; CHECK-AVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0
258246
; CHECK-AVX1-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
259-
; CHECK-AVX1-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
247+
; CHECK-AVX1-NEXT: movabsq $-3074457345618258603, %rax # imm = 0xD555555555555555
248+
; CHECK-AVX1-NEXT: vmovq %rax, %xmm1
249+
; CHECK-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
260250
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
261251
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
262252
; CHECK-AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
@@ -273,7 +263,9 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
273263
; CHECK-AVX2-NEXT: vpsllq $32, %xmm0, %xmm0
274264
; CHECK-AVX2-NEXT: vpaddq %xmm0, %xmm2, %xmm0
275265
; CHECK-AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
276-
; CHECK-AVX2-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
266+
; CHECK-AVX2-NEXT: movabsq $-3074457345618258603, %rax # imm = 0xD555555555555555
267+
; CHECK-AVX2-NEXT: vmovq %rax, %xmm1
268+
; CHECK-AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
277269
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
278270
; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
279271
; CHECK-AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero

llvm/test/CodeGen/X86/vector-reduce-umax.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -210,7 +210,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
210210
; AVX2-LABEL: test_v4i64:
211211
; AVX2: # %bb.0:
212212
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
213-
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
213+
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
214214
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm3
215215
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm4
216216
; AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3

llvm/test/CodeGen/X86/vector-reduce-umin.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -211,7 +211,7 @@ define i64 @test_v4i64(<4 x i64> %a0) {
211211
; AVX2-LABEL: test_v4i64:
212212
; AVX2: # %bb.0:
213213
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
214-
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
214+
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
215215
; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
216216
; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm4
217217
; AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3

llvm/test/CodeGen/X86/vselect.ll

Lines changed: 18 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -741,14 +741,24 @@ define i64 @vselect_any_extend_vector_inreg_crash(ptr %x) {
741741
; SSE-NEXT: shll $15, %eax
742742
; SSE-NEXT: retq
743743
;
744-
; AVX-LABEL: vselect_any_extend_vector_inreg_crash:
745-
; AVX: # %bb.0:
746-
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
747-
; AVX-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
748-
; AVX-NEXT: vmovd %xmm0, %eax
749-
; AVX-NEXT: andl $1, %eax
750-
; AVX-NEXT: shll $15, %eax
751-
; AVX-NEXT: retq
744+
; AVX1-LABEL: vselect_any_extend_vector_inreg_crash:
745+
; AVX1: # %bb.0:
746+
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
747+
; AVX1-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
748+
; AVX1-NEXT: vmovd %xmm0, %eax
749+
; AVX1-NEXT: andl $1, %eax
750+
; AVX1-NEXT: shll $15, %eax
751+
; AVX1-NEXT: retq
752+
;
753+
; AVX2-LABEL: vselect_any_extend_vector_inreg_crash:
754+
; AVX2: # %bb.0:
755+
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
756+
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [49,49,49,49]
757+
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
758+
; AVX2-NEXT: vmovd %xmm0, %eax
759+
; AVX2-NEXT: andl $1, %eax
760+
; AVX2-NEXT: shll $15, %eax
761+
; AVX2-NEXT: retq
752762
0:
753763
%1 = load <8 x i8>, ptr %x
754764
%2 = icmp eq <8 x i8> %1, <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>

0 commit comments

Comments
 (0)