Skip to content

Commit 8347d7c

Browse files
committed
Revert "Revert "[X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero""
This reverts commit 460bba3. Change does not pass check-llvm.
1 parent 294ad08 commit 8347d7c

File tree

5 files changed

+46
-63
lines changed

5 files changed

+46
-63
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53470,6 +53470,7 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
5347053470
SDLoc dl(N);
5347153471
SDValue Op = N->getOperand(0);
5347253472
EVT VT = Op.getValueType();
53473+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5347353474

5347453475
// If we have a constant logical shift that's only used in a comparison
5347553476
// against zero turn it into an equivalent AND. This allows turning it into
@@ -53493,12 +53494,41 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
5349353494
}
5349453495
}
5349553496

53497+
// If we're extracting from an AVX512 bool vector and comparing against zero,
53498+
// then try to just bitcast the vector to an integer to use TEST/BT directly.
53499+
// (and (extract_elt (kshiftr vXi1, C), 0), 1) -> (and (bc vXi1), 1<<C)
53500+
if (Op.getOpcode() == ISD::AND && isOneConstant(Op.getOperand(1)) &&
53501+
Op.hasOneUse() && onlyZeroFlagUsed(SDValue(N, 0))) {
53502+
SDValue Src = Op.getOperand(0);
53503+
if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
53504+
isNullConstant(Src.getOperand(1)) &&
53505+
Src.getOperand(0).getValueType().getScalarType() == MVT::i1) {
53506+
SDValue BoolVec = Src.getOperand(0);
53507+
unsigned ShAmt = 0;
53508+
if (BoolVec.getOpcode() == X86ISD::KSHIFTR) {
53509+
ShAmt = BoolVec.getConstantOperandVal(1);
53510+
BoolVec = BoolVec.getOperand(0);
53511+
}
53512+
BoolVec = widenMaskVector(BoolVec, false, Subtarget, DAG, dl);
53513+
EVT VecVT = BoolVec.getValueType();
53514+
unsigned BitWidth = VecVT.getVectorNumElements();
53515+
EVT BCVT = EVT::getIntegerVT(*DAG.getContext(), BitWidth);
53516+
if (TLI.isTypeLegal(VecVT) && TLI.isTypeLegal(BCVT)) {
53517+
APInt Mask = APInt::getOneBitSet(BitWidth, ShAmt);
53518+
Op = DAG.getBitcast(BCVT, BoolVec);
53519+
Op = DAG.getNode(ISD::AND, dl, BCVT, Op,
53520+
DAG.getConstant(Mask, dl, BCVT));
53521+
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
53522+
DAG.getConstant(0, dl, VT));
53523+
}
53524+
}
53525+
}
53526+
5349653527
// Peek through any zero-extend if we're only testing for a zero result.
5349753528
if (Op.getOpcode() == ISD::ZERO_EXTEND && onlyZeroFlagUsed(SDValue(N, 0))) {
5349853529
SDValue Src = Op.getOperand(0);
5349953530
EVT SrcVT = Src.getValueType();
53500-
if (SrcVT.getScalarSizeInBits() >= 8 &&
53501-
DAG.getTargetLoweringInfo().isTypeLegal(SrcVT))
53531+
if (SrcVT.getScalarSizeInBits() >= 8 && TLI.isTypeLegal(SrcVT))
5350253532
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Src,
5350353533
DAG.getConstant(0, dl, SrcVT));
5350453534
}

llvm/test/CodeGen/X86/avx512-insert-extract.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -175,9 +175,8 @@ define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) nounwind {
175175
; KNL-LABEL: test11:
176176
; KNL: ## %bb.0:
177177
; KNL-NEXT: vpcmpltud %zmm1, %zmm0, %k0
178-
; KNL-NEXT: kshiftrw $4, %k0, %k0
179178
; KNL-NEXT: kmovw %k0, %eax
180-
; KNL-NEXT: testb $1, %al
179+
; KNL-NEXT: testb $16, %al
181180
; KNL-NEXT: je LBB10_2
182181
; KNL-NEXT: ## %bb.1: ## %A
183182
; KNL-NEXT: vmovdqa64 %zmm1, %zmm0
@@ -189,9 +188,8 @@ define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) nounwind {
189188
; SKX-LABEL: test11:
190189
; SKX: ## %bb.0:
191190
; SKX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
192-
; SKX-NEXT: kshiftrw $4, %k0, %k0
193191
; SKX-NEXT: kmovd %k0, %eax
194-
; SKX-NEXT: testb $1, %al
192+
; SKX-NEXT: testb $16, %al
195193
; SKX-NEXT: je LBB10_2
196194
; SKX-NEXT: ## %bb.1: ## %A
197195
; SKX-NEXT: vmovdqa64 %zmm1, %zmm0
@@ -276,9 +274,8 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) nounwind {
276274
; KNL: ## %bb.0:
277275
; KNL-NEXT: movq %rdi, %rax
278276
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
279-
; KNL-NEXT: kshiftrw $4, %k0, %k0
280277
; KNL-NEXT: kmovw %k0, %ecx
281-
; KNL-NEXT: testb $1, %cl
278+
; KNL-NEXT: testb $16, %cl
282279
; KNL-NEXT: cmoveq %rsi, %rax
283280
; KNL-NEXT: vzeroupper
284281
; KNL-NEXT: retq
@@ -287,9 +284,8 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) nounwind {
287284
; SKX: ## %bb.0:
288285
; SKX-NEXT: movq %rdi, %rax
289286
; SKX-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
290-
; SKX-NEXT: kshiftrb $4, %k0, %k0
291287
; SKX-NEXT: kmovd %k0, %ecx
292-
; SKX-NEXT: testb $1, %cl
288+
; SKX-NEXT: testb $16, %cl
293289
; SKX-NEXT: cmoveq %rsi, %rax
294290
; SKX-NEXT: vzeroupper
295291
; SKX-NEXT: retq

llvm/test/CodeGen/X86/movmsk-cmp.ll

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4350,14 +4350,8 @@ define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
43504350
; KNL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
43514351
; KNL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
43524352
; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k0
4353-
; KNL-NEXT: kshiftrw $1, %k0, %k1
4354-
; KNL-NEXT: kmovw %k1, %eax
43554353
; KNL-NEXT: kmovw %k0, %ecx
4356-
<<<<<<< HEAD
43574354
; KNL-NEXT: testb $2, %cl
4358-
=======
4359-
; KNL-NEXT: testb $1, %al
4360-
>>>>>>> parent of 239ab16ec121 ([X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero)
43614355
; KNL-NEXT: movl $42, %eax
43624356
; KNL-NEXT: movl $99, %edx
43634357
; KNL-NEXT: cmovel %edx, %eax
@@ -4369,14 +4363,8 @@ define i32 @PR39665_c_ray(<2 x double> %x, <2 x double> %y) {
43694363
; SKX-LABEL: PR39665_c_ray:
43704364
; SKX: # %bb.0:
43714365
; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0
4372-
; SKX-NEXT: kshiftrb $1, %k0, %k1
4373-
; SKX-NEXT: kmovd %k1, %eax
43744366
; SKX-NEXT: kmovd %k0, %ecx
4375-
<<<<<<< HEAD
43764367
; SKX-NEXT: testb $2, %cl
4377-
=======
4378-
; SKX-NEXT: testb $1, %al
4379-
>>>>>>> parent of 239ab16ec121 ([X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero)
43804368
; SKX-NEXT: movl $42, %eax
43814369
; SKX-NEXT: movl $99, %edx
43824370
; SKX-NEXT: cmovel %edx, %eax

llvm/test/CodeGen/X86/pr33349.ll

Lines changed: 8 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11,36 +11,28 @@ target triple = "x86_64-unknown-linux-gnu"
1111
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
1212
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
1313
; KNL-NEXT: kshiftrw $2, %k0, %k1
14-
; KNL-NEXT: kshiftrw $1, %k1, %k2
1514
; KNL-NEXT: kmovw %k1, %eax
1615
; KNL-NEXT: testb $1, %al
1716
; KNL-NEXT: fld1
1817
; KNL-NEXT: fldz
1918
; KNL-NEXT: fld %st(0)
2019
; KNL-NEXT: fcmovne %st(2), %st
21-
; KNL-NEXT: kmovw %k2, %eax
22-
; KNL-NEXT: testb $1, %al
20+
; KNL-NEXT: testb $2, %al
2321
; KNL-NEXT: fld %st(1)
2422
; KNL-NEXT: fcmovne %st(3), %st
25-
<<<<<<< HEAD
2623
; KNL-NEXT: kmovw %k0, %eax
27-
=======
28-
; KNL-NEXT: kshiftrw $1, %k0, %k1
29-
; KNL-NEXT: kmovw %k1, %eax
30-
>>>>>>> parent of 239ab16ec121 ([X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero)
3124
; KNL-NEXT: testb $1, %al
3225
; KNL-NEXT: fld %st(2)
3326
; KNL-NEXT: fcmovne %st(4), %st
34-
; KNL-NEXT: kmovw %k0, %eax
35-
; KNL-NEXT: testb $1, %al
27+
; KNL-NEXT: testb $2, %al
3628
; KNL-NEXT: fxch %st(3)
3729
; KNL-NEXT: fcmovne %st(4), %st
3830
; KNL-NEXT: fstp %st(4)
3931
; KNL-NEXT: fxch %st(3)
40-
; KNL-NEXT: fstpt (%rdi)
41-
; KNL-NEXT: fxch %st(1)
4232
; KNL-NEXT: fstpt 10(%rdi)
4333
; KNL-NEXT: fxch %st(1)
34+
; KNL-NEXT: fstpt (%rdi)
35+
; KNL-NEXT: fxch %st(1)
4436
; KNL-NEXT: fstpt 30(%rdi)
4537
; KNL-NEXT: fstpt 20(%rdi)
4638
; KNL-NEXT: vzeroupper
@@ -51,36 +43,28 @@ target triple = "x86_64-unknown-linux-gnu"
5143
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
5244
; SKX-NEXT: vpmovd2m %xmm0, %k0
5345
; SKX-NEXT: kshiftrb $2, %k0, %k1
54-
; SKX-NEXT: kshiftrb $1, %k1, %k2
5546
; SKX-NEXT: kmovd %k1, %eax
5647
; SKX-NEXT: testb $1, %al
5748
; SKX-NEXT: fld1
5849
; SKX-NEXT: fldz
5950
; SKX-NEXT: fld %st(0)
6051
; SKX-NEXT: fcmovne %st(2), %st
61-
; SKX-NEXT: kmovd %k2, %eax
62-
; SKX-NEXT: testb $1, %al
52+
; SKX-NEXT: testb $2, %al
6353
; SKX-NEXT: fld %st(1)
6454
; SKX-NEXT: fcmovne %st(3), %st
65-
<<<<<<< HEAD
6655
; SKX-NEXT: kmovd %k0, %eax
67-
=======
68-
; SKX-NEXT: kshiftrb $1, %k0, %k1
69-
; SKX-NEXT: kmovd %k1, %eax
70-
>>>>>>> parent of 239ab16ec121 ([X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero)
7156
; SKX-NEXT: testb $1, %al
7257
; SKX-NEXT: fld %st(2)
7358
; SKX-NEXT: fcmovne %st(4), %st
74-
; SKX-NEXT: kmovd %k0, %eax
75-
; SKX-NEXT: testb $1, %al
59+
; SKX-NEXT: testb $2, %al
7660
; SKX-NEXT: fxch %st(3)
7761
; SKX-NEXT: fcmovne %st(4), %st
7862
; SKX-NEXT: fstp %st(4)
7963
; SKX-NEXT: fxch %st(3)
80-
; SKX-NEXT: fstpt (%rdi)
81-
; SKX-NEXT: fxch %st(1)
8264
; SKX-NEXT: fstpt 10(%rdi)
8365
; SKX-NEXT: fxch %st(1)
66+
; SKX-NEXT: fstpt (%rdi)
67+
; SKX-NEXT: fxch %st(1)
8468
; SKX-NEXT: fstpt 30(%rdi)
8569
; SKX-NEXT: fstpt 20(%rdi)
8670
; SKX-NEXT: retq

llvm/test/CodeGen/X86/pr34177.ll

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -49,35 +49,20 @@ define void @test(<4 x i64> %a, <4 x x86_fp80> %b, ptr %c) local_unnamed_addr {
4949
; AVX512VL-LABEL: test:
5050
; AVX512VL: # %bb.0:
5151
; AVX512VL-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k0
52-
<<<<<<< HEAD
5352
; AVX512VL-NEXT: kshiftrb $2, %k0, %k1
5453
; AVX512VL-NEXT: kmovd %k0, %eax
5554
; AVX512VL-NEXT: testb $2, %al
56-
=======
57-
; AVX512VL-NEXT: kshiftrb $1, %k0, %k1
58-
; AVX512VL-NEXT: kshiftrb $2, %k0, %k2
59-
; AVX512VL-NEXT: kmovd %k0, %eax
60-
; AVX512VL-NEXT: testb $1, %al
61-
>>>>>>> parent of 239ab16ec121 ([X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero)
6255
; AVX512VL-NEXT: fld1
6356
; AVX512VL-NEXT: fldz
6457
; AVX512VL-NEXT: fld %st(0)
6558
; AVX512VL-NEXT: fcmovne %st(2), %st
66-
; AVX512VL-NEXT: kmovd %k1, %eax
6759
; AVX512VL-NEXT: testb $1, %al
6860
; AVX512VL-NEXT: fld %st(1)
6961
; AVX512VL-NEXT: fcmovne %st(3), %st
70-
<<<<<<< HEAD
7162
; AVX512VL-NEXT: kmovd %k1, %eax
7263
; AVX512VL-NEXT: testb $2, %al
73-
=======
74-
; AVX512VL-NEXT: kshiftrb $1, %k2, %k0
75-
; AVX512VL-NEXT: kmovd %k0, %eax
76-
; AVX512VL-NEXT: testb $1, %al
77-
>>>>>>> parent of 239ab16ec121 ([X86] combineCMP - attempt to simplify KSHIFTR mask element extractions when just comparing against zero)
7864
; AVX512VL-NEXT: fld %st(2)
7965
; AVX512VL-NEXT: fcmovne %st(4), %st
80-
; AVX512VL-NEXT: kmovd %k2, %eax
8166
; AVX512VL-NEXT: testb $1, %al
8267
; AVX512VL-NEXT: fxch %st(3)
8368
; AVX512VL-NEXT: fcmovne %st(4), %st
@@ -92,10 +77,10 @@ define void @test(<4 x i64> %a, <4 x x86_fp80> %b, ptr %c) local_unnamed_addr {
9277
; AVX512VL-NEXT: fstpt 10(%rdi)
9378
; AVX512VL-NEXT: fxch %st(1)
9479
; AVX512VL-NEXT: fadd %st, %st(0)
95-
; AVX512VL-NEXT: fstpt (%rdi)
96-
; AVX512VL-NEXT: fadd %st, %st(0)
9780
; AVX512VL-NEXT: fstpt 20(%rdi)
9881
; AVX512VL-NEXT: fadd %st, %st(0)
82+
; AVX512VL-NEXT: fstpt (%rdi)
83+
; AVX512VL-NEXT: fadd %st, %st(0)
9984
; AVX512VL-NEXT: fstpt 60(%rdi)
10085
; AVX512VL-NEXT: fadd %st, %st(0)
10186
; AVX512VL-NEXT: fstpt 40(%rdi)

0 commit comments

Comments
 (0)