Skip to content

Commit 42ab122

Browse files
RKSimon authored and Chenyang-L committed
[X86] SimplifyDemandedBitsForTargetNode - add X86ISD::ANDNP handling
Add X86ISD::ANDNP handling to targetShrinkDemandedConstant as well, which allows us to replace a lot of truncated masks with (rematerializable) all-ones values.
1 parent 2c62c7e commit 42ab122

File tree

6 files changed

+569
-510
lines changed

6 files changed

+569
-510
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 28 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -38669,14 +38669,14 @@ X86TargetLowering::targetShrinkDemandedConstant(SDValue Op,
3866938669
return false;
3867038670
};
3867138671
// For vectors - if we have a constant, then try to sign extend.
38672-
// TODO: Handle AND/ANDN cases.
38672+
// TODO: Handle AND cases.
3867338673
unsigned ActiveBits = DemandedBits.getActiveBits();
3867438674
if (EltSize > ActiveBits && EltSize > 1 && isTypeLegal(VT) &&
38675-
(Opcode == ISD::OR || Opcode == ISD::XOR) &&
38675+
(Opcode == ISD::OR || Opcode == ISD::XOR || Opcode == X86ISD::ANDNP) &&
3867638676
NeedsSignExtension(Op.getOperand(1), ActiveBits)) {
3867738677
EVT ExtSVT = EVT::getIntegerVT(*TLO.DAG.getContext(), ActiveBits);
3867838678
EVT ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtSVT,
38679-
VT.getVectorNumElements());
38679+
VT.getVectorNumElements());
3868038680
SDValue NewC =
3868138681
TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(Op), VT,
3868238682
Op.getOperand(1), TLO.DAG.getValueType(ExtVT));
@@ -43818,6 +43818,31 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
4381843818
}
4381943819
break;
4382043820
}
43821+
case X86ISD::ANDNP: {
43822+
KnownBits Known2;
43823+
SDValue Op0 = Op.getOperand(0);
43824+
SDValue Op1 = Op.getOperand(1);
43825+
43826+
if (SimplifyDemandedBits(Op1, OriginalDemandedBits, OriginalDemandedElts,
43827+
Known, TLO, Depth + 1))
43828+
return true;
43829+
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
43830+
43831+
if (SimplifyDemandedBits(Op0, ~Known.Zero & OriginalDemandedBits,
43832+
OriginalDemandedElts, Known2, TLO, Depth + 1))
43833+
return true;
43834+
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
43835+
43836+
// If the RHS is a constant, see if we can simplify it.
43837+
if (ShrinkDemandedConstant(Op, ~Known2.One & OriginalDemandedBits,
43838+
OriginalDemandedElts, TLO))
43839+
return true;
43840+
43841+
// ANDNP = (~Op0 & Op1);
43842+
Known.One &= Known2.Zero;
43843+
Known.Zero |= Known2.One;
43844+
break;
43845+
}
4382143846
case X86ISD::VSHLI: {
4382243847
SDValue Op0 = Op.getOperand(0);
4382343848

llvm/test/CodeGen/X86/fpclamptosat_vec.ll

Lines changed: 58 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -82,8 +82,9 @@ define <2 x i32> @utest_f64i32(<2 x double> %x) {
8282
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
8383
; CHECK-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
8484
; CHECK-NEXT: pand %xmm3, %xmm0
85+
; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
8586
; CHECK-NEXT: pand %xmm0, %xmm1
86-
; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
87+
; CHECK-NEXT: pandn %xmm2, %xmm0
8788
; CHECK-NEXT: por %xmm1, %xmm0
8889
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
8990
; CHECK-NEXT: retq
@@ -270,31 +271,31 @@ define <4 x i32> @utest_f32i32(<4 x float> %x) {
270271
; CHECK-NEXT: orq %rax, %rdx
271272
; CHECK-NEXT: movq %rdx, %xmm0
272273
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
273-
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295]
274-
; CHECK-NEXT: movdqa {{.*#+}} xmm3 = [9223372039002259456,9223372039002259456]
275-
; CHECK-NEXT: movdqa %xmm0, %xmm4
276-
; CHECK-NEXT: pxor %xmm3, %xmm4
277-
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
278-
; CHECK-NEXT: movdqa {{.*#+}} xmm6 = [2147483647,2147483647,2147483647,2147483647]
279-
; CHECK-NEXT: movdqa %xmm6, %xmm7
280-
; CHECK-NEXT: pcmpgtd %xmm5, %xmm7
281-
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
282-
; CHECK-NEXT: pcmpeqd %xmm3, %xmm4
283-
; CHECK-NEXT: pand %xmm7, %xmm4
284-
; CHECK-NEXT: pand %xmm4, %xmm0
285-
; CHECK-NEXT: pandn %xmm2, %xmm4
286-
; CHECK-NEXT: por %xmm0, %xmm4
274+
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
275+
; CHECK-NEXT: movdqa %xmm0, %xmm3
276+
; CHECK-NEXT: pxor %xmm2, %xmm3
277+
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
278+
; CHECK-NEXT: movdqa {{.*#+}} xmm5 = [2147483647,2147483647,2147483647,2147483647]
279+
; CHECK-NEXT: movdqa %xmm5, %xmm6
280+
; CHECK-NEXT: pcmpgtd %xmm4, %xmm6
281+
; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
282+
; CHECK-NEXT: pcmpeqd %xmm2, %xmm3
283+
; CHECK-NEXT: pand %xmm6, %xmm3
284+
; CHECK-NEXT: pcmpeqd %xmm4, %xmm4
285+
; CHECK-NEXT: pand %xmm3, %xmm0
286+
; CHECK-NEXT: pandn %xmm4, %xmm3
287+
; CHECK-NEXT: por %xmm0, %xmm3
287288
; CHECK-NEXT: movdqa %xmm1, %xmm0
288-
; CHECK-NEXT: pxor %xmm3, %xmm0
289-
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm0[0,0,2,2]
290-
; CHECK-NEXT: pcmpgtd %xmm5, %xmm6
289+
; CHECK-NEXT: pxor %xmm2, %xmm0
290+
; CHECK-NEXT: pshufd {{.*#+}} xmm6 = xmm0[0,0,2,2]
291+
; CHECK-NEXT: pcmpgtd %xmm6, %xmm5
291292
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
292-
; CHECK-NEXT: pcmpeqd %xmm3, %xmm0
293-
; CHECK-NEXT: pand %xmm6, %xmm0
293+
; CHECK-NEXT: pcmpeqd %xmm2, %xmm0
294+
; CHECK-NEXT: pand %xmm5, %xmm0
294295
; CHECK-NEXT: pand %xmm0, %xmm1
295-
; CHECK-NEXT: pandn %xmm2, %xmm0
296+
; CHECK-NEXT: pandn %xmm4, %xmm0
296297
; CHECK-NEXT: por %xmm1, %xmm0
297-
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm4[0,2]
298+
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
298299
; CHECK-NEXT: retq
299300
entry:
300301
%conv = fptoui <4 x float> %x to <4 x i64>
@@ -550,32 +551,32 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
550551
; CHECK-NEXT: movq %rdx, %xmm0
551552
; CHECK-NEXT: punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload
552553
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
553-
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [4294967295,4294967295]
554-
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
555-
; CHECK-NEXT: movdqa %xmm0, %xmm3
556-
; CHECK-NEXT: pxor %xmm2, %xmm3
557-
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2]
558-
; CHECK-NEXT: movdqa {{.*#+}} xmm5 = [2147483647,2147483647,2147483647,2147483647]
559-
; CHECK-NEXT: movdqa %xmm5, %xmm6
560-
; CHECK-NEXT: pcmpgtd %xmm4, %xmm6
561-
; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
562-
; CHECK-NEXT: pcmpeqd %xmm2, %xmm3
563-
; CHECK-NEXT: pand %xmm6, %xmm3
564-
; CHECK-NEXT: pand %xmm3, %xmm0
565-
; CHECK-NEXT: pandn %xmm1, %xmm3
566-
; CHECK-NEXT: por %xmm0, %xmm3
554+
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [9223372039002259456,9223372039002259456]
555+
; CHECK-NEXT: movdqa %xmm0, %xmm2
556+
; CHECK-NEXT: pxor %xmm1, %xmm2
557+
; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
558+
; CHECK-NEXT: movdqa {{.*#+}} xmm4 = [2147483647,2147483647,2147483647,2147483647]
559+
; CHECK-NEXT: movdqa %xmm4, %xmm5
560+
; CHECK-NEXT: pcmpgtd %xmm3, %xmm5
561+
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
562+
; CHECK-NEXT: pcmpeqd %xmm1, %xmm2
563+
; CHECK-NEXT: pand %xmm5, %xmm2
564+
; CHECK-NEXT: pcmpeqd %xmm3, %xmm3
565+
; CHECK-NEXT: pand %xmm2, %xmm0
566+
; CHECK-NEXT: pandn %xmm3, %xmm2
567+
; CHECK-NEXT: por %xmm0, %xmm2
567568
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
568569
; CHECK-NEXT: movdqa %xmm6, %xmm0
569-
; CHECK-NEXT: pxor %xmm2, %xmm0
570-
; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,0,2,2]
571-
; CHECK-NEXT: pcmpgtd %xmm4, %xmm5
570+
; CHECK-NEXT: pxor %xmm1, %xmm0
571+
; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm0[0,0,2,2]
572+
; CHECK-NEXT: pcmpgtd %xmm5, %xmm4
572573
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
573-
; CHECK-NEXT: pcmpeqd %xmm2, %xmm0
574-
; CHECK-NEXT: pand %xmm5, %xmm0
574+
; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
575+
; CHECK-NEXT: pand %xmm4, %xmm0
575576
; CHECK-NEXT: pand %xmm0, %xmm6
576-
; CHECK-NEXT: pandn %xmm1, %xmm0
577+
; CHECK-NEXT: pandn %xmm3, %xmm0
577578
; CHECK-NEXT: por %xmm6, %xmm0
578-
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
579+
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
579580
; CHECK-NEXT: addq $72, %rsp
580581
; CHECK-NEXT: .cfi_def_cfa_offset 8
581582
; CHECK-NEXT: retq
@@ -733,8 +734,9 @@ define <2 x i16> @utest_f64i16(<2 x double> %x) {
733734
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = <2147549183,2147549183,u,u>
734735
; CHECK-NEXT: pcmpgtd %xmm1, %xmm2
735736
; CHECK-NEXT: andpd %xmm2, %xmm0
736-
; CHECK-NEXT: andnpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
737-
; CHECK-NEXT: orpd %xmm0, %xmm2
737+
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
738+
; CHECK-NEXT: pandn %xmm1, %xmm2
739+
; CHECK-NEXT: por %xmm0, %xmm2
738740
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,2,2,3,4,5,6,7]
739741
; CHECK-NEXT: retq
740742
entry:
@@ -802,7 +804,8 @@ define <4 x i16> @utest_f32i16(<4 x float> %x) {
802804
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [2147549183,2147549183,2147549183,2147549183]
803805
; CHECK-NEXT: pcmpgtd %xmm1, %xmm2
804806
; CHECK-NEXT: pand %xmm2, %xmm0
805-
; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
807+
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
808+
; CHECK-NEXT: pandn %xmm1, %xmm2
806809
; CHECK-NEXT: por %xmm0, %xmm2
807810
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,2,2,3,4,5,6,7]
808811
; CHECK-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
@@ -1656,8 +1659,9 @@ define <2 x i32> @utest_f64i32_mm(<2 x double> %x) {
16561659
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
16571660
; CHECK-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
16581661
; CHECK-NEXT: pand %xmm3, %xmm0
1662+
; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
16591663
; CHECK-NEXT: pand %xmm0, %xmm1
1660-
; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1664+
; CHECK-NEXT: pandn %xmm2, %xmm0
16611665
; CHECK-NEXT: por %xmm1, %xmm0
16621666
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
16631667
; CHECK-NEXT: retq
@@ -1849,7 +1853,7 @@ define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
18491853
; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
18501854
; CHECK-NEXT: pcmpeqd %xmm2, %xmm3
18511855
; CHECK-NEXT: pand %xmm6, %xmm3
1852-
; CHECK-NEXT: movdqa {{.*#+}} xmm4 = [4294967295,4294967295]
1856+
; CHECK-NEXT: pcmpeqd %xmm4, %xmm4
18531857
; CHECK-NEXT: pand %xmm3, %xmm0
18541858
; CHECK-NEXT: pandn %xmm4, %xmm3
18551859
; CHECK-NEXT: por %xmm0, %xmm3
@@ -2124,7 +2128,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
21242128
; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
21252129
; CHECK-NEXT: pcmpeqd %xmm1, %xmm2
21262130
; CHECK-NEXT: pand %xmm5, %xmm2
2127-
; CHECK-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
2131+
; CHECK-NEXT: pcmpeqd %xmm3, %xmm3
21282132
; CHECK-NEXT: pand %xmm2, %xmm0
21292133
; CHECK-NEXT: pandn %xmm3, %xmm2
21302134
; CHECK-NEXT: por %xmm0, %xmm2
@@ -2292,8 +2296,9 @@ define <2 x i16> @utest_f64i16_mm(<2 x double> %x) {
22922296
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = <2147549183,2147549183,u,u>
22932297
; CHECK-NEXT: pcmpgtd %xmm1, %xmm2
22942298
; CHECK-NEXT: andpd %xmm2, %xmm0
2295-
; CHECK-NEXT: andnpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
2296-
; CHECK-NEXT: orpd %xmm0, %xmm2
2299+
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
2300+
; CHECK-NEXT: pandn %xmm1, %xmm2
2301+
; CHECK-NEXT: por %xmm0, %xmm2
22972302
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,2,2,3,4,5,6,7]
22982303
; CHECK-NEXT: retq
22992304
entry:
@@ -2356,7 +2361,8 @@ define <4 x i16> @utest_f32i16_mm(<4 x float> %x) {
23562361
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [2147549183,2147549183,2147549183,2147549183]
23572362
; CHECK-NEXT: pcmpgtd %xmm1, %xmm2
23582363
; CHECK-NEXT: pand %xmm2, %xmm0
2359-
; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
2364+
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
2365+
; CHECK-NEXT: pandn %xmm1, %xmm2
23602366
; CHECK-NEXT: por %xmm0, %xmm2
23612367
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,2,2,3,4,5,6,7]
23622368
; CHECK-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]

0 commit comments

Comments
 (0)