Commit 61d5add

[X86] SimplifyDemandedBitsForTargetNode - call SimplifyMultipleUseDemandedBits on SSE shift-by-immediate nodes.
Attempt to peek through multiple-use SHLI/SRLI/SRAI source vectors.
1 parent 02e5c25

11 files changed: +476 -446 lines
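The same guard-and-rebuild block is added to the X86ISD::VSHLI, X86ISD::VSRLI and X86ISD::VSRAI cases of X86TargetLowering::SimplifyDemandedBitsForTargetNode. Below is a condensed, annotated view of that shared pattern, not a separate change: the names (Op0, Op1, DemandedMask, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth, VT) come from the surrounding function, and the comments beyond the original one are explanatory additions.

    // Runs after the known bits of the shift result have been computed for
    // the current case; skip the extra work if every demanded bit is known.
    if (!OriginalDemandedBits.isSubsetOf(Known.Zero | Known.One)) {
      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
              Op0, DemandedMask, OriginalDemandedElts, TLO.DAG, Depth + 1)) {
        // Rebuild the shift on the simplified source; the original multi-use
        // source value is left in place for its other users.
        SDValue NewOp =
            TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedOp0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

The effect shows up in the test updates below: when the bits a shift demands from its multi-use source are available from a simpler existing value, the shift is rebuilt on that value — for example, in combine-sdiv.ll the pshufd lane-splat feeding the 64-bit sign-shift sequence is replaced by a plain movdqa register copy.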

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 30 additions & 0 deletions
@@ -43418,6 +43418,16 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
 
     // Low bits known zero.
     Known.Zero.setLowBits(ShAmt);
+
+    if (!OriginalDemandedBits.isSubsetOf(Known.Zero | Known.One)) {
+      // Attempt to avoid multi-use ops if we don't need anything from them.
+      if (SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+              Op0, DemandedMask, OriginalDemandedElts, TLO.DAG, Depth + 1)) {
+        SDValue NewOp =
+            TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedOp0, Op1);
+        return TLO.CombineTo(Op, NewOp);
+      }
+    }
     return false;
   }
   case X86ISD::VSRLI: {
@@ -43439,6 +43449,16 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
 
     // High bits known zero.
     Known.Zero.setHighBits(ShAmt);
+
+    if (!OriginalDemandedBits.isSubsetOf(Known.Zero | Known.One)) {
+      // Attempt to avoid multi-use ops if we don't need anything from them.
+      if (SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+              Op0, DemandedMask, OriginalDemandedElts, TLO.DAG, Depth + 1)) {
+        SDValue NewOp =
+            TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedOp0, Op1);
+        return TLO.CombineTo(Op, NewOp);
+      }
+    }
     return false;
   }
   case X86ISD::VSRAI: {
@@ -43486,6 +43506,16 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
     // High bits are known one.
     if (Known.One[BitWidth - ShAmt - 1])
       Known.One.setHighBits(ShAmt);
+
+    if (!OriginalDemandedBits.isSubsetOf(Known.Zero | Known.One)) {
+      // Attempt to avoid multi-use ops if we don't need anything from them.
+      if (SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+              Op0, DemandedMask, OriginalDemandedElts, TLO.DAG, Depth + 1)) {
+        SDValue NewOp =
+            TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedOp0, Op1);
+        return TLO.CombineTo(Op, NewOp);
+      }
+    }
     return false;
   }
   case X86ISD::BLENDV: {

llvm/test/CodeGen/X86/combine-sdiv.ll

Lines changed: 6 additions & 6 deletions
@@ -1632,7 +1632,7 @@ define <4 x i64> @combine_vec_sdiv_by_pow2b_v4i64(<4 x i64> %x) {
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
 ; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; SSE2-NEXT:    movdqa %xmm1, %xmm2
 ; SSE2-NEXT:    psrad $31, %xmm2
 ; SSE2-NEXT:    movdqa %xmm2, %xmm3
 ; SSE2-NEXT:    psrlq $61, %xmm3
@@ -1659,7 +1659,7 @@ define <4 x i64> @combine_vec_sdiv_by_pow2b_v4i64(<4 x i64> %x) {
 ; SSE41-NEXT:    psrlq $2, %xmm2
 ; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; SSE41-NEXT:    movdqa %xmm1, %xmm2
 ; SSE41-NEXT:    psrad $31, %xmm2
 ; SSE41-NEXT:    movdqa %xmm2, %xmm3
 ; SSE41-NEXT:    psrlq $60, %xmm3
@@ -1775,7 +1775,7 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) {
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
 ; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
+; SSE2-NEXT:    movdqa %xmm1, %xmm4
 ; SSE2-NEXT:    psrad $31, %xmm4
 ; SSE2-NEXT:    movdqa %xmm4, %xmm5
 ; SSE2-NEXT:    psrlq $61, %xmm5
@@ -1789,7 +1789,7 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) {
 ; SSE2-NEXT:    movapd {{.*#+}} xmm4 = [1152921504606846976,576460752303423488]
 ; SSE2-NEXT:    xorpd %xmm4, %xmm1
 ; SSE2-NEXT:    psubq %xmm4, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[1,1,3,3]
+; SSE2-NEXT:    movdqa %xmm3, %xmm5
 ; SSE2-NEXT:    psrad $31, %xmm5
 ; SSE2-NEXT:    movdqa %xmm5, %xmm6
 ; SSE2-NEXT:    psrlq $61, %xmm6
@@ -1824,7 +1824,7 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) {
 ; SSE41-NEXT:    psrlq $2, %xmm4
 ; SSE41-NEXT:    pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm5[2,3],xmm4[4,5],xmm5[6,7]
 ; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm4[4,5,6,7]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
+; SSE41-NEXT:    movdqa %xmm1, %xmm4
 ; SSE41-NEXT:    psrad $31, %xmm4
 ; SSE41-NEXT:    movdqa %xmm4, %xmm5
 ; SSE41-NEXT:    psrlq $60, %xmm5
@@ -1838,7 +1838,7 @@ define <8 x i64> @combine_vec_sdiv_by_pow2b_v8i64(<8 x i64> %x) {
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [1152921504606846976,576460752303423488]
 ; SSE41-NEXT:    pxor %xmm4, %xmm1
 ; SSE41-NEXT:    psubq %xmm4, %xmm1
-; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[1,1,3,3]
+; SSE41-NEXT:    movdqa %xmm3, %xmm5
 ; SSE41-NEXT:    psrad $31, %xmm5
 ; SSE41-NEXT:    movdqa %xmm5, %xmm6
 ; SSE41-NEXT:    psrlq $60, %xmm6

llvm/test/CodeGen/X86/combine-srem.ll

Lines changed: 31 additions & 31 deletions
@@ -255,35 +255,35 @@ define <4 x i32> @combine_vec_srem_by_pow2b(<4 x i32> %x) {
 ; SSE-LABEL: combine_vec_srem_by_pow2b:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movdqa %xmm0, %xmm1
-; SSE-NEXT:    psrad $31, %xmm1
-; SSE-NEXT:    movdqa %xmm1, %xmm2
-; SSE-NEXT:    psrld $29, %xmm2
-; SSE-NEXT:    movdqa %xmm1, %xmm3
-; SSE-NEXT:    psrld $31, %xmm3
-; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; SSE-NEXT:    psrld $30, %xmm1
-; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
-; SSE-NEXT:    paddd %xmm0, %xmm1
-; SSE-NEXT:    movdqa %xmm1, %xmm2
-; SSE-NEXT:    psrad $3, %xmm2
-; SSE-NEXT:    movdqa %xmm1, %xmm3
+; SSE-NEXT:    psrld $31, %xmm1
+; SSE-NEXT:    movdqa %xmm0, %xmm2
+; SSE-NEXT:    psrad $31, %xmm2
+; SSE-NEXT:    movdqa %xmm2, %xmm3
+; SSE-NEXT:    psrld $29, %xmm3
+; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm1[0,1,2,3],xmm3[4,5,6,7]
+; SSE-NEXT:    psrld $30, %xmm2
+; SSE-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
+; SSE-NEXT:    paddd %xmm0, %xmm2
+; SSE-NEXT:    movdqa %xmm2, %xmm1
+; SSE-NEXT:    psrad $3, %xmm1
+; SSE-NEXT:    movdqa %xmm2, %xmm3
 ; SSE-NEXT:    psrad $1, %xmm3
-; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; SSE-NEXT:    psrad $2, %xmm1
-; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
-; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; SSE-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE-NEXT:    psubd %xmm1, %xmm0
+; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
+; SSE-NEXT:    psrad $2, %xmm2
+; SSE-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
+; SSE-NEXT:    pblendw {{.*#+}} xmm2 = xmm0[0,1],xmm2[2,3,4,5,6,7]
+; SSE-NEXT:    pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE-NEXT:    psubd %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: combine_vec_srem_by_pow2b:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
-; AVX1-NEXT:    vpsrld $29, %xmm1, %xmm2
-; AVX1-NEXT:    vpsrld $31, %xmm1, %xmm3
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; AVX1-NEXT:    vpsrld $30, %xmm1, %xmm1
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm1
+; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
+; AVX1-NEXT:    vpsrld $29, %xmm2, %xmm3
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
+; AVX1-NEXT:    vpsrld $30, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
 ; AVX1-NEXT:    vpsrad $3, %xmm1, %xmm2
 ; AVX1-NEXT:    vpsrad $1, %xmm1, %xmm3
@@ -320,10 +320,10 @@ define <4 x i32> @combine_vec_srem_by_pow2b_neg(<4 x i32> %x) {
 ; SSE-NEXT:    movdqa %xmm1, %xmm3
 ; SSE-NEXT:    psrld $30, %xmm3
 ; SSE-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; SSE-NEXT:    movdqa %xmm1, %xmm2
-; SSE-NEXT:    psrld $29, %xmm2
-; SSE-NEXT:    psrld $31, %xmm1
-; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
+; SSE-NEXT:    movdqa %xmm0, %xmm2
+; SSE-NEXT:    psrld $31, %xmm2
+; SSE-NEXT:    psrld $29, %xmm1
+; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
 ; SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
 ; SSE-NEXT:    paddd %xmm0, %xmm1
 ; SSE-NEXT:    movdqa %xmm1, %xmm2
@@ -346,9 +346,9 @@ define <4 x i32> @combine_vec_srem_by_pow2b_neg(<4 x i32> %x) {
 ; AVX1-NEXT:    vpsrld $28, %xmm1, %xmm2
 ; AVX1-NEXT:    vpsrld $30, %xmm1, %xmm3
 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; AVX1-NEXT:    vpsrld $29, %xmm1, %xmm3
-; AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
+; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm3
+; AVX1-NEXT:    vpsrld $29, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
 ; AVX1-NEXT:    vpsrad $4, %xmm1, %xmm2
