Skip to content

Commit 117d755

Browse files
authored
[DAG] SimplifyDemandedBits - use ComputeKnownBits instead of getValidShiftAmountConstant to check for constant shift amounts. (#92412)
This allows us to handle cases where the constant has already been type legalized behind a bitcast. Despite calling ComputeKnownBits, I'm not seeing any notable change in compile time.
1 parent 309a881 commit 117d755

File tree

6 files changed

+52
-68
lines changed

6 files changed

+52
-68
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1732,9 +1732,9 @@ bool TargetLowering::SimplifyDemandedBits(
17321732
SDValue Op1 = Op.getOperand(1);
17331733
EVT ShiftVT = Op1.getValueType();
17341734

1735-
if (const APInt *SA =
1736-
TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1737-
unsigned ShAmt = SA->getZExtValue();
1735+
KnownBits KnownSA = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
1736+
if (KnownSA.isConstant() && KnownSA.getConstant().ult(BitWidth)) {
1737+
unsigned ShAmt = KnownSA.getConstant().getZExtValue();
17381738
if (ShAmt == 0)
17391739
return TLO.CombineTo(Op, Op0);
17401740

@@ -1744,9 +1744,10 @@ bool TargetLowering::SimplifyDemandedBits(
17441744
// TODO - support non-uniform vector amounts.
17451745
if (Op0.getOpcode() == ISD::SRL) {
17461746
if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1747-
if (const APInt *SA2 =
1748-
TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1749-
unsigned C1 = SA2->getZExtValue();
1747+
KnownBits InnerSA = TLO.DAG.computeKnownBits(Op0.getOperand(1),
1748+
DemandedElts, Depth + 1);
1749+
if (InnerSA.isConstant() && InnerSA.getConstant().ult(BitWidth)) {
1750+
unsigned C1 = InnerSA.getConstant().getZExtValue();
17501751
unsigned Opc = ISD::SHL;
17511752
int Diff = ShAmt - C1;
17521753
if (Diff < 0) {
@@ -1912,9 +1913,9 @@ bool TargetLowering::SimplifyDemandedBits(
19121913
DemandedElts, Depth + 1))
19131914
return TLO.CombineTo(Op, AVG);
19141915

1915-
if (const APInt *SA =
1916-
TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1917-
unsigned ShAmt = SA->getZExtValue();
1916+
KnownBits KnownSA = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
1917+
if (KnownSA.isConstant() && KnownSA.getConstant().ult(BitWidth)) {
1918+
unsigned ShAmt = KnownSA.getConstant().getZExtValue();
19181919
if (ShAmt == 0)
19191920
return TLO.CombineTo(Op, Op0);
19201921

@@ -1924,9 +1925,10 @@ bool TargetLowering::SimplifyDemandedBits(
19241925
// TODO - support non-uniform vector amounts.
19251926
if (Op0.getOpcode() == ISD::SHL) {
19261927
if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1927-
if (const APInt *SA2 =
1928-
TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1929-
unsigned C1 = SA2->getZExtValue();
1928+
KnownBits InnerSA = TLO.DAG.computeKnownBits(Op0.getOperand(1),
1929+
DemandedElts, Depth + 1);
1930+
if (InnerSA.isConstant() && InnerSA.getConstant().ult(BitWidth)) {
1931+
unsigned C1 = InnerSA.getConstant().getZExtValue();
19301932
unsigned Opc = ISD::SRL;
19311933
int Diff = ShAmt - C1;
19321934
if (Diff < 0) {
@@ -2018,24 +2020,25 @@ bool TargetLowering::SimplifyDemandedBits(
20182020
DemandedElts, Depth + 1))
20192021
return TLO.CombineTo(Op, AVG);
20202022

2021-
if (const APInt *SA =
2022-
TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
2023-
unsigned ShAmt = SA->getZExtValue();
2023+
KnownBits KnownSA = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2024+
if (KnownSA.isConstant() && KnownSA.getConstant().ult(BitWidth)) {
2025+
unsigned ShAmt = KnownSA.getConstant().getZExtValue();
20242026
if (ShAmt == 0)
20252027
return TLO.CombineTo(Op, Op0);
20262028

20272029
// fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
20282030
// supports sext_inreg.
20292031
if (Op0.getOpcode() == ISD::SHL) {
2030-
if (const APInt *InnerSA =
2031-
TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
2032+
KnownBits InnerSA = TLO.DAG.computeKnownBits(Op0.getOperand(1),
2033+
DemandedElts, Depth + 1);
2034+
if (InnerSA.isConstant() && InnerSA.getConstant().ult(BitWidth)) {
20322035
unsigned LowBits = BitWidth - ShAmt;
20332036
EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
20342037
if (VT.isVector())
20352038
ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
20362039
VT.getVectorElementCount());
20372040

2038-
if (*InnerSA == ShAmt) {
2041+
if (InnerSA.getConstant() == ShAmt) {
20392042
if (!TLO.LegalOperations() ||
20402043
getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) == Legal)
20412044
return TLO.CombineTo(

llvm/test/CodeGen/AArch64/sadd_sat_vec.ll

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -149,12 +149,12 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind {
149149
; CHECK-SD: // %bb.0:
150150
; CHECK-SD-NEXT: ldr s0, [x0]
151151
; CHECK-SD-NEXT: ldr s1, [x1]
152-
; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
153-
; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0
152+
; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
153+
; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0
154154
; CHECK-SD-NEXT: shl v1.4h, v1.4h, #8
155155
; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
156156
; CHECK-SD-NEXT: sqadd v0.4h, v0.4h, v1.4h
157-
; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
157+
; CHECK-SD-NEXT: ushr v0.4h, v0.4h, #8
158158
; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b
159159
; CHECK-SD-NEXT: str s0, [x2]
160160
; CHECK-SD-NEXT: ret
@@ -364,10 +364,6 @@ define void @v1i16(ptr %px, ptr %py, ptr %pz) nounwind {
364364
define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
365365
; CHECK-LABEL: v16i4:
366366
; CHECK: // %bb.0:
367-
; CHECK-NEXT: shl v0.16b, v0.16b, #4
368-
; CHECK-NEXT: shl v1.16b, v1.16b, #4
369-
; CHECK-NEXT: sshr v0.16b, v0.16b, #4
370-
; CHECK-NEXT: sshr v1.16b, v1.16b, #4
371367
; CHECK-NEXT: shl v1.16b, v1.16b, #4
372368
; CHECK-NEXT: shl v0.16b, v0.16b, #4
373369
; CHECK-NEXT: sqadd v0.16b, v0.16b, v1.16b

llvm/test/CodeGen/AArch64/ssub_sat_vec.ll

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -150,12 +150,12 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind {
150150
; CHECK-SD: // %bb.0:
151151
; CHECK-SD-NEXT: ldr s0, [x0]
152152
; CHECK-SD-NEXT: ldr s1, [x1]
153-
; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
154-
; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0
153+
; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
154+
; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0
155155
; CHECK-SD-NEXT: shl v1.4h, v1.4h, #8
156156
; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
157157
; CHECK-SD-NEXT: sqsub v0.4h, v0.4h, v1.4h
158-
; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
158+
; CHECK-SD-NEXT: ushr v0.4h, v0.4h, #8
159159
; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b
160160
; CHECK-SD-NEXT: str s0, [x2]
161161
; CHECK-SD-NEXT: ret
@@ -365,10 +365,6 @@ define void @v1i16(ptr %px, ptr %py, ptr %pz) nounwind {
365365
define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
366366
; CHECK-LABEL: v16i4:
367367
; CHECK: // %bb.0:
368-
; CHECK-NEXT: shl v0.16b, v0.16b, #4
369-
; CHECK-NEXT: shl v1.16b, v1.16b, #4
370-
; CHECK-NEXT: sshr v0.16b, v0.16b, #4
371-
; CHECK-NEXT: sshr v1.16b, v1.16b, #4
372368
; CHECK-NEXT: shl v1.16b, v1.16b, #4
373369
; CHECK-NEXT: shl v0.16b, v0.16b, #4
374370
; CHECK-NEXT: sqsub v0.16b, v0.16b, v1.16b

llvm/test/CodeGen/PowerPC/pr44183.ll

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,8 @@ define void @_ZN1m1nEv(ptr %this) local_unnamed_addr nounwind align 2 {
2222
; CHECK-NEXT: rlwimi r4, r3, 0, 0, 0
2323
; CHECK-NEXT: bl _ZN1llsE1d
2424
; CHECK-NEXT: nop
25-
; CHECK-NEXT: ld r3, 16(r30)
26-
; CHECK-NEXT: ld r4, 8(r30)
27-
; CHECK-NEXT: rldicl r4, r4, 60, 4
28-
; CHECK-NEXT: sldi r3, r3, 60
29-
; CHECK-NEXT: or r3, r3, r4
30-
; CHECK-NEXT: sldi r3, r3, 31
31-
; CHECK-NEXT: rlwinm r4, r3, 0, 0, 0
25+
; CHECK-NEXT: ld r3, 8(r30)
26+
; CHECK-NEXT: rlwinm r4, r3, 27, 0, 0
3227
; CHECK-NEXT: bl _ZN1llsE1d
3328
; CHECK-NEXT: nop
3429
; CHECK-NEXT: addi r1, r1, 48

llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll

Lines changed: 21 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,9 @@ define <vscale x 1 x i8> @bitreverse_nxv1i8(<vscale x 1 x i8> %va) {
88
; CHECK-LABEL: bitreverse_nxv1i8:
99
; CHECK: # %bb.0:
1010
; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
11-
; CHECK-NEXT: vsrl.vi v9, v8, 4
12-
; CHECK-NEXT: vand.vi v8, v8, 15
13-
; CHECK-NEXT: vsll.vi v8, v8, 4
14-
; CHECK-NEXT: vor.vv v8, v9, v8
11+
; CHECK-NEXT: vsll.vi v9, v8, 4
12+
; CHECK-NEXT: vsrl.vi v8, v8, 4
13+
; CHECK-NEXT: vor.vv v8, v8, v9
1514
; CHECK-NEXT: vsrl.vi v9, v8, 2
1615
; CHECK-NEXT: li a0, 51
1716
; CHECK-NEXT: vand.vx v9, v9, a0
@@ -40,10 +39,9 @@ define <vscale x 2 x i8> @bitreverse_nxv2i8(<vscale x 2 x i8> %va) {
4039
; CHECK-LABEL: bitreverse_nxv2i8:
4140
; CHECK: # %bb.0:
4241
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
43-
; CHECK-NEXT: vsrl.vi v9, v8, 4
44-
; CHECK-NEXT: vand.vi v8, v8, 15
45-
; CHECK-NEXT: vsll.vi v8, v8, 4
46-
; CHECK-NEXT: vor.vv v8, v9, v8
42+
; CHECK-NEXT: vsll.vi v9, v8, 4
43+
; CHECK-NEXT: vsrl.vi v8, v8, 4
44+
; CHECK-NEXT: vor.vv v8, v8, v9
4745
; CHECK-NEXT: vsrl.vi v9, v8, 2
4846
; CHECK-NEXT: li a0, 51
4947
; CHECK-NEXT: vand.vx v9, v9, a0
@@ -72,10 +70,9 @@ define <vscale x 4 x i8> @bitreverse_nxv4i8(<vscale x 4 x i8> %va) {
7270
; CHECK-LABEL: bitreverse_nxv4i8:
7371
; CHECK: # %bb.0:
7472
; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
75-
; CHECK-NEXT: vsrl.vi v9, v8, 4
76-
; CHECK-NEXT: vand.vi v8, v8, 15
77-
; CHECK-NEXT: vsll.vi v8, v8, 4
78-
; CHECK-NEXT: vor.vv v8, v9, v8
73+
; CHECK-NEXT: vsll.vi v9, v8, 4
74+
; CHECK-NEXT: vsrl.vi v8, v8, 4
75+
; CHECK-NEXT: vor.vv v8, v8, v9
7976
; CHECK-NEXT: vsrl.vi v9, v8, 2
8077
; CHECK-NEXT: li a0, 51
8178
; CHECK-NEXT: vand.vx v9, v9, a0
@@ -104,10 +101,9 @@ define <vscale x 8 x i8> @bitreverse_nxv8i8(<vscale x 8 x i8> %va) {
104101
; CHECK-LABEL: bitreverse_nxv8i8:
105102
; CHECK: # %bb.0:
106103
; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
107-
; CHECK-NEXT: vsrl.vi v9, v8, 4
108-
; CHECK-NEXT: vand.vi v8, v8, 15
109-
; CHECK-NEXT: vsll.vi v8, v8, 4
110-
; CHECK-NEXT: vor.vv v8, v9, v8
104+
; CHECK-NEXT: vsll.vi v9, v8, 4
105+
; CHECK-NEXT: vsrl.vi v8, v8, 4
106+
; CHECK-NEXT: vor.vv v8, v8, v9
111107
; CHECK-NEXT: vsrl.vi v9, v8, 2
112108
; CHECK-NEXT: li a0, 51
113109
; CHECK-NEXT: vand.vx v9, v9, a0
@@ -136,10 +132,9 @@ define <vscale x 16 x i8> @bitreverse_nxv16i8(<vscale x 16 x i8> %va) {
136132
; CHECK-LABEL: bitreverse_nxv16i8:
137133
; CHECK: # %bb.0:
138134
; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma
139-
; CHECK-NEXT: vsrl.vi v10, v8, 4
140-
; CHECK-NEXT: vand.vi v8, v8, 15
141-
; CHECK-NEXT: vsll.vi v8, v8, 4
142-
; CHECK-NEXT: vor.vv v8, v10, v8
135+
; CHECK-NEXT: vsll.vi v10, v8, 4
136+
; CHECK-NEXT: vsrl.vi v8, v8, 4
137+
; CHECK-NEXT: vor.vv v8, v8, v10
143138
; CHECK-NEXT: vsrl.vi v10, v8, 2
144139
; CHECK-NEXT: li a0, 51
145140
; CHECK-NEXT: vand.vx v10, v10, a0
@@ -168,10 +163,9 @@ define <vscale x 32 x i8> @bitreverse_nxv32i8(<vscale x 32 x i8> %va) {
168163
; CHECK-LABEL: bitreverse_nxv32i8:
169164
; CHECK: # %bb.0:
170165
; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
171-
; CHECK-NEXT: vsrl.vi v12, v8, 4
172-
; CHECK-NEXT: vand.vi v8, v8, 15
173-
; CHECK-NEXT: vsll.vi v8, v8, 4
174-
; CHECK-NEXT: vor.vv v8, v12, v8
166+
; CHECK-NEXT: vsll.vi v12, v8, 4
167+
; CHECK-NEXT: vsrl.vi v8, v8, 4
168+
; CHECK-NEXT: vor.vv v8, v8, v12
175169
; CHECK-NEXT: vsrl.vi v12, v8, 2
176170
; CHECK-NEXT: li a0, 51
177171
; CHECK-NEXT: vand.vx v12, v12, a0
@@ -200,10 +194,9 @@ define <vscale x 64 x i8> @bitreverse_nxv64i8(<vscale x 64 x i8> %va) {
200194
; CHECK-LABEL: bitreverse_nxv64i8:
201195
; CHECK: # %bb.0:
202196
; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
203-
; CHECK-NEXT: vsrl.vi v16, v8, 4
204-
; CHECK-NEXT: vand.vi v8, v8, 15
205-
; CHECK-NEXT: vsll.vi v8, v8, 4
206-
; CHECK-NEXT: vor.vv v8, v16, v8
197+
; CHECK-NEXT: vsll.vi v16, v8, 4
198+
; CHECK-NEXT: vsrl.vi v8, v8, 4
199+
; CHECK-NEXT: vor.vv v8, v8, v16
207200
; CHECK-NEXT: vsrl.vi v16, v8, 2
208201
; CHECK-NEXT: li a0, 51
209202
; CHECK-NEXT: vand.vx v16, v16, a0

llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3176,7 +3176,8 @@ define <2 x i64> @test_128_i64_x_2_18446744065119617024_mask_ashr_32(<2 x i64> %
31763176
;
31773177
; X86-AVX2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_32:
31783178
; X86-AVX2: # %bb.0:
3179-
; X86-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
3179+
; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967294,4294967294,4294967294,4294967294]
3180+
; X86-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
31803181
; X86-AVX2-NEXT: vpsrad $31, %xmm0, %xmm1
31813182
; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
31823183
; X86-AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]

0 commit comments

Comments
 (0)