
Commit 8a20acd

[DAG] SimplifyMultipleUseDemandedBits - bypass ADD nodes if either operand is zero
The dpbusd_const.ll test change is due to us losing the expanded add reduction pattern, as one of the elements is known to be zero (removing one of the adds from the reduction pyramid). I don't think it's of concern. Noticed while working on #107423
1 parent aef0e77 commit 8a20acd
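In effect, SimplifyMultipleUseDemandedBits can now look through an ADD whose other operand is known to be zero, even when the add has additional uses, so demanded-bits queries on its users see the non-zero operand directly. A minimal IR sketch of the idea (hypothetical; at the DAG level such known-zero operands usually arise transiently during legalization and combining rather than as a literal zero like this):

    define i32 @look_through_add(i32 %x, i32 %y) {
      ; %zero has no bits set, so computeKnownBits proves it zero.
      %zero = and i32 %y, 0
      ; %sum has two uses, so single-use folds would not touch it; the
      ; multiple-use bypass still lets each user treat %sum as plain %x.
      %sum = add i32 %x, %zero
      %lo = and i32 %sum, 255
      %hi = lshr i32 %sum, 8
      %r = or i32 %lo, %hi
      ret i32 %r
    }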

13 files changed: +306 -298 lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 10 additions & 0 deletions
@@ -797,6 +797,16 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
       return Op.getOperand(1);
     break;
   }
+  case ISD::ADD: {
+    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    if (RHSKnown.isZero())
+      return Op.getOperand(0);
+
+    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    if (LHSKnown.isZero())
+      return Op.getOperand(1);
+    break;
+  }
   case ISD::SHL: {
     // If we are only demanding sign bits then we can use the shift source
     // directly.
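The dpbusd_const.ll note in the commit message follows from this code: an expanded add reduction is a pyramid of adds, and when one leaf is known zero the new bypass removes a whole add from the pyramid, so the reduction pattern matcher no longer sees the expected shape. Schematically (hypothetical IR, four elements):

    ; Reduction pyramid over %e0..%e3. If %e3 is known to be zero,
    ; the right-hand add collapses to %e2 and one add disappears
    ; from the pyramid.
    %a = add i32 %e0, %e1
    %b = add i32 %e2, %e3
    %s = add i32 %a, %b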

llvm/test/CodeGen/AArch64/srem-lkk.ll

Lines changed: 6 additions & 9 deletions
@@ -23,12 +23,11 @@ define i32 @fold_srem_positive_even(i32 %x) {
 ; CHECK-LABEL: fold_srem_positive_even:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #36849 // =0x8ff1
+; CHECK-NEXT: mov w9, #1060 // =0x424
 ; CHECK-NEXT: movk w8, #15827, lsl #16
 ; CHECK-NEXT: smull x8, w0, w8
-; CHECK-NEXT: lsr x9, x8, #63
 ; CHECK-NEXT: asr x8, x8, #40
-; CHECK-NEXT: add w8, w8, w9
-; CHECK-NEXT: mov w9, #1060 // =0x424
+; CHECK-NEXT: add w8, w8, w8, lsr #31
 ; CHECK-NEXT: msub w0, w8, w9, w0
 ; CHECK-NEXT: ret
   %1 = srem i32 %x, 1060
@@ -40,12 +39,11 @@ define i32 @fold_srem_negative_odd(i32 %x) {
 ; CHECK-LABEL: fold_srem_negative_odd:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #65445 // =0xffa5
+; CHECK-NEXT: mov w9, #-723 // =0xfffffd2d
 ; CHECK-NEXT: movk w8, #42330, lsl #16
 ; CHECK-NEXT: smull x8, w0, w8
-; CHECK-NEXT: lsr x9, x8, #63
 ; CHECK-NEXT: asr x8, x8, #40
-; CHECK-NEXT: add w8, w8, w9
-; CHECK-NEXT: mov w9, #-723 // =0xfffffd2d
+; CHECK-NEXT: add w8, w8, w8, lsr #31
 ; CHECK-NEXT: msub w0, w8, w9, w0
 ; CHECK-NEXT: ret
   %1 = srem i32 %x, -723
@@ -57,12 +55,11 @@ define i32 @fold_srem_negative_even(i32 %x) {
 ; CHECK-LABEL: fold_srem_negative_even:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #62439 // =0xf3e7
+; CHECK-NEXT: mov w9, #-22981 // =0xffffa63b
 ; CHECK-NEXT: movk w8, #64805, lsl #16
 ; CHECK-NEXT: smull x8, w0, w8
-; CHECK-NEXT: lsr x9, x8, #63
 ; CHECK-NEXT: asr x8, x8, #40
-; CHECK-NEXT: add w8, w8, w9
-; CHECK-NEXT: mov w9, #-22981 // =0xffffa63b
+; CHECK-NEXT: add w8, w8, w8, lsr #31
 ; CHECK-NEXT: msub w0, w8, w9, w0
 ; CHECK-NEXT: ret
   %1 = srem i32 %x, -22981
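All three functions above improve in the same way. For reference, these CHECK lines encode the standard signed-remainder-by-constant expansion; a schematic IR equivalent for the 1060 case (illustrative only, with the magic constant 1037275121 = 0x3dd38ff1 read off the mov/movk pair, i.e. roughly 2^40/1060):

    define i32 @srem_1060_sketch(i32 %x) {
      %ext = sext i32 %x to i64
      %mul = mul i64 %ext, 1037275121  ; smull: x * magic
      %shr = ashr i64 %mul, 40         ; asr: arithmetic high-part shift
      %q0 = trunc i64 %shr to i32
      %sign = lshr i32 %q0, 31         ; 1 iff the quotient is negative
      %q = add i32 %q0, %sign          ; round the quotient toward zero
      %prod = mul i32 %q, 1060
      %rem = sub i32 %x, %prod         ; msub: x - q * 1060
      ret i32 %rem
    }

The saved instruction is the sign-bit extraction: it is now taken from the already-shifted quotient (w8, lsr #31) rather than from the 64-bit product (lsr x9, x8, #63), which lets the rounding correction fold into a single shifted-operand add.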

llvm/test/CodeGen/AArch64/srem-vector-lkk.ll

Lines changed: 3 additions & 5 deletions
@@ -263,16 +263,14 @@ define <2 x i32> @fold_srem_v2i32(<2 x i32> %x) {
 ; CHECK-LABEL: fold_srem_v2i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #26215 // =0x6667
-; CHECK-NEXT: movi v3.2s, #10
+; CHECK-NEXT: movi v2.2s, #10
 ; CHECK-NEXT: movk w8, #26214, lsl #16
 ; CHECK-NEXT: dup v1.2s, w8
 ; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
-; CHECK-NEXT: ushr v2.2d, v1.2d, #63
 ; CHECK-NEXT: sshr v1.2d, v1.2d, #34
-; CHECK-NEXT: xtn v2.2s, v2.2d
 ; CHECK-NEXT: xtn v1.2s, v1.2d
-; CHECK-NEXT: add v1.2s, v1.2s, v2.2s
-; CHECK-NEXT: mls v0.2s, v1.2s, v3.2s
+; CHECK-NEXT: usra v1.2s, v1.2s, #31
+; CHECK-NEXT: mls v0.2s, v1.2s, v2.2s
 ; CHECK-NEXT: ret
   %1 = srem <2 x i32> %x, <i32 10, i32 10>
   ret <2 x i32> %1

llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll

Lines changed: 14 additions & 24 deletions
@@ -11,45 +11,35 @@ define dso_local fastcc void @BuildVectorICE() unnamed_addr {
 ; 32BIT-NEXT: stwu 1, -48(1)
 ; 32BIT-NEXT: .cfi_def_cfa_offset 48
 ; 32BIT-NEXT: lxvw4x 34, 0, 3
-; 32BIT-NEXT: li 3, .LCPI0_0@l
-; 32BIT-NEXT: lis 4, .LCPI0_0@ha
 ; 32BIT-NEXT: li 5, 0
-; 32BIT-NEXT: xxlxor 36, 36, 36
-; 32BIT-NEXT: lxvw4x 35, 4, 3
 ; 32BIT-NEXT: addi 3, 1, 16
 ; 32BIT-NEXT: addi 4, 1, 32
-; 32BIT-NEXT: .p2align 4
+; 32BIT-NEXT: xxspltw 35, 34, 1
+; 32BIT-NEXT: .p2align 5
 ; 32BIT-NEXT: .LBB0_1: # %while.body
 ; 32BIT-NEXT: #
 ; 32BIT-NEXT: stw 5, 16(1)
-; 32BIT-NEXT: lxvw4x 37, 0, 3
-; 32BIT-NEXT: vperm 5, 5, 4, 3
-; 32BIT-NEXT: vadduwm 5, 2, 5
-; 32BIT-NEXT: xxspltw 32, 37, 1
-; 32BIT-NEXT: vadduwm 5, 5, 0
-; 32BIT-NEXT: stxvw4x 37, 0, 4
+; 32BIT-NEXT: lxvw4x 36, 0, 3
+; 32BIT-NEXT: vadduwm 4, 2, 4
+; 32BIT-NEXT: vadduwm 4, 4, 3
+; 32BIT-NEXT: stxvw4x 36, 0, 4
 ; 32BIT-NEXT: lwz 5, 32(1)
 ; 32BIT-NEXT: b .LBB0_1
 ;
 ; 64BIT-LABEL: BuildVectorICE:
 ; 64BIT: # %bb.0: # %entry
 ; 64BIT-NEXT: lxvw4x 34, 0, 3
 ; 64BIT-NEXT: li 3, 0
-; 64BIT-NEXT: rldimi 3, 3, 32, 0
-; 64BIT-NEXT: mtfprd 0, 3
-; 64BIT-NEXT: li 3, 0
-; 64BIT-NEXT: .p2align 4
+; 64BIT-NEXT: xxspltw 35, 34, 1
+; 64BIT-NEXT: .p2align 5
 ; 64BIT-NEXT: .LBB0_1: # %while.body
 ; 64BIT-NEXT: #
-; 64BIT-NEXT: li 4, 0
-; 64BIT-NEXT: rldimi 4, 3, 32, 0
-; 64BIT-NEXT: mtfprd 1, 4
-; 64BIT-NEXT: xxmrghd 35, 1, 0
-; 64BIT-NEXT: vadduwm 3, 2, 3
-; 64BIT-NEXT: xxspltw 36, 35, 1
-; 64BIT-NEXT: vadduwm 3, 3, 4
-; 64BIT-NEXT: xxsldwi 1, 35, 35, 3
-; 64BIT-NEXT: mffprwz 3, 1
+; 64BIT-NEXT: sldi 3, 3, 32
+; 64BIT-NEXT: mtvsrd 36, 3
+; 64BIT-NEXT: vadduwm 4, 2, 4
+; 64BIT-NEXT: vadduwm 4, 4, 3
+; 64BIT-NEXT: xxsldwi 0, 36, 36, 3
+; 64BIT-NEXT: mffprwz 3, 0
 ; 64BIT-NEXT: b .LBB0_1
 entry:
   br label %while.body

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll

Lines changed: 18 additions & 19 deletions
@@ -13487,7 +13487,6 @@ define <32 x i64> @mgather_strided_split(ptr %base) {
 ; RV32ZVE32F-NEXT: vid.v v8
 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 4
 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
-; RV32ZVE32F-NEXT: vmv.x.s a1, v8
 ; RV32ZVE32F-NEXT: lw a3, 0(a1)
 ; RV32ZVE32F-NEXT: sw a3, 252(sp) # 4-byte Folded Spill
 ; RV32ZVE32F-NEXT: lw a1, 4(a1)
@@ -13587,10 +13586,10 @@ define <32 x i64> @mgather_strided_split(ptr %base) {
 ; RV32ZVE32F-NEXT: lw s9, 4(a1)
 ; RV32ZVE32F-NEXT: lw s10, 0(a2)
 ; RV32ZVE32F-NEXT: lw s11, 4(a2)
-; RV32ZVE32F-NEXT: lw t5, 0(a3)
-; RV32ZVE32F-NEXT: lw t6, 4(a3)
-; RV32ZVE32F-NEXT: lw s2, 0(a4)
-; RV32ZVE32F-NEXT: lw s3, 4(a4)
+; RV32ZVE32F-NEXT: lw s4, 0(a3)
+; RV32ZVE32F-NEXT: lw s5, 4(a3)
+; RV32ZVE32F-NEXT: lw s6, 0(a4)
+; RV32ZVE32F-NEXT: lw s7, 4(a4)
 ; RV32ZVE32F-NEXT: lw a2, 336(sp)
 ; RV32ZVE32F-NEXT: lw a4, 340(sp)
 ; RV32ZVE32F-NEXT: lw a5, 344(sp)
@@ -13607,8 +13606,8 @@ define <32 x i64> @mgather_strided_split(ptr %base) {
 ; RV32ZVE32F-NEXT: lw a6, 356(sp)
 ; RV32ZVE32F-NEXT: lw t3, 360(sp)
 ; RV32ZVE32F-NEXT: lw t4, 364(sp)
-; RV32ZVE32F-NEXT: lw s4, 0(a5)
-; RV32ZVE32F-NEXT: sw s4, 116(sp) # 4-byte Folded Spill
+; RV32ZVE32F-NEXT: lw t5, 0(a5)
+; RV32ZVE32F-NEXT: sw t5, 116(sp) # 4-byte Folded Spill
 ; RV32ZVE32F-NEXT: lw a5, 4(a5)
 ; RV32ZVE32F-NEXT: sw a5, 112(sp) # 4-byte Folded Spill
 ; RV32ZVE32F-NEXT: lw a5, 0(a6)
@@ -13626,10 +13625,10 @@ define <32 x i64> @mgather_strided_split(ptr %base) {
 ; RV32ZVE32F-NEXT: lw a6, 372(sp)
 ; RV32ZVE32F-NEXT: lw t3, 376(sp)
 ; RV32ZVE32F-NEXT: lw t4, 380(sp)
-; RV32ZVE32F-NEXT: lw s4, 0(a5)
-; RV32ZVE32F-NEXT: lw s5, 4(a5)
-; RV32ZVE32F-NEXT: lw s6, 0(a6)
-; RV32ZVE32F-NEXT: lw s7, 4(a6)
+; RV32ZVE32F-NEXT: lw t5, 0(a5)
+; RV32ZVE32F-NEXT: lw t6, 4(a5)
+; RV32ZVE32F-NEXT: lw s2, 0(a6)
+; RV32ZVE32F-NEXT: lw s3, 4(a6)
 ; RV32ZVE32F-NEXT: lw a5, 0(t3)
 ; RV32ZVE32F-NEXT: lw a6, 4(t3)
 ; RV32ZVE32F-NEXT: lw t3, 0(t4)
@@ -13642,10 +13641,10 @@ define <32 x i64> @mgather_strided_split(ptr %base) {
 ; RV32ZVE32F-NEXT: sw t0, 164(a0)
 ; RV32ZVE32F-NEXT: sw t1, 168(a0)
 ; RV32ZVE32F-NEXT: sw t2, 172(a0)
-; RV32ZVE32F-NEXT: sw t5, 144(a0)
-; RV32ZVE32F-NEXT: sw t6, 148(a0)
-; RV32ZVE32F-NEXT: sw s2, 152(a0)
-; RV32ZVE32F-NEXT: sw s3, 156(a0)
+; RV32ZVE32F-NEXT: sw s4, 144(a0)
+; RV32ZVE32F-NEXT: sw s5, 148(a0)
+; RV32ZVE32F-NEXT: sw s6, 152(a0)
+; RV32ZVE32F-NEXT: sw s7, 156(a0)
 ; RV32ZVE32F-NEXT: sw s8, 128(a0)
 ; RV32ZVE32F-NEXT: sw s9, 132(a0)
 ; RV32ZVE32F-NEXT: sw s10, 136(a0)
@@ -13686,10 +13685,10 @@ define <32 x i64> @mgather_strided_split(ptr %base) {
 ; RV32ZVE32F-NEXT: sw a6, 244(a0)
 ; RV32ZVE32F-NEXT: sw t3, 248(a0)
 ; RV32ZVE32F-NEXT: sw t4, 252(a0)
-; RV32ZVE32F-NEXT: sw s4, 224(a0)
-; RV32ZVE32F-NEXT: sw s5, 228(a0)
-; RV32ZVE32F-NEXT: sw s6, 232(a0)
-; RV32ZVE32F-NEXT: sw s7, 236(a0)
+; RV32ZVE32F-NEXT: sw t5, 224(a0)
+; RV32ZVE32F-NEXT: sw t6, 228(a0)
+; RV32ZVE32F-NEXT: sw s2, 232(a0)
+; RV32ZVE32F-NEXT: sw s3, 236(a0)
 ; RV32ZVE32F-NEXT: sw ra, 208(a0)
 ; RV32ZVE32F-NEXT: lw a1, 108(sp) # 4-byte Folded Reload
 ; RV32ZVE32F-NEXT: sw a1, 212(a0)

llvm/test/CodeGen/X86/combine-pmuldq.ll

Lines changed: 19 additions & 26 deletions
@@ -203,46 +203,39 @@ define i32 @PR43159(ptr %a0) {
 ; SSE-LABEL: PR43159:
 ; SSE: # %bb.0: # %entry
 ; SSE-NEXT: movdqa (%rdi), %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrld $1, %xmm1
-; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
-; SSE-NEXT: psubd %xmm2, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: psrld $1, %xmm2
+; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1,2,3],xmm2[4,5],xmm0[6,7]
+; SSE-NEXT: psubd %xmm1, %xmm0
 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT: pxor %xmm2, %xmm2
-; SSE-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
-; SSE-NEXT: paddd %xmm1, %xmm2
-; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: paddd %xmm1, %xmm0
 ; SSE-NEXT: psrld $7, %xmm0
-; SSE-NEXT: psrld $6, %xmm2
-; SSE-NEXT: movd %xmm2, %edi
+; SSE-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE-NEXT: psrld $6, %xmm1
+; SSE-NEXT: movd %xmm1, %edi
 ; SSE-NEXT: pextrd $1, %xmm0, %esi
-; SSE-NEXT: pextrd $2, %xmm2, %edx
+; SSE-NEXT: pextrd $2, %xmm1, %edx
 ; SSE-NEXT: pextrd $3, %xmm0, %ecx
 ; SSE-NEXT: jmp foo # TAILCALL
 ;
 ; AVX1-LABEL: PR43159:
 ; AVX1: # %bb.0: # %entry
 ; AVX1-NEXT: vmovdqa (%rdi), %xmm0
-; AVX1-NEXT: vpsrld $1, %xmm0, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
 ; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
-; AVX1-NEXT: vpsubd %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpsrld $7, %xmm1, %xmm1
+; AVX1-NEXT: vpsrld $1, %xmm0, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5],xmm0[6,7]
 ; AVX1-NEXT: vpmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
-; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrld $7, %xmm0, %xmm1
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; AVX1-NEXT: vpsrld $6, %xmm0, %xmm0
 ; AVX1-NEXT: vmovd %xmm0, %edi
 ; AVX1-NEXT: vpextrd $1, %xmm1, %esi
