Skip to content

Commit 311339e

Browse files
committed
[DAG] SimplifyDemandedBits - ISD::AND - only request DemandedElts when looking for a splat constant
Limit the isConstOrConstSplat call to the vector elements we care about. Noticed while investigating regressions in #92096.
1 parent cf92e51 commit 311339e

File tree

2 files changed

+66
-74
lines changed

2 files changed

+66
-74
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1380,7 +1380,7 @@ bool TargetLowering::SimplifyDemandedBits(
13801380
// using the bits from the RHS. Below, we use knowledge about the RHS to
13811381
// simplify the LHS, here we're using information from the LHS to simplify
13821382
// the RHS.
1383-
if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
1383+
if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1, DemandedElts)) {
13841384
// Do not increment Depth here; that can cause an infinite loop.
13851385
KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
13861386
// If the LHS already has zeros where RHSC does, this 'and' is dead.

llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll

Lines changed: 65 additions & 73 deletions
Original file line number | Diff line number | Diff line change
@@ -6,39 +6,36 @@ define arm_aapcs_vfpcc <4 x i32> @loads_i32(ptr %A, ptr %B, ptr %C) {
66
; CHECK: @ %bb.0: @ %entry
77
; CHECK-NEXT: .save {r4, r5, r6, lr}
88
; CHECK-NEXT: push {r4, r5, r6, lr}
9-
; CHECK-NEXT: .vsave {d8, d9}
10-
; CHECK-NEXT: vpush {d8, d9}
11-
; CHECK-NEXT: vldrw.u32 q1, [r1]
12-
; CHECK-NEXT: vmov.i64 q2, #0xffffffff
13-
; CHECK-NEXT: vmov.f32 s0, s6
14-
; CHECK-NEXT: vmov.f32 s2, s7
15-
; CHECK-NEXT: vand q0, q0, q2
16-
; CHECK-NEXT: vmov.f32 s6, s5
17-
; CHECK-NEXT: vmov r4, r5, d0
18-
; CHECK-NEXT: vmov r3, r1, d1
9+
; CHECK-NEXT: vldrw.u32 q2, [r1]
10+
; CHECK-NEXT: vmov.i64 q1, #0xffffffff
11+
; CHECK-NEXT: vmov.f32 s0, s10
12+
; CHECK-NEXT: vmov.f32 s2, s11
13+
; CHECK-NEXT: vand q0, q0, q1
14+
; CHECK-NEXT: vmov.f32 s10, s9
15+
; CHECK-NEXT: vmov r3, r4, d0
16+
; CHECK-NEXT: vand q2, q2, q1
17+
; CHECK-NEXT: vmov r5, r1, d1
1918
; CHECK-NEXT: vldrw.u32 q0, [r0]
19+
; CHECK-NEXT: vldrw.u32 q1, [r2]
20+
; CHECK-NEXT: vmov lr, r12, d5
2021
; CHECK-NEXT: vmov.f32 s12, s2
2122
; CHECK-NEXT: vmov.f32 s2, s3
2223
; CHECK-NEXT: vmov r0, s12
23-
; CHECK-NEXT: vand q3, q1, q2
24-
; CHECK-NEXT: vldrw.u32 q1, [r2]
25-
; CHECK-NEXT: vmov lr, r12, d7
26-
; CHECK-NEXT: vmov.f32 s16, s6
27-
; CHECK-NEXT: vmov.f32 s18, s7
28-
; CHECK-NEXT: vand q2, q4, q2
24+
; CHECK-NEXT: vmov.f32 s12, s6
25+
; CHECK-NEXT: vmov.f32 s6, s7
2926
; CHECK-NEXT: asrs r2, r0, #31
30-
; CHECK-NEXT: adds r0, r0, r4
31-
; CHECK-NEXT: adcs r5, r2
32-
; CHECK-NEXT: vmov r2, s8
33-
; CHECK-NEXT: asrl r0, r5, r2
27+
; CHECK-NEXT: adds r0, r0, r3
28+
; CHECK-NEXT: adc.w r3, r2, r4
29+
; CHECK-NEXT: vmov r2, s12
30+
; CHECK-NEXT: asrl r0, r3, r2
3431
; CHECK-NEXT: vmov r2, s2
3532
; CHECK-NEXT: vmov.f32 s2, s1
36-
; CHECK-NEXT: asrs r4, r2, #31
37-
; CHECK-NEXT: adds r2, r2, r3
38-
; CHECK-NEXT: adcs r1, r4
39-
; CHECK-NEXT: vmov r3, s10
33+
; CHECK-NEXT: asrs r3, r2, #31
34+
; CHECK-NEXT: adds r2, r2, r5
35+
; CHECK-NEXT: adcs r1, r3
36+
; CHECK-NEXT: vmov r3, s6
4037
; CHECK-NEXT: asrl r2, r1, r3
41-
; CHECK-NEXT: vmov r4, r5, d6
38+
; CHECK-NEXT: vmov r4, r5, d4
4239
; CHECK-NEXT: vmov r1, s2
4340
; CHECK-NEXT: vmov.f32 s2, s5
4441
; CHECK-NEXT: adds.w r6, r1, lr
@@ -54,7 +51,6 @@ define arm_aapcs_vfpcc <4 x i32> @loads_i32(ptr %A, ptr %B, ptr %C) {
5451
; CHECK-NEXT: asrl r4, r1, r3
5552
; CHECK-NEXT: vmov q0[2], q0[0], r4, r0
5653
; CHECK-NEXT: vmov q0[3], q0[1], r6, r2
57-
; CHECK-NEXT: vpop {d8, d9}
5854
; CHECK-NEXT: pop {r4, r5, r6, pc}
5955
entry:
6056
%a = load <4 x i32>, ptr %A, align 4
@@ -138,62 +134,58 @@ entry:
138134
define arm_aapcs_vfpcc void @load_store_i32(ptr %A, ptr %B, ptr %C, ptr %D) {
139135
; CHECK-LABEL: load_store_i32:
140136
; CHECK: @ %bb.0: @ %entry
141-
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
142-
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
143-
; CHECK-NEXT: .pad #4
144-
; CHECK-NEXT: sub sp, #4
145-
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
146-
; CHECK-NEXT: vpush {d8, d9, d10, d11}
147-
; CHECK-NEXT: vldrw.u32 q0, [r1]
148-
; CHECK-NEXT: vmov.i64 q4, #0xffffffff
149-
; CHECK-NEXT: vmov.f32 s4, s2
150-
; CHECK-NEXT: vmov.f32 s2, s1
151-
; CHECK-NEXT: vmov.f32 s6, s3
152-
; CHECK-NEXT: vand q2, q0, q4
137+
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
138+
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
139+
; CHECK-NEXT: .vsave {d8}
140+
; CHECK-NEXT: vpush {d8}
141+
; CHECK-NEXT: vldrw.u32 q2, [r1]
142+
; CHECK-NEXT: vmov.i64 q0, #0xffffffff
143+
; CHECK-NEXT: vmov.f32 s4, s10
144+
; CHECK-NEXT: vmov.f32 s6, s11
145+
; CHECK-NEXT: vmov.f32 s10, s9
146+
; CHECK-NEXT: vand q1, q1, q0
147+
; CHECK-NEXT: vand q2, q2, q0
153148
; CHECK-NEXT: vldrw.u32 q0, [r0]
154-
; CHECK-NEXT: vand q1, q1, q4
155-
; CHECK-NEXT: vmov r5, r1, d3
149+
; CHECK-NEXT: vmov r6, r4, d3
156150
; CHECK-NEXT: vmov.f32 s12, s2
157151
; CHECK-NEXT: vmov.f32 s2, s3
158-
; CHECK-NEXT: vmov r0, r12, d2
152+
; CHECK-NEXT: vmov lr, r12, d2
159153
; CHECK-NEXT: vldrw.u32 q1, [r2]
160-
; CHECK-NEXT: vmov r4, lr, d5
161-
; CHECK-NEXT: vmov.f32 s20, s6
162-
; CHECK-NEXT: vmov.f32 s6, s1
163-
; CHECK-NEXT: vmov.f32 s22, s7
164-
; CHECK-NEXT: vand q4, q5, q4
165-
; CHECK-NEXT: vmov r6, s2
154+
; CHECK-NEXT: vmov r5, r1, d5
155+
; CHECK-NEXT: vmov.f32 s16, s6
156+
; CHECK-NEXT: vmov.f32 s6, s7
157+
; CHECK-NEXT: vmov.f32 s10, s1
158+
; CHECK-NEXT: vmov r0, s2
166159
; CHECK-NEXT: vmov.f32 s2, s5
167-
; CHECK-NEXT: adds r2, r6, r5
168-
; CHECK-NEXT: vmov r5, s18
169-
; CHECK-NEXT: asr.w r7, r6, #31
170-
; CHECK-NEXT: adcs r1, r7
171-
; CHECK-NEXT: asrl r2, r1, r5
172-
; CHECK-NEXT: vmov r7, s2
173-
; CHECK-NEXT: vmov r1, s6
174-
; CHECK-NEXT: adds r4, r4, r1
175-
; CHECK-NEXT: asr.w r5, r1, #31
176-
; CHECK-NEXT: adc.w r1, r5, lr
177-
; CHECK-NEXT: asrl r4, r1, r7
178-
; CHECK-NEXT: vmov r6, r5, d4
160+
; CHECK-NEXT: adds.w r8, r0, r6
161+
; CHECK-NEXT: asr.w r2, r0, #31
162+
; CHECK-NEXT: adc.w r7, r2, r4
163+
; CHECK-NEXT: vmov r2, s6
164+
; CHECK-NEXT: asrl r8, r7, r2
165+
; CHECK-NEXT: vmov r2, s10
166+
; CHECK-NEXT: asrs r4, r2, #31
167+
; CHECK-NEXT: adds r2, r2, r5
168+
; CHECK-NEXT: adcs r1, r4
169+
; CHECK-NEXT: vmov r4, s2
170+
; CHECK-NEXT: asrl r2, r1, r4
171+
; CHECK-NEXT: vmov r5, r7, d4
179172
; CHECK-NEXT: vmov r1, s12
180-
; CHECK-NEXT: adds r0, r0, r1
181-
; CHECK-NEXT: asr.w r7, r1, #31
182-
; CHECK-NEXT: adc.w r1, r7, r12
183-
; CHECK-NEXT: vmov r7, s16
184-
; CHECK-NEXT: asrl r0, r1, r7
173+
; CHECK-NEXT: adds.w r6, r1, lr
174+
; CHECK-NEXT: asr.w r4, r1, #31
175+
; CHECK-NEXT: adc.w r1, r4, r12
176+
; CHECK-NEXT: vmov r4, s16
177+
; CHECK-NEXT: asrl r6, r1, r4
185178
; CHECK-NEXT: vmov r1, s0
186-
; CHECK-NEXT: adds r6, r6, r1
187-
; CHECK-NEXT: asr.w r7, r1, #31
188-
; CHECK-NEXT: adc.w r1, r7, r5
179+
; CHECK-NEXT: adds r0, r1, r5
180+
; CHECK-NEXT: asr.w r4, r1, #31
181+
; CHECK-NEXT: adc.w r1, r4, r7
189182
; CHECK-NEXT: vmov r7, s4
190-
; CHECK-NEXT: asrl r6, r1, r7
191-
; CHECK-NEXT: vmov q0[2], q0[0], r6, r0
192-
; CHECK-NEXT: vmov q0[3], q0[1], r4, r2
183+
; CHECK-NEXT: asrl r0, r1, r7
184+
; CHECK-NEXT: vmov q0[2], q0[0], r0, r6
185+
; CHECK-NEXT: vmov q0[3], q0[1], r2, r8
193186
; CHECK-NEXT: vstrw.32 q0, [r3]
194-
; CHECK-NEXT: vpop {d8, d9, d10, d11}
195-
; CHECK-NEXT: add sp, #4
196-
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
187+
; CHECK-NEXT: vpop {d8}
188+
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
197189
entry:
198190
%a = load <4 x i32>, ptr %A, align 4
199191
%b = load <4 x i32>, ptr %B, align 4

0 commit comments

Comments
 (0)