Skip to content

Commit 803dba0

Browse files
committed
[RISCV] Let LiveIntervals::shrinkToUses compute dead immediate. NFC
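We can simplify the removal of dead AVL immediates (> 31) by passing the optional `dead` argument to LiveIntervals::shrinkToUses: it already collects the instructions whose defined values become dead, so we no longer need to look up and erase the defining instruction by hand.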
1 parent 8348d72 commit 803dba0

File tree

3 files changed

+271
-16
lines changed

3 files changed

+271
-16
lines changed

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1696,19 +1696,10 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
16961696
if (NextMI->getOperand(1).isReg())
16971697
NextMI->getOperand(1).setReg(RISCV::NoRegister);
16981698

1699-
if (OldVLReg && OldVLReg.isVirtual()) {
1700-
// NextMI no longer uses OldVLReg so shrink its LiveInterval.
1701-
if (LIS)
1702-
LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
1703-
1704-
MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
1705-
if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
1706-
MRI->use_nodbg_empty(OldVLReg)) {
1707-
VLOpDef->eraseFromParent();
1708-
if (LIS)
1709-
LIS->removeInterval(OldVLReg);
1710-
}
1711-
}
1699+
// NextMI no longer uses OldVLReg so shrink its LiveInterval.
1700+
if (OldVLReg && OldVLReg.isVirtual() && LIS)
1701+
LIS->shrinkToUses(&LIS->getInterval(OldVLReg), &ToDelete);
1702+
17121703
MI.setDesc(NextMI->getDesc());
17131704
}
17141705
MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
@@ -1720,11 +1711,22 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
17201711
Used = getDemanded(MI, ST);
17211712
}
17221713

1723-
NumCoalescedVSETVL += ToDelete.size();
17241714
for (auto *MI : ToDelete) {
1715+
bool SawStore = false;
1716+
if (!MI->isSafeToMove(nullptr, SawStore) || MI->isBundled() ||
1717+
MI->isInlineAsm()) {
1718+
assert(!isVectorConfigInstr(*MI));
1719+
continue;
1720+
}
1721+
17251722
if (LIS)
17261723
LIS->RemoveMachineInstrFromMaps(*MI);
17271724
MI->eraseFromParent();
1725+
if (LIS)
1726+
for (MachineOperand &MO : MI->uses())
1727+
if (MO.isReg() && MO.getReg().isVirtual())
1728+
LIS->shrinkToUses(&LIS->getInterval(MO.getReg()));
1729+
NumCoalescedVSETVL++;
17281730
}
17291731
}
17301732

Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=riscv64 -mattr=+v < %s | FileCheck %s
3+
4+
; This previously crashed when spilling a GPR because when we removed a dead
5+
; ADDI we weren't removing it from the LIS instruction map
6+
7+
define i32 @main(i1 %arg.1, i64 %arg.2, i1 %arg.3, i64 %arg.4, i1 %arg.5, <vscale x 4 x i1> %arg.6, i64 %arg.7, i1 %arg.8, i64 %arg.9, i32 %arg.10) vscale_range(2,2) {
8+
; CHECK-LABEL: main:
9+
; CHECK: # %bb.0: # %entry
10+
; CHECK-NEXT: addi sp, sp, -112
11+
; CHECK-NEXT: .cfi_def_cfa_offset 112
12+
; CHECK-NEXT: sd ra, 104(sp) # 8-byte Folded Spill
13+
; CHECK-NEXT: sd s0, 96(sp) # 8-byte Folded Spill
14+
; CHECK-NEXT: sd s1, 88(sp) # 8-byte Folded Spill
15+
; CHECK-NEXT: sd s2, 80(sp) # 8-byte Folded Spill
16+
; CHECK-NEXT: sd s3, 72(sp) # 8-byte Folded Spill
17+
; CHECK-NEXT: sd s4, 64(sp) # 8-byte Folded Spill
18+
; CHECK-NEXT: sd s5, 56(sp) # 8-byte Folded Spill
19+
; CHECK-NEXT: sd s6, 48(sp) # 8-byte Folded Spill
20+
; CHECK-NEXT: sd s7, 40(sp) # 8-byte Folded Spill
21+
; CHECK-NEXT: sd s8, 32(sp) # 8-byte Folded Spill
22+
; CHECK-NEXT: sd s9, 24(sp) # 8-byte Folded Spill
23+
; CHECK-NEXT: sd s10, 16(sp) # 8-byte Folded Spill
24+
; CHECK-NEXT: sd s11, 8(sp) # 8-byte Folded Spill
25+
; CHECK-NEXT: .cfi_offset ra, -8
26+
; CHECK-NEXT: .cfi_offset s0, -16
27+
; CHECK-NEXT: .cfi_offset s1, -24
28+
; CHECK-NEXT: .cfi_offset s2, -32
29+
; CHECK-NEXT: .cfi_offset s3, -40
30+
; CHECK-NEXT: .cfi_offset s4, -48
31+
; CHECK-NEXT: .cfi_offset s5, -56
32+
; CHECK-NEXT: .cfi_offset s6, -64
33+
; CHECK-NEXT: .cfi_offset s7, -72
34+
; CHECK-NEXT: .cfi_offset s8, -80
35+
; CHECK-NEXT: .cfi_offset s9, -88
36+
; CHECK-NEXT: .cfi_offset s10, -96
37+
; CHECK-NEXT: .cfi_offset s11, -104
38+
; CHECK-NEXT: li a1, 0
39+
; CHECK-NEXT: li a3, 8
40+
; CHECK-NEXT: li a5, 12
41+
; CHECK-NEXT: li a6, 4
42+
; CHECK-NEXT: li a7, 20
43+
; CHECK-NEXT: ld s0, 112(sp)
44+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
45+
; CHECK-NEXT: vmv.v.i v8, 0
46+
; CHECK-NEXT: andi a4, a4, 1
47+
; CHECK-NEXT: li t0, 4
48+
; CHECK-NEXT: .LBB0_1: # %for.cond1.preheader.i
49+
; CHECK-NEXT: # =>This Loop Header: Depth=1
50+
; CHECK-NEXT: # Child Loop BB0_2 Depth 2
51+
; CHECK-NEXT: # Child Loop BB0_3 Depth 3
52+
; CHECK-NEXT: # Child Loop BB0_4 Depth 4
53+
; CHECK-NEXT: # Child Loop BB0_5 Depth 5
54+
; CHECK-NEXT: mv t1, a7
55+
; CHECK-NEXT: mv t2, t0
56+
; CHECK-NEXT: mv t3, a5
57+
; CHECK-NEXT: mv t4, a3
58+
; CHECK-NEXT: mv t5, a1
59+
; CHECK-NEXT: .LBB0_2: # %for.cond5.preheader.i
60+
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
61+
; CHECK-NEXT: # => This Loop Header: Depth=2
62+
; CHECK-NEXT: # Child Loop BB0_3 Depth 3
63+
; CHECK-NEXT: # Child Loop BB0_4 Depth 4
64+
; CHECK-NEXT: # Child Loop BB0_5 Depth 5
65+
; CHECK-NEXT: mv t6, t1
66+
; CHECK-NEXT: mv s1, t2
67+
; CHECK-NEXT: mv s2, t3
68+
; CHECK-NEXT: mv s3, t4
69+
; CHECK-NEXT: mv s4, t5
70+
; CHECK-NEXT: .LBB0_3: # %for.cond9.preheader.i
71+
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
72+
; CHECK-NEXT: # Parent Loop BB0_2 Depth=2
73+
; CHECK-NEXT: # => This Loop Header: Depth=3
74+
; CHECK-NEXT: # Child Loop BB0_4 Depth 4
75+
; CHECK-NEXT: # Child Loop BB0_5 Depth 5
76+
; CHECK-NEXT: mv s6, t6
77+
; CHECK-NEXT: mv s7, s1
78+
; CHECK-NEXT: mv s8, s2
79+
; CHECK-NEXT: mv s9, s3
80+
; CHECK-NEXT: mv s10, s4
81+
; CHECK-NEXT: .LBB0_4: # %vector.ph.i
82+
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
83+
; CHECK-NEXT: # Parent Loop BB0_2 Depth=2
84+
; CHECK-NEXT: # Parent Loop BB0_3 Depth=3
85+
; CHECK-NEXT: # => This Loop Header: Depth=4
86+
; CHECK-NEXT: # Child Loop BB0_5 Depth 5
87+
; CHECK-NEXT: li s5, 0
88+
; CHECK-NEXT: .LBB0_5: # %vector.body.i
89+
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
90+
; CHECK-NEXT: # Parent Loop BB0_2 Depth=2
91+
; CHECK-NEXT: # Parent Loop BB0_3 Depth=3
92+
; CHECK-NEXT: # Parent Loop BB0_4 Depth=4
93+
; CHECK-NEXT: # => This Inner Loop Header: Depth=5
94+
; CHECK-NEXT: addi s11, s5, 4
95+
; CHECK-NEXT: add ra, s9, s5
96+
; CHECK-NEXT: vse32.v v8, (ra), v0.t
97+
; CHECK-NEXT: add s5, s7, s5
98+
; CHECK-NEXT: vse32.v v8, (s5), v0.t
99+
; CHECK-NEXT: mv s5, s11
100+
; CHECK-NEXT: bne s11, a6, .LBB0_5
101+
; CHECK-NEXT: # %bb.6: # %for.cond.cleanup15.i
102+
; CHECK-NEXT: # in Loop: Header=BB0_4 Depth=4
103+
; CHECK-NEXT: addi s10, s10, 4
104+
; CHECK-NEXT: addi s9, s9, 4
105+
; CHECK-NEXT: addi s8, s8, 4
106+
; CHECK-NEXT: addi s7, s7, 4
107+
; CHECK-NEXT: andi s5, a0, 1
108+
; CHECK-NEXT: addi s6, s6, 4
109+
; CHECK-NEXT: beqz s5, .LBB0_4
110+
; CHECK-NEXT: # %bb.7: # %for.cond.cleanup11.i
111+
; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=3
112+
; CHECK-NEXT: addi s4, s4, 4
113+
; CHECK-NEXT: addi s3, s3, 4
114+
; CHECK-NEXT: addi s2, s2, 4
115+
; CHECK-NEXT: addi s1, s1, 4
116+
; CHECK-NEXT: andi s6, a2, 1
117+
; CHECK-NEXT: addi t6, t6, 4
118+
; CHECK-NEXT: beqz s6, .LBB0_3
119+
; CHECK-NEXT: # %bb.8: # %for.cond.cleanup7.i
120+
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=2
121+
; CHECK-NEXT: addi t5, t5, 4
122+
; CHECK-NEXT: addi t4, t4, 4
123+
; CHECK-NEXT: addi t3, t3, 4
124+
; CHECK-NEXT: addi t2, t2, 4
125+
; CHECK-NEXT: addi t1, t1, 4
126+
; CHECK-NEXT: beqz a4, .LBB0_2
127+
; CHECK-NEXT: # %bb.9: # %for.cond.cleanup3.i
128+
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
129+
; CHECK-NEXT: addi a1, a1, 4
130+
; CHECK-NEXT: addi a3, a3, 4
131+
; CHECK-NEXT: addi a5, a5, 4
132+
; CHECK-NEXT: addi t0, t0, 4
133+
; CHECK-NEXT: addi a7, a7, 4
134+
; CHECK-NEXT: beqz s6, .LBB0_1
135+
; CHECK-NEXT: # %bb.10: # %l.exit
136+
; CHECK-NEXT: li a0, 0
137+
; CHECK-NEXT: jalr a0
138+
; CHECK-NEXT: beqz s5, .LBB0_12
139+
; CHECK-NEXT: .LBB0_11: # %for.body7.us.14
140+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
141+
; CHECK-NEXT: j .LBB0_11
142+
; CHECK-NEXT: .LBB0_12: # %for.body7.us.19
143+
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
144+
; CHECK-NEXT: vmv.s.x v8, s0
145+
; CHECK-NEXT: vmv.v.i v16, 0
146+
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
147+
; CHECK-NEXT: vslideup.vi v16, v8, 1
148+
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
149+
; CHECK-NEXT: vmsne.vi v8, v16, 0
150+
; CHECK-NEXT: vmv.x.s a0, v8
151+
; CHECK-NEXT: snez a0, a0
152+
; CHECK-NEXT: sb a0, 0(zero)
153+
; CHECK-NEXT: li a0, 0
154+
; CHECK-NEXT: ld ra, 104(sp) # 8-byte Folded Reload
155+
; CHECK-NEXT: ld s0, 96(sp) # 8-byte Folded Reload
156+
; CHECK-NEXT: ld s1, 88(sp) # 8-byte Folded Reload
157+
; CHECK-NEXT: ld s2, 80(sp) # 8-byte Folded Reload
158+
; CHECK-NEXT: ld s3, 72(sp) # 8-byte Folded Reload
159+
; CHECK-NEXT: ld s4, 64(sp) # 8-byte Folded Reload
160+
; CHECK-NEXT: ld s5, 56(sp) # 8-byte Folded Reload
161+
; CHECK-NEXT: ld s6, 48(sp) # 8-byte Folded Reload
162+
; CHECK-NEXT: ld s7, 40(sp) # 8-byte Folded Reload
163+
; CHECK-NEXT: ld s8, 32(sp) # 8-byte Folded Reload
164+
; CHECK-NEXT: ld s9, 24(sp) # 8-byte Folded Reload
165+
; CHECK-NEXT: ld s10, 16(sp) # 8-byte Folded Reload
166+
; CHECK-NEXT: ld s11, 8(sp) # 8-byte Folded Reload
167+
; CHECK-NEXT: addi sp, sp, 112
168+
; CHECK-NEXT: ret
169+
entry:
170+
%0 = tail call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
171+
br label %for.cond1.preheader.i
172+
173+
for.cond1.preheader.i: ; preds = %for.cond.cleanup3.i, %entry
174+
%arg.21 = phi i64 [ 0, %entry ], [ %indvars.iv.next74.i, %for.cond.cleanup3.i ]
175+
br label %for.cond5.preheader.i
176+
177+
for.cond5.preheader.i: ; preds = %for.cond.cleanup7.i, %for.cond1.preheader.i
178+
%arg.42 = phi i64 [ 0, %for.cond1.preheader.i ], [ %indvars.iv.next70.i, %for.cond.cleanup7.i ]
179+
%1 = add i64 %arg.42, %arg.21
180+
br label %for.cond9.preheader.i
181+
182+
for.cond.cleanup3.i: ; preds = %for.cond.cleanup7.i
183+
%indvars.iv.next74.i = add i64 %arg.21, 1
184+
br i1 %arg.3, label %l.exit, label %for.cond1.preheader.i
185+
186+
for.cond9.preheader.i: ; preds = %for.cond.cleanup11.i, %for.cond5.preheader.i
187+
%arg.74 = phi i64 [ 0, %for.cond5.preheader.i ], [ %indvars.iv.next66.i, %for.cond.cleanup11.i ]
188+
%2 = add i64 %1, %arg.74
189+
br label %vector.ph.i
190+
191+
for.cond.cleanup7.i: ; preds = %for.cond.cleanup11.i
192+
%indvars.iv.next70.i = add i64 %arg.42, 1
193+
br i1 %arg.5, label %for.cond.cleanup3.i, label %for.cond5.preheader.i
194+
195+
vector.ph.i: ; preds = %for.cond.cleanup15.i, %for.cond9.preheader.i
196+
%arg.96 = phi i64 [ 0, %for.cond9.preheader.i ], [ %indvars.iv.next62.i, %for.cond.cleanup15.i ]
197+
%3 = add i64 %2, %arg.96
198+
%broadcast.splatinsert.i = insertelement <vscale x 4 x i64> zeroinitializer, i64 %3, i64 0
199+
%broadcast.splat.i = shufflevector <vscale x 4 x i64> %broadcast.splatinsert.i, <vscale x 4 x i64> zeroinitializer, <vscale x 4 x i32> zeroinitializer
200+
br label %vector.body.i
201+
202+
vector.body.i: ; preds = %vector.body.i, %vector.ph.i
203+
%index.i = phi i64 [ 0, %vector.ph.i ], [ %index.next.i, %vector.body.i ]
204+
%vec.ind.i = phi <vscale x 4 x i64> [ %0, %vector.ph.i ], [ %6, %vector.body.i ]
205+
%4 = add <vscale x 4 x i64> %vec.ind.i, %broadcast.splat.i
206+
%5 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %4
207+
tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %5, i32 4, <vscale x 4 x i1> zeroinitializer)
208+
%6 = add <vscale x 4 x i64> %vec.ind.i, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
209+
%7 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %6
210+
tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %7, i32 4, <vscale x 4 x i1> zeroinitializer)
211+
%arg.100 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
212+
%arg.101 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.100
213+
tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.101, i32 4, <vscale x 4 x i1> %arg.6)
214+
%arg.102 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 3, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
215+
%arg.103 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.102
216+
tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.103, i32 4, <vscale x 4 x i1> zeroinitializer)
217+
%arg.104 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
218+
%arg.105 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.104
219+
tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.105, i32 4, <vscale x 4 x i1> %arg.6)
220+
%arg.106 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 5, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
221+
%arg.107 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.106
222+
tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.107, i32 4, <vscale x 4 x i1> zeroinitializer)
223+
%index.next.i = add i64 %index.i, 1
224+
%arg.108 = icmp eq i64 %index.i, 0
225+
br i1 %arg.108, label %for.cond.cleanup15.i, label %vector.body.i
226+
227+
for.cond.cleanup11.i: ; preds = %for.cond.cleanup15.i
228+
%indvars.iv.next66.i = add i64 %arg.74, 1
229+
br i1 %arg.3, label %for.cond.cleanup7.i, label %for.cond9.preheader.i
230+
231+
for.cond.cleanup15.i: ; preds = %vector.body.i
232+
%indvars.iv.next62.i = add i64 %arg.96, 1
233+
br i1 %arg.1, label %for.cond.cleanup11.i, label %vector.ph.i
234+
235+
l.exit: ; preds = %for.cond.cleanup3.i
236+
tail call void null()
237+
br i1 %arg.1, label %for.body7.us.14, label %for.body7.us.19
238+
239+
for.body7.us.14: ; preds = %for.body7.us.14, %l.exit
240+
br label %for.body7.us.14
241+
242+
for.body7.us.19: ; preds = %l.exit
243+
%arg.109 = insertelement <32 x i32> zeroinitializer, i32 %arg.10, i64 1
244+
%8 = icmp ne <32 x i32> %arg.109, zeroinitializer
245+
%9 = bitcast <32 x i1> %8 to i32
246+
%op.rdx13 = icmp ne i32 %9, 0
247+
%op.rdx = zext i1 %op.rdx13 to i8
248+
store i8 %op.rdx, ptr null, align 1
249+
ret i32 0
250+
}
251+
252+
uselistorder i64 0, { 0, 1, 13, 2, 3, 14, 4, 15, 5, 16, 6, 7, 8, 9, 10, 11, 12 }
253+
uselistorder i64 1, { 0, 1, 2, 3, 6, 4, 5 }
254+
uselistorder ptr @llvm.masked.scatter.nxv4i32.nxv4p0, { 5, 4, 3, 2, 1, 0 }

llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -539,8 +539,7 @@ body: |
539539
; CHECK-LABEL: name: coalesce_dead_avl_nonvolatile_load
540540
; CHECK: liveins: $x1
541541
; CHECK-NEXT: {{ $}}
542-
; CHECK-NEXT: %ptr:gpr = COPY $x1
543-
; CHECK-NEXT: dead %avl:gprnox0 = LW %ptr, 0 :: (dereferenceable load (s32))
542+
; CHECK-NEXT: dead %ptr:gpr = COPY $x1
544543
; CHECK-NEXT: $x0 = PseudoVSETIVLI 3, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
545544
; CHECK-NEXT: dead %x:gpr = PseudoVMV_X_S $noreg, 6 /* e64 */, implicit $vtype
546545
; CHECK-NEXT: $v0 = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 3, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype

0 commit comments

Comments
 (0)