Skip to content

Commit 7ac8486

Browse files
preames and lukel97 authored
[RISCVInsertVSETVLI] Allow PRE with non-immediate AVLs (#71728)
Extend our PRE logic to cover non-immediate AVL values. This covers large constant AVLs (which must be materialized in registers), and may help some code written explicitly with intrinsics. Looking at the existing code, I can't entirely figure out why I thought we needed VL == AVL to perform the PRE. My best guess is that I was worried about the VLMAX < VL < 2 * VLMAX case, but the spec explicitly says that vsetvli must be deterministic on any particular AVL value. That case was, possibly by accident, covering another legality precondition. Specifically, by only returning true for immediate and VLMAX AVL values, we didn't encounter the case where the AVL was a register and that register wasn't available in the predecessor (e.g. if AVL is a load in the MBB block itself). --------- Co-authored-by: Luke Lau <[email protected]>
1 parent 8131eeb commit 7ac8486

File tree

3 files changed

+50
-61
lines changed

3 files changed

+50
-61
lines changed

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

Lines changed: 15 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1308,29 +1308,6 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
13081308
}
13091309
}
13101310

1311-
/// Return true if the VL value configured by a vset(i)vli with the
1312-
/// provided Info must be equal to the requested AVL. That is, that
1313-
/// AVL <= VLMAX.
1314-
static bool willVLBeAVL(const VSETVLIInfo &Info, const RISCVSubtarget &ST) {
1315-
if (!Info.hasAVLImm())
1316-
// VLMAX is always the same value.
1317-
// TODO: Could extend to other registers by looking at the associated vreg
1318-
// def placement.
1319-
return RISCV::X0 == Info.getAVLReg();
1320-
1321-
unsigned AVL = Info.getAVLImm();
1322-
unsigned SEW = Info.getSEW();
1323-
unsigned AVLInBits = AVL * SEW;
1324-
1325-
unsigned LMul;
1326-
bool Fractional;
1327-
std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL());
1328-
1329-
if (Fractional)
1330-
return ST.getRealMinVLen() / LMul >= AVLInBits;
1331-
return ST.getRealMinVLen() * LMul >= AVLInBits;
1332-
}
1333-
13341311
/// Perform simple partial redundancy elimination of the VSETVLI instructions
13351312
/// we're about to insert by looking for cases where we can PRE from the
13361313
/// beginning of one block to the end of one of its predecessors. Specifically,
@@ -1364,9 +1341,21 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
13641341
if (UnavailablePred->succ_size() != 1)
13651342
return;
13661343

1367-
// If VL can be less than AVL, then we can't reduce the frequency of exec.
1368-
if (!willVLBeAVL(AvailableInfo, *ST))
1369-
return;
1344+
// If the AVL value is a register (other than our VLMAX sentinel),
1345+
// we need to prove the value is available at the point we're going
1346+
// to insert the vsetvli at.
1347+
if (AvailableInfo.hasAVLReg() && RISCV::X0 != AvailableInfo.getAVLReg()) {
1348+
MachineInstr *AVLDefMI = MRI->getVRegDef(AvailableInfo.getAVLReg());
1349+
if (!AVLDefMI)
1350+
return;
1351+
// This is an inline dominance check which covers the case of
1352+
// UnavailablePred being the preheader of a loop.
1353+
if (AVLDefMI->getParent() != UnavailablePred)
1354+
return;
1355+
for (auto &TermMI : UnavailablePred->terminators())
1356+
if (&TermMI == AVLDefMI)
1357+
return;
1358+
}
13701359

13711360
// Model the effect of changing the input state of the block MBB to
13721361
// AvailableInfo. We're looking for two issues here; one legality,

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,12 @@ define void @gather(ptr noalias nocapture %A, ptr noalias nocapture readonly %B)
1414
; CHECK-LABEL: gather:
1515
; CHECK: # %bb.0: # %entry
1616
; CHECK-NEXT: li a2, 1024
17-
; CHECK-NEXT: li a3, 32
18-
; CHECK-NEXT: li a4, 5
17+
; CHECK-NEXT: li a4, 32
18+
; CHECK-NEXT: li a3, 5
19+
; CHECK-NEXT: vsetvli zero, a4, e8, m1, ta, ma
1920
; CHECK-NEXT: .LBB0_1: # %vector.body
2021
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
21-
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
22-
; CHECK-NEXT: vlse8.v v8, (a1), a4
22+
; CHECK-NEXT: vlse8.v v8, (a1), a3
2323
; CHECK-NEXT: vle8.v v9, (a0)
2424
; CHECK-NEXT: vadd.vv v8, v9, v8
2525
; CHECK-NEXT: vse8.v v8, (a0)
@@ -126,12 +126,12 @@ define void @gather_negative_stride(ptr noalias nocapture %A, ptr noalias nocapt
126126
; CHECK: # %bb.0: # %entry
127127
; CHECK-NEXT: addi a1, a1, 155
128128
; CHECK-NEXT: li a2, 1024
129-
; CHECK-NEXT: li a3, 32
130-
; CHECK-NEXT: li a4, -5
129+
; CHECK-NEXT: li a4, 32
130+
; CHECK-NEXT: li a3, -5
131+
; CHECK-NEXT: vsetvli zero, a4, e8, m1, ta, ma
131132
; CHECK-NEXT: .LBB2_1: # %vector.body
132133
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
133-
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
134-
; CHECK-NEXT: vlse8.v v8, (a1), a4
134+
; CHECK-NEXT: vlse8.v v8, (a1), a3
135135
; CHECK-NEXT: vle8.v v9, (a0)
136136
; CHECK-NEXT: vadd.vv v8, v9, v8
137137
; CHECK-NEXT: vse8.v v8, (a0)
@@ -168,12 +168,12 @@ define void @gather_zero_stride(ptr noalias nocapture %A, ptr noalias nocapture
168168
; CHECK: # %bb.0: # %entry
169169
; CHECK-NEXT: li a2, 1024
170170
; CHECK-NEXT: li a3, 32
171+
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
171172
; CHECK-NEXT: .LBB3_1: # %vector.body
172173
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
173-
; CHECK-NEXT: lbu a4, 0(a1)
174-
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
174+
; CHECK-NEXT: lbu a3, 0(a1)
175175
; CHECK-NEXT: vle8.v v8, (a0)
176-
; CHECK-NEXT: vadd.vx v8, v8, a4
176+
; CHECK-NEXT: vadd.vx v8, v8, a3
177177
; CHECK-NEXT: vse8.v v8, (a0)
178178
; CHECK-NEXT: addi a2, a2, -32
179179
; CHECK-NEXT: addi a0, a0, 32
@@ -208,9 +208,9 @@ define void @gather_zero_stride_unfold(ptr noalias nocapture %A, ptr noalias noc
208208
; V: # %bb.0: # %entry
209209
; V-NEXT: li a2, 1024
210210
; V-NEXT: li a3, 32
211+
; V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
211212
; V-NEXT: .LBB4_1: # %vector.body
212213
; V-NEXT: # =>This Inner Loop Header: Depth=1
213-
; V-NEXT: vsetvli zero, a3, e8, m1, ta, ma
214214
; V-NEXT: vlse8.v v8, (a1), zero
215215
; V-NEXT: vle8.v v9, (a0)
216216
; V-NEXT: vdivu.vv v8, v8, v9
@@ -226,9 +226,9 @@ define void @gather_zero_stride_unfold(ptr noalias nocapture %A, ptr noalias noc
226226
; ZVE32F: # %bb.0: # %entry
227227
; ZVE32F-NEXT: li a2, 1024
228228
; ZVE32F-NEXT: li a3, 32
229+
; ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma
229230
; ZVE32F-NEXT: .LBB4_1: # %vector.body
230231
; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1
231-
; ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma
232232
; ZVE32F-NEXT: vlse8.v v8, (a1), zero
233233
; ZVE32F-NEXT: vle8.v v9, (a0)
234234
; ZVE32F-NEXT: vdivu.vv v8, v8, v9
@@ -244,12 +244,12 @@ define void @gather_zero_stride_unfold(ptr noalias nocapture %A, ptr noalias noc
244244
; NOT-OPTIMIZED: # %bb.0: # %entry
245245
; NOT-OPTIMIZED-NEXT: li a2, 1024
246246
; NOT-OPTIMIZED-NEXT: li a3, 32
247+
; NOT-OPTIMIZED-NEXT: vsetvli zero, a3, e8, m1, ta, ma
247248
; NOT-OPTIMIZED-NEXT: .LBB4_1: # %vector.body
248249
; NOT-OPTIMIZED-NEXT: # =>This Inner Loop Header: Depth=1
249-
; NOT-OPTIMIZED-NEXT: lbu a4, 0(a1)
250-
; NOT-OPTIMIZED-NEXT: vsetvli zero, a3, e8, m1, ta, ma
250+
; NOT-OPTIMIZED-NEXT: lbu a3, 0(a1)
251251
; NOT-OPTIMIZED-NEXT: vle8.v v8, (a0)
252-
; NOT-OPTIMIZED-NEXT: vmv.v.x v9, a4
252+
; NOT-OPTIMIZED-NEXT: vmv.v.x v9, a3
253253
; NOT-OPTIMIZED-NEXT: vdivu.vv v8, v9, v8
254254
; NOT-OPTIMIZED-NEXT: vse8.v v8, (a0)
255255
; NOT-OPTIMIZED-NEXT: addi a2, a2, -32
@@ -288,15 +288,15 @@ define void @scatter(ptr noalias nocapture %A, ptr noalias nocapture readonly %B
288288
; CHECK-LABEL: scatter:
289289
; CHECK: # %bb.0: # %entry
290290
; CHECK-NEXT: li a2, 1024
291-
; CHECK-NEXT: li a3, 32
292-
; CHECK-NEXT: li a4, 5
291+
; CHECK-NEXT: li a4, 32
292+
; CHECK-NEXT: li a3, 5
293+
; CHECK-NEXT: vsetvli zero, a4, e8, m1, ta, ma
293294
; CHECK-NEXT: .LBB5_1: # %vector.body
294295
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
295-
; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma
296296
; CHECK-NEXT: vle8.v v8, (a1)
297-
; CHECK-NEXT: vlse8.v v9, (a0), a4
297+
; CHECK-NEXT: vlse8.v v9, (a0), a3
298298
; CHECK-NEXT: vadd.vv v8, v9, v8
299-
; CHECK-NEXT: vsse8.v v8, (a0), a4
299+
; CHECK-NEXT: vsse8.v v8, (a0), a3
300300
; CHECK-NEXT: addi a2, a2, -32
301301
; CHECK-NEXT: addi a1, a1, 32
302302
; CHECK-NEXT: addi a0, a0, 160
@@ -821,20 +821,20 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt
821821
; CHECK-NEXT: add a6, a0, a2
822822
; CHECK-NEXT: add a2, a1, a2
823823
; CHECK-NEXT: add a2, a2, a7
824-
; CHECK-NEXT: li a7, 32
825-
; CHECK-NEXT: li t0, 5
826-
; CHECK-NEXT: mv t1, a5
824+
; CHECK-NEXT: li t0, 32
825+
; CHECK-NEXT: li a7, 5
826+
; CHECK-NEXT: vsetvli zero, t0, e8, m1, ta, ma
827+
; CHECK-NEXT: mv t0, a5
827828
; CHECK-NEXT: .LBB13_3: # %bb15
828829
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
829-
; CHECK-NEXT: vsetvli zero, a7, e8, m1, ta, ma
830-
; CHECK-NEXT: vlse8.v v8, (a2), t0
830+
; CHECK-NEXT: vlse8.v v8, (a2), a7
831831
; CHECK-NEXT: vle8.v v9, (a6)
832832
; CHECK-NEXT: vadd.vv v8, v9, v8
833833
; CHECK-NEXT: vse8.v v8, (a6)
834-
; CHECK-NEXT: addi t1, t1, -32
834+
; CHECK-NEXT: addi t0, t0, -32
835835
; CHECK-NEXT: addi a6, a6, 32
836836
; CHECK-NEXT: addi a2, a2, 160
837-
; CHECK-NEXT: bnez t1, .LBB13_3
837+
; CHECK-NEXT: bnez t0, .LBB13_3
838838
; CHECK-NEXT: # %bb.4: # %bb30
839839
; CHECK-NEXT: beq a4, a5, .LBB13_7
840840
; CHECK-NEXT: .LBB13_5: # %bb32

llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3892,9 +3892,9 @@ define void @sink_splat_mul_lmul8(ptr nocapture %a, i32 signext %x) {
38923892
; CHECK: # %bb.0: # %entry
38933893
; CHECK-NEXT: li a2, 1024
38943894
; CHECK-NEXT: li a3, 32
3895+
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
38953896
; CHECK-NEXT: .LBB74_1: # %vector.body
38963897
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3897-
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
38983898
; CHECK-NEXT: vle32.v v8, (a0)
38993899
; CHECK-NEXT: vmul.vx v8, v8, a1
39003900
; CHECK-NEXT: vse32.v v8, (a0)
@@ -3927,9 +3927,9 @@ define void @sink_splat_add_lmul8(ptr nocapture %a, i32 signext %x) {
39273927
; CHECK: # %bb.0: # %entry
39283928
; CHECK-NEXT: li a2, 1024
39293929
; CHECK-NEXT: li a3, 32
3930+
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
39303931
; CHECK-NEXT: .LBB75_1: # %vector.body
39313932
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3932-
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
39333933
; CHECK-NEXT: vle32.v v8, (a0)
39343934
; CHECK-NEXT: vadd.vx v8, v8, a1
39353935
; CHECK-NEXT: vse32.v v8, (a0)
@@ -3962,9 +3962,9 @@ define void @sink_splat_sub_lmul8(ptr nocapture %a, i32 signext %x) {
39623962
; CHECK: # %bb.0: # %entry
39633963
; CHECK-NEXT: li a2, 1024
39643964
; CHECK-NEXT: li a3, 32
3965+
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
39653966
; CHECK-NEXT: .LBB76_1: # %vector.body
39663967
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
3967-
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
39683968
; CHECK-NEXT: vle32.v v8, (a0)
39693969
; CHECK-NEXT: vsub.vx v8, v8, a1
39703970
; CHECK-NEXT: vse32.v v8, (a0)
@@ -3997,9 +3997,9 @@ define void @sink_splat_rsub_lmul8(ptr nocapture %a, i32 signext %x) {
39973997
; CHECK: # %bb.0: # %entry
39983998
; CHECK-NEXT: li a2, 1024
39993999
; CHECK-NEXT: li a3, 32
4000+
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
40004001
; CHECK-NEXT: .LBB77_1: # %vector.body
40014002
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4002-
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
40034003
; CHECK-NEXT: vle32.v v8, (a0)
40044004
; CHECK-NEXT: vrsub.vx v8, v8, a1
40054005
; CHECK-NEXT: vse32.v v8, (a0)
@@ -4032,9 +4032,9 @@ define void @sink_splat_and_lmul8(ptr nocapture %a, i32 signext %x) {
40324032
; CHECK: # %bb.0: # %entry
40334033
; CHECK-NEXT: li a2, 1024
40344034
; CHECK-NEXT: li a3, 32
4035+
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
40354036
; CHECK-NEXT: .LBB78_1: # %vector.body
40364037
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4037-
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
40384038
; CHECK-NEXT: vle32.v v8, (a0)
40394039
; CHECK-NEXT: vand.vx v8, v8, a1
40404040
; CHECK-NEXT: vse32.v v8, (a0)
@@ -4067,9 +4067,9 @@ define void @sink_splat_or_lmul8(ptr nocapture %a, i32 signext %x) {
40674067
; CHECK: # %bb.0: # %entry
40684068
; CHECK-NEXT: li a2, 1024
40694069
; CHECK-NEXT: li a3, 32
4070+
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
40704071
; CHECK-NEXT: .LBB79_1: # %vector.body
40714072
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4072-
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
40734073
; CHECK-NEXT: vle32.v v8, (a0)
40744074
; CHECK-NEXT: vor.vx v8, v8, a1
40754075
; CHECK-NEXT: vse32.v v8, (a0)
@@ -4102,9 +4102,9 @@ define void @sink_splat_xor_lmul8(ptr nocapture %a, i32 signext %x) {
41024102
; CHECK: # %bb.0: # %entry
41034103
; CHECK-NEXT: li a2, 1024
41044104
; CHECK-NEXT: li a3, 32
4105+
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
41054106
; CHECK-NEXT: .LBB80_1: # %vector.body
41064107
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
4107-
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
41084108
; CHECK-NEXT: vle32.v v8, (a0)
41094109
; CHECK-NEXT: vxor.vx v8, v8, a1
41104110
; CHECK-NEXT: vse32.v v8, (a0)

0 commit comments

Comments
 (0)