Skip to content

Commit 8675cd3

Browse files
authored
[RISCV][VLOPT] Compute demanded VLs up front (#124530)
This replaces the worklist with an up-front computation of the VL demanded by each instruction's users, which is done via checkUsers. The demanded VLs are stored in a DenseMap, and then we can just do a single forward pass of tryReduceVL where we check if a candidate's demanded VL is less than its VLOp. This means the pass should now be linear in complexity, and it allows us to relax the restriction on tied operands more easily, as in #124066.
1 parent cdced8e commit 8675cd3

File tree

3 files changed

+45
-42
lines changed

3 files changed

+45
-42
lines changed

llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp

Lines changed: 25 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
#include "RISCV.h"
2020
#include "RISCVSubtarget.h"
21-
#include "llvm/ADT/SetVector.h"
21+
#include "llvm/ADT/PostOrderIterator.h"
2222
#include "llvm/CodeGen/MachineDominators.h"
2323
#include "llvm/CodeGen/MachineFunctionPass.h"
2424
#include "llvm/InitializePasses.h"
@@ -56,6 +56,10 @@ class RISCVVLOptimizer : public MachineFunctionPass {
5656
std::optional<MachineOperand> checkUsers(MachineInstr &MI);
5757
bool tryReduceVL(MachineInstr &MI);
5858
bool isCandidate(const MachineInstr &MI) const;
59+
60+
/// For a given instruction, records what elements of it are demanded by
61+
/// downstream users.
62+
DenseMap<const MachineInstr *, std::optional<MachineOperand>> DemandedVLs;
5963
};
6064

6165
} // end anonymous namespace
@@ -1201,14 +1205,19 @@ RISCVVLOptimizer::getMinimumVLForUser(MachineOperand &UserOp) {
12011205
// Looking for an immediate or a register VL that isn't X0.
12021206
assert((!VLOp.isReg() || VLOp.getReg() != RISCV::X0) &&
12031207
"Did not expect X0 VL");
1208+
1209+
// If we know the demanded VL of UserMI, then we can reduce the VL it
1210+
// requires.
1211+
if (auto DemandedVL = DemandedVLs[&UserMI]) {
1212+
assert(isCandidate(UserMI));
1213+
if (RISCV::isVLKnownLE(*DemandedVL, VLOp))
1214+
return DemandedVL;
1215+
}
1216+
12041217
return VLOp;
12051218
}
12061219

12071220
std::optional<MachineOperand> RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
1208-
// FIXME: Avoid visiting each user for each time we visit something on the
1209-
// worklist, combined with an extra visit from the outer loop. Restructure
1210-
// along lines of an instcombine style worklist which integrates the outer
1211-
// pass.
12121221
std::optional<MachineOperand> CommonVL;
12131222
for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg())) {
12141223
const MachineInstr &UserMI = *UserOp.getParent();
@@ -1285,7 +1294,7 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) {
12851294
return false;
12861295
}
12871296

1288-
auto CommonVL = checkUsers(MI);
1297+
auto CommonVL = DemandedVLs[&MI];
12891298
if (!CommonVL)
12901299
return false;
12911300

@@ -1333,29 +1342,19 @@ bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
13331342
if (!ST.hasVInstructions())
13341343
return false;
13351344

1336-
SetVector<MachineInstr *> Worklist;
1337-
auto PushOperands = [this, &Worklist](MachineInstr &MI,
1338-
bool IgnoreSameBlock) {
1339-
for (auto &Op : MI.operands()) {
1340-
if (!Op.isReg() || !Op.isUse() || !Op.getReg().isVirtual() ||
1341-
!isVectorRegClass(Op.getReg(), MRI))
1342-
continue;
1343-
1344-
MachineInstr *DefMI = MRI->getVRegDef(Op.getReg());
1345-
if (!isCandidate(*DefMI))
1346-
continue;
1347-
1348-
if (IgnoreSameBlock && DefMI->getParent() == MI.getParent())
1345+
// For each instruction that defines a vector, compute what VL its
1346+
// downstream users demand.
1347+
for (MachineBasicBlock *MBB : post_order(&MF)) {
1348+
assert(MDT->isReachableFromEntry(MBB));
1349+
for (MachineInstr &MI : reverse(*MBB)) {
1350+
if (!isCandidate(MI))
13491351
continue;
1350-
1351-
Worklist.insert(DefMI);
1352+
DemandedVLs.insert({&MI, checkUsers(MI)});
13521353
}
1353-
};
1354+
}
13541355

1355-
// Do a first pass eagerly rewriting in roughly reverse instruction
1356-
// order, populate the worklist with any instructions we might need to
1357-
// revisit. We avoid adding definitions to the worklist if they're
1358-
// in the same block - we're about to visit them anyways.
1356+
// Then go through and see if we can reduce the VL of any instructions to
1357+
// only what's demanded.
13591358
bool MadeChange = false;
13601359
for (MachineBasicBlock &MBB : MF) {
13611360
// Avoid unreachable blocks as they have degenerate dominance
@@ -1368,18 +1367,8 @@ bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
13681367
if (!tryReduceVL(MI))
13691368
continue;
13701369
MadeChange = true;
1371-
PushOperands(MI, /*IgnoreSameBlock*/ true);
13721370
}
13731371
}
13741372

1375-
while (!Worklist.empty()) {
1376-
assert(MadeChange);
1377-
MachineInstr &MI = *Worklist.pop_back_val();
1378-
assert(isCandidate(MI));
1379-
if (!tryReduceVL(MI))
1380-
continue;
1381-
PushOperands(MI, /*IgnoreSameBlock*/ false);
1382-
}
1383-
13841373
return MadeChange;
13851374
}

llvm/test/CodeGen/RISCV/rvv/vl-opt.mir

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -162,13 +162,11 @@ body: |
162162
; CHECK-NEXT: bb.1:
163163
; CHECK-NEXT: %a1:vr = PseudoVADD_VV_M1 $noreg, %c, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
164164
; CHECK-NEXT: %a2:vr = PseudoVADD_VV_M1 $noreg, %a1, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
165-
; CHECK-NEXT: $v8 = COPY %a2
166165
; CHECK-NEXT: PseudoRET
167166
; CHECK-NEXT: {{ $}}
168167
; CHECK-NEXT: bb.2:
169168
; CHECK-NEXT: %b1:vr = PseudoVADD_VV_M1 $noreg, %c, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
170169
; CHECK-NEXT: %b2:vr = PseudoVADD_VV_M1 $noreg, %b1, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
171-
; CHECK-NEXT: $v8 = COPY %b2
172170
; CHECK-NEXT: PseudoRET
173171
; CHECK-NEXT: {{ $}}
174172
; CHECK-NEXT: bb.3:
@@ -183,15 +181,31 @@ body: |
183181
bb.1:
184182
%a1:vr = PseudoVADD_VV_M1 $noreg, %c, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
185183
%a2:vr = PseudoVADD_VV_M1 $noreg, %a1, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
186-
$v8 = COPY %a2
187184
PseudoRET
188185
bb.2:
189186
%b1:vr = PseudoVADD_VV_M1 $noreg, %c, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
190187
%b2:vr = PseudoVADD_VV_M1 $noreg, %b1, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
191-
$v8 = COPY %b2
192188
PseudoRET
193189
bb.3:
194190
liveins: $x1
195191
%c:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
196192
BEQ $x1, $x0, %bb.1
197193
PseudoBR %bb.2
194+
...
195+
---
196+
name: unreachable
197+
body: |
198+
; CHECK-LABEL: name: unreachable
199+
; CHECK: bb.0:
200+
; CHECK-NEXT: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
201+
; CHECK-NEXT: PseudoRET
202+
; CHECK-NEXT: {{ $}}
203+
; CHECK-NEXT: bb.1:
204+
; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
205+
; CHECK-NEXT: PseudoRET
206+
bb.0:
207+
%x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
208+
PseudoRET
209+
bb.1:
210+
%y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
211+
PseudoRET

llvm/test/CodeGen/RISCV/rvv/vlopt-same-vl.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,15 @@
1212

1313
define <vscale x 4 x i32> @same_vl_imm(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
1414
; CHECK: User VL is: 4
15-
; CHECK-NEXT: Abort due to CommonVL == VLOp, no point in reducing.
15+
; CHECK: Abort due to CommonVL == VLOp, no point in reducing.
1616
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i64 4)
1717
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, i64 4)
1818
ret <vscale x 4 x i32> %w
1919
}
2020

2121
define <vscale x 4 x i32> @same_vl_reg(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i64 %vl) {
2222
; CHECK: User VL is: %3:gprnox0
23-
; CHECK-NEXT: Abort due to CommonVL == VLOp, no point in reducing.
23+
; CHECK: Abort due to CommonVL == VLOp, no point in reducing.
2424
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i64 %vl)
2525
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, i64 %vl)
2626
ret <vscale x 4 x i32> %w

0 commit comments

Comments
 (0)