Skip to content

Commit 26a8a85

Browse files
authored
[RISCV] Introduce local peephole to reduce VLs based on demanded VL (#104689)
This is a fairly narrow transform (at the moment) to reduce the VLs of instructions feeding a store with a smaller VL. Note that the goal of this transform isn't really to reduce VL - it's to reduce VL *toggles*. To our knowledge, small reductions in VL without also changing LMUL are generally not profitable on existing hardware. For a single-use instruction without side effects, FP exceptions, or a result dependency on VL, reducing VL is legal if only a subset of its elements are demanded by the user. We'd already implemented this logic for vmv.v.v, and this patch simply applies it to stores as an alternate root. Longer term, I plan to extend this to other root instructions (e.g. different kinds of stores, reductions, etc.), and add a more general recursive walkback through operands. One risk with the dataflow-based approach is that we could be reducing the VL of an instruction scheduled in a region with the wider VL (i.e. mixed-mode computations), forcing an additional VL toggle. An example of this is the @insert_subvector_dag_loop test case, but it doesn't appear to happen widely. I think this is a risk we should accept.
1 parent 00baa1a commit 26a8a85

15 files changed

+123
-220
lines changed

llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp

Lines changed: 99 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ class RISCVVectorPeephole : public MachineFunctionPass {
6161
}
6262

6363
private:
64+
bool tryToReduceVL(MachineInstr &MI) const;
6465
bool convertToVLMAX(MachineInstr &MI) const;
6566
bool convertToWholeRegister(MachineInstr &MI) const;
6667
bool convertToUnmasked(MachineInstr &MI) const;
@@ -81,6 +82,96 @@ char RISCVVectorPeephole::ID = 0;
8182
INITIALIZE_PASS(RISCVVectorPeephole, DEBUG_TYPE, "RISC-V Fold Masks", false,
8283
false)
8384

85+
/// Given two VL operands, do we know that LHS <= RHS?
86+
static bool isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS) {
87+
if (LHS.isReg() && RHS.isReg() && LHS.getReg().isVirtual() &&
88+
LHS.getReg() == RHS.getReg())
89+
return true;
90+
if (RHS.isImm() && RHS.getImm() == RISCV::VLMaxSentinel)
91+
return true;
92+
if (LHS.isImm() && LHS.getImm() == RISCV::VLMaxSentinel)
93+
return false;
94+
if (!LHS.isImm() || !RHS.isImm())
95+
return false;
96+
return LHS.getImm() <= RHS.getImm();
97+
}
98+
99+
static unsigned getSEWLMULRatio(const MachineInstr &MI) {
100+
RISCVII::VLMUL LMUL = RISCVII::getLMul(MI.getDesc().TSFlags);
101+
unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
102+
return RISCVVType::getSEWLMULRatio(1 << Log2SEW, LMUL);
103+
}
104+
105+
// Attempt to reduce the VL of an instruction whose sole use is feeding a
106+
// instruction with a narrower VL. This currently works backwards from the
107+
// user instruction (which might have a smaller VL).
108+
bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const {
109+
// Note that the goal here is a bit multifaceted.
110+
// 1) For store's reducing the VL of the value being stored may help to
111+
// reduce VL toggles. This is somewhat of an artifact of the fact we
112+
// promote arithmetic instructions but VL predicate stores.
113+
// 2) For vmv.v.v reducing VL eagerly on the source instruction allows us
114+
// to share code with the foldVMV_V_V transform below.
115+
//
116+
// Note that to the best of our knowledge, reducing VL is generally not
117+
// a significant win on real hardware unless we can also reduce LMUL which
118+
// this code doesn't try to do.
119+
//
120+
// TODO: We can handle a bunch more instructions here, and probably
121+
// recurse backwards through operands too.
122+
unsigned SrcIdx = 0;
123+
switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
124+
default:
125+
return false;
126+
case RISCV::VSE8_V:
127+
case RISCV::VSE16_V:
128+
case RISCV::VSE32_V:
129+
case RISCV::VSE64_V:
130+
break;
131+
case RISCV::VMV_V_V:
132+
SrcIdx = 2;
133+
break;
134+
}
135+
136+
MachineOperand &VL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc()));
137+
if (VL.isImm() && VL.getImm() == RISCV::VLMaxSentinel)
138+
return false;
139+
140+
Register SrcReg = MI.getOperand(SrcIdx).getReg();
141+
// Note: one *use*, not one *user*.
142+
if (!MRI->hasOneUse(SrcReg))
143+
return false;
144+
145+
MachineInstr *Src = MRI->getVRegDef(SrcReg);
146+
if (!Src || Src->hasUnmodeledSideEffects() ||
147+
Src->getParent() != MI.getParent() || Src->getNumDefs() != 1 ||
148+
!RISCVII::hasVLOp(Src->getDesc().TSFlags) ||
149+
!RISCVII::hasSEWOp(Src->getDesc().TSFlags))
150+
return false;
151+
152+
// Src needs to have the same VLMAX as MI
153+
if (getSEWLMULRatio(MI) != getSEWLMULRatio(*Src))
154+
return false;
155+
156+
bool ActiveElementsAffectResult = RISCVII::activeElementsAffectResult(
157+
TII->get(RISCV::getRVVMCOpcode(Src->getOpcode())).TSFlags);
158+
if (ActiveElementsAffectResult || Src->mayRaiseFPException())
159+
return false;
160+
161+
MachineOperand &SrcVL = Src->getOperand(RISCVII::getVLOpNum(Src->getDesc()));
162+
if (VL.isIdenticalTo(SrcVL) || !isVLKnownLE(VL, SrcVL))
163+
return false;
164+
165+
if (VL.isImm())
166+
SrcVL.ChangeToImmediate(VL.getImm());
167+
else if (VL.isReg())
168+
SrcVL.ChangeToRegister(VL.getReg(), false);
169+
170+
// TODO: For instructions with a passthru, we could clear the passthru
171+
// and tail policy since we've just proven the tail is not demanded.
172+
return true;
173+
}
174+
84175
/// Check if an operand is an immediate or a materialized ADDI $x0, imm.
85176
std::optional<unsigned>
86177
RISCVVectorPeephole::getConstant(const MachineOperand &VL) const {
@@ -325,22 +416,6 @@ bool RISCVVectorPeephole::convertToUnmasked(MachineInstr &MI) const {
325416
return true;
326417
}
327418

328-
/// Given two VL operands, returns the one known to be the smallest or nullptr
329-
/// if unknown.
330-
static const MachineOperand *getKnownMinVL(const MachineOperand *LHS,
331-
const MachineOperand *RHS) {
332-
if (LHS->isReg() && RHS->isReg() && LHS->getReg().isVirtual() &&
333-
LHS->getReg() == RHS->getReg())
334-
return LHS;
335-
if (LHS->isImm() && LHS->getImm() == RISCV::VLMaxSentinel)
336-
return RHS;
337-
if (RHS->isImm() && RHS->getImm() == RISCV::VLMaxSentinel)
338-
return LHS;
339-
if (!LHS->isImm() || !RHS->isImm())
340-
return nullptr;
341-
return LHS->getImm() <= RHS->getImm() ? LHS : RHS;
342-
}
343-
344419
/// Check if it's safe to move From down to To, checking that no physical
345420
/// registers are clobbered.
346421
static bool isSafeToMove(const MachineInstr &From, const MachineInstr &To) {
@@ -362,21 +437,16 @@ static bool isSafeToMove(const MachineInstr &From, const MachineInstr &To) {
362437
return From.isSafeToMove(SawStore);
363438
}
364439

365-
static unsigned getSEWLMULRatio(const MachineInstr &MI) {
366-
RISCVII::VLMUL LMUL = RISCVII::getLMul(MI.getDesc().TSFlags);
367-
unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
368-
return RISCVVType::getSEWLMULRatio(1 << Log2SEW, LMUL);
369-
}
370-
371440
/// If a PseudoVMV_V_V is the only user of its input, fold its passthru and VL
372441
/// into it.
373442
///
374443
/// %x = PseudoVADD_V_V_M1 %passthru, %a, %b, %vl1, sew, policy
375444
/// %y = PseudoVMV_V_V_M1 %passthru, %x, %vl2, sew, policy
445+
/// (where %vl1 <= %vl2, see related tryToReduceVL)
376446
///
377447
/// ->
378448
///
379-
/// %y = PseudoVADD_V_V_M1 %passthru, %a, %b, min(vl1, vl2), sew, policy
449+
/// %y = PseudoVADD_V_V_M1 %passthru, %a, %b, vl1, sew, policy
380450
bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
381451
if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VMV_V_V)
382452
return false;
@@ -404,33 +474,16 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
404474
SrcPassthru.getReg() != Passthru.getReg())
405475
return false;
406476

407-
// Because Src and MI have the same passthru, we can use either AVL as long as
408-
// it's the smaller of the two.
409-
//
410-
// (src pt, ..., vl=5) x x x x x|. . .
411-
// (vmv.v.v pt, src, vl=3) x x x|. . . . .
412-
// ->
413-
// (src pt, ..., vl=3) x x x|. . . . .
414-
//
415-
// (src pt, ..., vl=3) x x x|. . . . .
416-
// (vmv.v.v pt, src, vl=6) x x x . . .|. .
417-
// ->
418-
// (src pt, ..., vl=3) x x x|. . . . .
477+
// Src VL will have already been reduced if legal (see tryToReduceVL),
478+
// so we don't need to handle a smaller source VL here. However, the
479+
// user's VL may be larger
419480
MachineOperand &SrcVL = Src->getOperand(RISCVII::getVLOpNum(Src->getDesc()));
420-
const MachineOperand *MinVL = getKnownMinVL(&MI.getOperand(3), &SrcVL);
421-
if (!MinVL)
422-
return false;
423-
424-
bool VLChanged = !MinVL->isIdenticalTo(SrcVL);
425-
bool ActiveElementsAffectResult = RISCVII::activeElementsAffectResult(
426-
TII->get(RISCV::getRVVMCOpcode(Src->getOpcode())).TSFlags);
427-
428-
if (VLChanged && (ActiveElementsAffectResult || Src->mayRaiseFPException()))
481+
if (!isVLKnownLE(SrcVL, MI.getOperand(3)))
429482
return false;
430483

431484
// If Src ends up using MI's passthru/VL, move it so it can access it.
432485
// TODO: We don't need to do this if they already dominate Src.
433-
if (!SrcVL.isIdenticalTo(*MinVL) || !SrcPassthru.isIdenticalTo(Passthru)) {
486+
if (!SrcPassthru.isIdenticalTo(Passthru)) {
434487
if (!isSafeToMove(*Src, MI))
435488
return false;
436489
Src->moveBefore(&MI);
@@ -445,11 +498,6 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
445498
*Src->getParent()->getParent()));
446499
}
447500

448-
if (MinVL->isImm())
449-
SrcVL.ChangeToImmediate(MinVL->getImm());
450-
else if (MinVL->isReg())
451-
SrcVL.ChangeToRegister(MinVL->getReg(), false);
452-
453501
// Use a conservative tu,mu policy, RISCVInsertVSETVLI will relax it if
454502
// passthru is undef.
455503
Src->getOperand(RISCVII::getVecPolicyOpNum(Src->getDesc()))
@@ -498,6 +546,7 @@ bool RISCVVectorPeephole::runOnMachineFunction(MachineFunction &MF) {
498546
for (MachineBasicBlock &MBB : MF) {
499547
for (MachineInstr &MI : make_early_inc_range(MBB)) {
500548
Changed |= convertToVLMAX(MI);
549+
Changed |= tryToReduceVL(MI);
501550
Changed |= convertToUnmasked(MI);
502551
Changed |= convertToWholeRegister(MI);
503552
Changed |= convertVMergeToVMv(MI);

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ define void @abs_v6i16(ptr %x) {
4141
; CHECK-NEXT: vle16.v v8, (a0)
4242
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4343
; CHECK-NEXT: vrsub.vi v9, v8, 0
44-
; CHECK-NEXT: vmax.vv v8, v8, v9
4544
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
45+
; CHECK-NEXT: vmax.vv v8, v8, v9
4646
; CHECK-NEXT: vse16.v v8, (a0)
4747
; CHECK-NEXT: ret
4848
%a = load <6 x i16>, ptr %x

0 commit comments

Comments
 (0)