Skip to content

Commit a35afaf

Browse files
committed
[RISCV] Separate doLocalPostpass into new pass and move to post vector regalloc
This patch splits off part of the work from #70549, which moves vsetvli insertion to post regalloc.

The doLocalPostpass operates outside of RISCVInsertVSETVLI's dataflow, so we can move it to its own pass. We can then move it to post vector regalloc, which should be a smaller change since it only touches GPR registers.

A couple of things that are different from #70549:

- This manually fixes up the LiveIntervals rather than recomputing them via createAndComputeVirtRegInterval. I'm not sure if there's much of a difference between the two approaches.
- For the postpass it's sufficient to just check isUndef() in hasUndefinedMergeOp, i.e. we don't need to look up the def in VNInfo.

Running on llvm-test-suite and SPEC CPU 2017 there aren't any changes in the number of vsetvls removed.

We could also potentially turn off this pass for unoptimised builds.
1 parent a06073f commit a35afaf

34 files changed

+1323
-1409
lines changed

llvm/lib/Target/RISCV/RISCV.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ void initializeRISCVExpandAtomicPseudoPass(PassRegistry &);
6161
FunctionPass *createRISCVInsertVSETVLIPass();
6262
void initializeRISCVInsertVSETVLIPass(PassRegistry &);
6363

64+
FunctionPass *createRISCVCoalesceVSETVLIPass();
65+
void initializeRISCVCoalesceVSETVLIPass(PassRegistry &);
66+
6467
FunctionPass *createRISCVPostRAExpandPseudoPass();
6568
void initializeRISCVPostRAExpandPseudoPass(PassRegistry &);
6669
FunctionPass *createRISCVInsertReadWriteCSRPass();

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

Lines changed: 157 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,17 @@
2828
#include "RISCVSubtarget.h"
2929
#include "llvm/ADT/Statistic.h"
3030
#include "llvm/CodeGen/LiveIntervals.h"
31+
#include "llvm/CodeGen/LiveStacks.h"
3132
#include "llvm/CodeGen/MachineFunctionPass.h"
3233
#include <queue>
3334
using namespace llvm;
3435

3536
#define DEBUG_TYPE "riscv-insert-vsetvli"
3637
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
38+
#define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass"
3739

3840
STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
39-
STATISTIC(NumRemovedVSETVL, "Number of VSETVL inst removed");
41+
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");
4042

4143
static cl::opt<bool> DisableInsertVSETVLPHIOpt(
4244
"riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
@@ -190,6 +192,11 @@ static bool hasUndefinedMergeOp(const MachineInstr &MI,
190192
if (UseMO.getReg() == RISCV::NoRegister)
191193
return true;
192194

195+
if (UseMO.isUndef())
196+
return true;
197+
if (UseMO.getReg().isPhysical())
198+
return false;
199+
193200
if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
194201
if (UseMI->isImplicitDef())
195202
return true;
@@ -777,18 +784,44 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
777784
VSETVLIInfo &Info) const;
778785
void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
779786
void emitVSETVLIs(MachineBasicBlock &MBB);
780-
void doLocalPostpass(MachineBasicBlock &MBB);
781787
void doPRE(MachineBasicBlock &MBB);
782788
void insertReadVL(MachineBasicBlock &MBB);
783789
};
784790

791+
class RISCVCoalesceVSETVLI : public MachineFunctionPass {
792+
public:
793+
static char ID;
794+
795+
RISCVCoalesceVSETVLI() : MachineFunctionPass(ID) {}
796+
bool runOnMachineFunction(MachineFunction &MF) override;
797+
798+
void getAnalysisUsage(AnalysisUsage &AU) const override {
799+
AU.setPreservesCFG();
800+
801+
AU.addRequired<LiveIntervals>();
802+
AU.addPreserved<LiveIntervals>();
803+
AU.addRequired<SlotIndexes>();
804+
AU.addPreserved<SlotIndexes>();
805+
AU.addPreserved<LiveStacks>();
806+
807+
MachineFunctionPass::getAnalysisUsage(AU);
808+
}
809+
810+
StringRef getPassName() const override { return RISCV_COALESCE_VSETVLI_NAME; }
811+
};
812+
785813
} // end anonymous namespace
786814

787815
char RISCVInsertVSETVLI::ID = 0;
788816

789817
INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
790818
false, false)
791819

820+
char RISCVCoalesceVSETVLI::ID = 0;
821+
822+
INITIALIZE_PASS(RISCVCoalesceVSETVLI, "riscv-coalesce-vsetvli",
823+
RISCV_COALESCE_VSETVLI_NAME, false, false)
824+
792825
// Return a VSETVLIInfo representing the changes made by this VSETVLI or
793826
// VSETIVLI instruction.
794827
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
@@ -1510,7 +1543,10 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
15101543

15111544
auto &AVL = MI.getOperand(1);
15121545
auto &PrevAVL = PrevMI.getOperand(1);
1513-
assert(MRI.isSSA());
1546+
assert(!AVL.isReg() || !AVL.getReg().isVirtual() ||
1547+
MRI.hasOneDef(AVL.getReg()));
1548+
assert(!PrevAVL.isReg() || !PrevAVL.getReg().isVirtual() ||
1549+
MRI.hasOneDef(PrevAVL.getReg()));
15141550

15151551
// If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
15161552
// For now just check that PrevMI uses the same virtual register.
@@ -1530,64 +1566,6 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
15301566
return areCompatibleVTYPEs(PriorVType, VType, Used);
15311567
}
15321568

1533-
void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
1534-
MachineInstr *NextMI = nullptr;
1535-
// We can have arbitrary code in successors, so VL and VTYPE
1536-
// must be considered demanded.
1537-
DemandedFields Used;
1538-
Used.demandVL();
1539-
Used.demandVTYPE();
1540-
SmallVector<MachineInstr*> ToDelete;
1541-
for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
1542-
1543-
if (!isVectorConfigInstr(MI)) {
1544-
doUnion(Used, getDemanded(MI, MRI, ST));
1545-
continue;
1546-
}
1547-
1548-
Register VRegDef = MI.getOperand(0).getReg();
1549-
if (VRegDef != RISCV::X0 &&
1550-
!(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
1551-
Used.demandVL();
1552-
1553-
if (NextMI) {
1554-
if (!Used.usedVL() && !Used.usedVTYPE()) {
1555-
ToDelete.push_back(&MI);
1556-
// Leave NextMI unchanged
1557-
continue;
1558-
} else if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
1559-
if (!isVLPreservingConfig(*NextMI)) {
1560-
MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
1561-
MI.getOperand(0).setIsDead(false);
1562-
Register OldVLReg;
1563-
if (MI.getOperand(1).isReg())
1564-
OldVLReg = MI.getOperand(1).getReg();
1565-
if (NextMI->getOperand(1).isImm())
1566-
MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
1567-
else
1568-
MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
1569-
if (OldVLReg) {
1570-
MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
1571-
if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
1572-
MRI->use_nodbg_empty(OldVLReg))
1573-
VLOpDef->eraseFromParent();
1574-
}
1575-
MI.setDesc(NextMI->getDesc());
1576-
}
1577-
MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
1578-
ToDelete.push_back(NextMI);
1579-
// fallthrough
1580-
}
1581-
}
1582-
NextMI = &MI;
1583-
Used = getDemanded(MI, MRI, ST);
1584-
}
1585-
1586-
NumRemovedVSETVL += ToDelete.size();
1587-
for (auto *MI : ToDelete)
1588-
MI->eraseFromParent();
1589-
}
1590-
15911569
void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
15921570
for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
15931571
MachineInstr &MI = *I++;
@@ -1660,15 +1638,6 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
16601638
for (MachineBasicBlock &MBB : MF)
16611639
emitVSETVLIs(MBB);
16621640

1663-
// Now that all vsetvlis are explicit, go through and do block local
1664-
// DSE and peephole based demanded fields based transforms. Note that
1665-
// this *must* be done outside the main dataflow so long as we allow
1666-
// any cross block analysis within the dataflow. We can't have both
1667-
// demanded fields based mutation and non-local analysis in the
1668-
// dataflow at the same time without introducing inconsistencies.
1669-
for (MachineBasicBlock &MBB : MF)
1670-
doLocalPostpass(MBB);
1671-
16721641
// Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
16731642
// of VLEFF/VLSEGFF.
16741643
for (MachineBasicBlock &MBB : MF)
@@ -1682,3 +1651,121 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
16821651
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
16831652
return new RISCVInsertVSETVLI();
16841653
}
1654+
1655+
// Now that all vsetvlis are explicit, go through and do block local
1656+
// DSE and peephole based demanded fields based transforms. Note that
1657+
// this *must* be done outside the main dataflow so long as we allow
1658+
// any cross block analysis within the dataflow. We can't have both
1659+
// demanded fields based mutation and non-local analysis in the
1660+
// dataflow at the same time without introducing inconsistencies.
1661+
bool RISCVCoalesceVSETVLI::runOnMachineFunction(MachineFunction &MF) {
1662+
// Skip if the vector extension is not enabled.
1663+
auto *ST = &MF.getSubtarget<RISCVSubtarget>();
1664+
if (!ST->hasVInstructions())
1665+
return false;
1666+
1667+
LiveIntervals &LIS = getAnalysis<LiveIntervals>();
1668+
1669+
bool Changed = false;
1670+
1671+
const auto *TII = ST->getInstrInfo();
1672+
auto *MRI = &MF.getRegInfo();
1673+
for (MachineBasicBlock &MBB : MF) {
1674+
MachineInstr *NextMI = nullptr;
1675+
// We can have arbitrary code in successors, so VL and VTYPE
1676+
// must be considered demanded.
1677+
DemandedFields Used;
1678+
Used.demandVL();
1679+
Used.demandVTYPE();
1680+
SmallVector<MachineInstr *> ToDelete;
1681+
for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
1682+
1683+
if (!isVectorConfigInstr(MI)) {
1684+
doUnion(Used, getDemanded(MI, MRI, ST));
1685+
continue;
1686+
}
1687+
1688+
Register VRegDef = MI.getOperand(0).getReg();
1689+
if (VRegDef != RISCV::X0 &&
1690+
!(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
1691+
Used.demandVL();
1692+
1693+
if (NextMI) {
1694+
if (!Used.usedVL() && !Used.usedVTYPE()) {
1695+
ToDelete.push_back(&MI);
1696+
// Leave NextMI unchanged
1697+
continue;
1698+
} else if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
1699+
if (!isVLPreservingConfig(*NextMI)) {
1700+
1701+
Register DefReg = NextMI->getOperand(0).getReg();
1702+
1703+
MI.getOperand(0).setReg(DefReg);
1704+
MI.getOperand(0).setIsDead(false);
1705+
1706+
// The def of DefReg moved to MI, so extend the LiveInterval up to
1707+
// it.
1708+
if (DefReg.isVirtual()) {
1709+
LiveInterval &DefLI = LIS.getInterval(DefReg);
1710+
SlotIndex MISlot = LIS.getInstructionIndex(MI).getRegSlot();
1711+
VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
1712+
LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
1713+
DefLI.addSegment(S);
1714+
DefVNI->def = MISlot;
1715+
1716+
// DefReg may have had no uses, in which case we need to shrink
1717+
// the LiveInterval up to MI.
1718+
LIS.shrinkToUses(&DefLI);
1719+
}
1720+
1721+
Register OldVLReg;
1722+
if (MI.getOperand(1).isReg())
1723+
OldVLReg = MI.getOperand(1).getReg();
1724+
if (NextMI->getOperand(1).isImm())
1725+
MI.getOperand(1).ChangeToImmediate(
1726+
NextMI->getOperand(1).getImm());
1727+
else
1728+
MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
1729+
false);
1730+
1731+
// Clear NextMI's AVL early so we're not counting it as a use.
1732+
if (NextMI->getOperand(1).isReg())
1733+
NextMI->getOperand(1).setReg(RISCV::NoRegister);
1734+
1735+
if (OldVLReg) {
1736+
MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
1737+
if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
1738+
MRI->use_nodbg_empty(OldVLReg)) {
1739+
VLOpDef->eraseFromParent();
1740+
}
1741+
1742+
// NextMI no longer uses OldVLReg so shrink its LiveInterval.
1743+
if (OldVLReg.isVirtual())
1744+
LIS.shrinkToUses(&LIS.getInterval(OldVLReg));
1745+
}
1746+
1747+
MI.setDesc(NextMI->getDesc());
1748+
}
1749+
MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
1750+
ToDelete.push_back(NextMI);
1751+
// fallthrough
1752+
}
1753+
}
1754+
NextMI = &MI;
1755+
Used = getDemanded(MI, MRI, ST);
1756+
}
1757+
1758+
Changed |= !ToDelete.empty();
1759+
NumCoalescedVSETVL += ToDelete.size();
1760+
for (auto *MI : ToDelete) {
1761+
LIS.RemoveMachineInstrFromMaps(*MI);
1762+
MI->eraseFromParent();
1763+
}
1764+
}
1765+
1766+
return Changed;
1767+
}
1768+
1769+
FunctionPass *llvm::createRISCVCoalesceVSETVLIPass() {
1770+
return new RISCVCoalesceVSETVLI();
1771+
}

llvm/lib/Target/RISCV/RISCVTargetMachine.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
121121
initializeRISCVExpandPseudoPass(*PR);
122122
initializeRISCVFoldMasksPass(*PR);
123123
initializeRISCVInsertVSETVLIPass(*PR);
124+
initializeRISCVCoalesceVSETVLIPass(*PR);
124125
initializeRISCVInsertReadWriteCSRPass(*PR);
125126
initializeRISCVInsertWriteVXRMPass(*PR);
126127
initializeRISCVDAGToDAGISelPass(*PR);
@@ -394,6 +395,7 @@ FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {
394395
bool RISCVPassConfig::addRegAssignAndRewriteFast() {
395396
if (EnableSplitRegAlloc)
396397
addPass(createRVVRegAllocPass(false));
398+
addPass(createRISCVCoalesceVSETVLIPass());
397399
return TargetPassConfig::addRegAssignAndRewriteFast();
398400
}
399401

@@ -402,6 +404,7 @@ bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
402404
addPass(createRVVRegAllocPass(true));
403405
addPass(createVirtRegRewriter(false));
404406
}
407+
addPass(createRISCVCoalesceVSETVLIPass());
405408
return TargetPassConfig::addRegAssignAndRewriteOptimized();
406409
}
407410

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1407,8 +1407,8 @@ define <8 x float> @buildvec_v8f32_zvl256(float %e0, float %e1, float %e2, float
14071407
; CHECK-NEXT: vfmv.v.f v8, fa4
14081408
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
14091409
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
1410-
; CHECK-NEXT: vmv.v.i v0, 15
14111410
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
1411+
; CHECK-NEXT: vmv.v.i v0, 15
14121412
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
14131413
; CHECK-NEXT: ret
14141414
%v0 = insertelement <8 x float> poison, float %e0, i64 0
@@ -1458,8 +1458,8 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d
14581458
; CHECK-NEXT: vfmv.v.f v8, fa4
14591459
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
14601460
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
1461-
; CHECK-NEXT: vmv.v.i v0, 15
14621461
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
1462+
; CHECK-NEXT: vmv.v.i v0, 15
14631463
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
14641464
; CHECK-NEXT: ret
14651465
%v0 = insertelement <8 x double> poison, double %e0, i64 0

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
5757
; RV32-V512-NEXT: vid.v v10
5858
; RV32-V512-NEXT: vsrl.vi v11, v10, 1
5959
; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu
60-
; RV32-V512-NEXT: vmv.v.i v0, 10
6160
; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
61+
; RV32-V512-NEXT: vmv.v.i v0, 10
6262
; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t
6363
; RV32-V512-NEXT: vmv.v.v v8, v10
6464
; RV32-V512-NEXT: ret
@@ -68,8 +68,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
6868
; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu
6969
; RV64-V512-NEXT: vid.v v10
7070
; RV64-V512-NEXT: vsrl.vi v11, v10, 1
71-
; RV64-V512-NEXT: vmv.v.i v0, 10
7271
; RV64-V512-NEXT: vrgather.vv v10, v8, v11
72+
; RV64-V512-NEXT: vmv.v.i v0, 10
7373
; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t
7474
; RV64-V512-NEXT: vmv.v.v v8, v10
7575
; RV64-V512-NEXT: ret

0 commit comments

Comments
 (0)