Skip to content

Commit 02d3d0e

Browse files
committed
[RISCV] Separate doLocalPostpass into new pass and move to post vector regalloc
This patch splits off part of the work to move vsetvli insertion to post regalloc in #70549.

The doLocalPostpass operates outside of RISCVInsertVSETVLI's dataflow, so we can move it to its own pass. We can then move it to post vector regalloc, which should be a smaller change since it only touches GPR registers.

A couple of things that are different from #70549:
- This manually fixes up the LiveIntervals rather than recomputing them via createAndComputeVirtRegInterval. I'm not sure if there's much of a difference between the two approaches.
- For the postpass it's sufficient to just check isUndef() in hasUndefinedMergeOp, i.e. we don't need to look up the def in VNInfo.

Running on llvm-test-suite and SPEC CPU 2017 there aren't any changes in the number of vsetvlis removed.

We could also potentially turn off this pass for unoptimised builds.
1 parent bfd1944 commit 02d3d0e

34 files changed

+1273
-1355
lines changed

llvm/lib/Target/RISCV/RISCV.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ void initializeRISCVExpandAtomicPseudoPass(PassRegistry &);
6161
FunctionPass *createRISCVInsertVSETVLIPass();
6262
void initializeRISCVInsertVSETVLIPass(PassRegistry &);
6363

64+
FunctionPass *createRISCVCoalesceVSETVLIPass();
65+
void initializeRISCVCoalesceVSETVLIPass(PassRegistry &);
66+
6467
FunctionPass *createRISCVPostRAExpandPseudoPass();
6568
void initializeRISCVPostRAExpandPseudoPass(PassRegistry &);
6669
FunctionPass *createRISCVInsertReadWriteCSRPass();

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

Lines changed: 107 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,17 @@
2828
#include "RISCVSubtarget.h"
2929
#include "llvm/ADT/Statistic.h"
3030
#include "llvm/CodeGen/LiveIntervals.h"
31+
#include "llvm/CodeGen/LiveStacks.h"
3132
#include "llvm/CodeGen/MachineFunctionPass.h"
3233
#include <queue>
3334
using namespace llvm;
3435

3536
#define DEBUG_TYPE "riscv-insert-vsetvli"
3637
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
38+
#define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass"
3739

3840
STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
39-
STATISTIC(NumRemovedVSETVL, "Number of VSETVL inst removed");
41+
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");
4042

4143
static cl::opt<bool> DisableInsertVSETVLPHIOpt(
4244
"riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
@@ -190,6 +192,11 @@ static bool hasUndefinedMergeOp(const MachineInstr &MI,
190192
if (UseMO.getReg() == RISCV::NoRegister)
191193
return true;
192194

195+
if (UseMO.isUndef())
196+
return true;
197+
if (UseMO.getReg().isPhysical())
198+
return false;
199+
193200
if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
194201
if (UseMI->isImplicitDef())
195202
return true;
@@ -778,18 +785,51 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
778785
VSETVLIInfo &Info) const;
779786
void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
780787
void emitVSETVLIs(MachineBasicBlock &MBB);
781-
void doLocalPostpass(MachineBasicBlock &MBB);
782788
void doPRE(MachineBasicBlock &MBB);
783789
void insertReadVL(MachineBasicBlock &MBB);
784790
};
785791

792+
class RISCVCoalesceVSETVLI : public MachineFunctionPass {
793+
public:
794+
static char ID;
795+
const RISCVSubtarget *ST;
796+
const TargetInstrInfo *TII;
797+
MachineRegisterInfo *MRI;
798+
LiveIntervals *LIS;
799+
800+
RISCVCoalesceVSETVLI() : MachineFunctionPass(ID) {}
801+
bool runOnMachineFunction(MachineFunction &MF) override;
802+
803+
void getAnalysisUsage(AnalysisUsage &AU) const override {
804+
AU.setPreservesCFG();
805+
806+
AU.addRequired<LiveIntervals>();
807+
AU.addPreserved<LiveIntervals>();
808+
AU.addRequired<SlotIndexes>();
809+
AU.addPreserved<SlotIndexes>();
810+
AU.addPreserved<LiveStacks>();
811+
812+
MachineFunctionPass::getAnalysisUsage(AU);
813+
}
814+
815+
StringRef getPassName() const override { return RISCV_COALESCE_VSETVLI_NAME; }
816+
817+
private:
818+
bool runOnBasicBlock(MachineBasicBlock &MF);
819+
};
820+
786821
} // end anonymous namespace
787822

788823
char RISCVInsertVSETVLI::ID = 0;
789824

790825
INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
791826
false, false)
792827

828+
char RISCVCoalesceVSETVLI::ID = 0;
829+
830+
INITIALIZE_PASS(RISCVCoalesceVSETVLI, "riscv-coalesce-vsetvli",
831+
RISCV_COALESCE_VSETVLI_NAME, false, false)
832+
793833
// Return a VSETVLIInfo representing the changes made by this VSETVLI or
794834
// VSETIVLI instruction.
795835
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
@@ -1511,7 +1551,10 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
15111551

15121552
auto &AVL = MI.getOperand(1);
15131553
auto &PrevAVL = PrevMI.getOperand(1);
1514-
assert(MRI.isSSA());
1554+
assert(!AVL.isReg() || !AVL.getReg().isVirtual() ||
1555+
MRI.hasOneDef(AVL.getReg()));
1556+
assert(!PrevAVL.isReg() || !PrevAVL.getReg().isVirtual() ||
1557+
MRI.hasOneDef(PrevAVL.getReg()));
15151558

15161559
// If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
15171560
// For now just check that PrevMI uses the same virtual register.
@@ -1526,7 +1569,7 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
15261569
return areCompatibleVTYPEs(PriorVType, VType, Used);
15271570
}
15281571

1529-
void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
1572+
bool RISCVCoalesceVSETVLI::runOnBasicBlock(MachineBasicBlock &MBB) {
15301573
MachineInstr *NextMI = nullptr;
15311574
// We can have arbitrary code in successors, so VL and VTYPE
15321575
// must be considered demanded.
@@ -1558,20 +1601,47 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
15581601

15591602
if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
15601603
if (!isVLPreservingConfig(*NextMI)) {
1561-
MI.getOperand(0).setReg(NextMI->getOperand(0).getReg());
1604+
Register DefReg = NextMI->getOperand(0).getReg();
1605+
1606+
MI.getOperand(0).setReg(DefReg);
15621607
MI.getOperand(0).setIsDead(false);
1608+
1609+
// The def of DefReg moved to MI, so extend the LiveInterval up to
1610+
// it.
1611+
if (DefReg.isVirtual()) {
1612+
LiveInterval &DefLI = LIS->getInterval(DefReg);
1613+
SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
1614+
VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
1615+
LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
1616+
DefLI.addSegment(S);
1617+
DefVNI->def = MISlot;
1618+
1619+
// DefReg may have had no uses, in which case we need to shrink
1620+
// the LiveInterval up to MI.
1621+
LIS->shrinkToUses(&DefLI);
1622+
}
1623+
15631624
Register OldVLReg;
15641625
if (MI.getOperand(1).isReg())
15651626
OldVLReg = MI.getOperand(1).getReg();
15661627
if (NextMI->getOperand(1).isImm())
15671628
MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
15681629
else
15691630
MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
1631+
1632+
// Clear NextMI's AVL early so we're not counting it as a use.
1633+
if (NextMI->getOperand(1).isReg())
1634+
NextMI->getOperand(1).setReg(RISCV::NoRegister);
1635+
15701636
if (OldVLReg) {
15711637
MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
15721638
if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
15731639
MRI->use_nodbg_empty(OldVLReg))
15741640
VLOpDef->eraseFromParent();
1641+
1642+
// NextMI no longer uses OldVLReg so shrink its LiveInterval.
1643+
if (OldVLReg.isVirtual())
1644+
LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
15751645
}
15761646
MI.setDesc(NextMI->getDesc());
15771647
}
@@ -1584,9 +1654,13 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
15841654
Used = getDemanded(MI, MRI, ST);
15851655
}
15861656

1587-
NumRemovedVSETVL += ToDelete.size();
1588-
for (auto *MI : ToDelete)
1657+
NumCoalescedVSETVL += ToDelete.size();
1658+
for (auto *MI : ToDelete) {
1659+
LIS->RemoveMachineInstrFromMaps(*MI);
15891660
MI->eraseFromParent();
1661+
}
1662+
1663+
return !ToDelete.empty();
15901664
}
15911665

15921666
void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
@@ -1661,15 +1735,6 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
16611735
for (MachineBasicBlock &MBB : MF)
16621736
emitVSETVLIs(MBB);
16631737

1664-
// Now that all vsetvlis are explicit, go through and do block local
1665-
// DSE and peephole based demanded fields based transforms. Note that
1666-
// this *must* be done outside the main dataflow so long as we allow
1667-
// any cross block analysis within the dataflow. We can't have both
1668-
// demanded fields based mutation and non-local analysis in the
1669-
// dataflow at the same time without introducing inconsistencies.
1670-
for (MachineBasicBlock &MBB : MF)
1671-
doLocalPostpass(MBB);
1672-
16731738
// Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
16741739
// of VLEFF/VLSEGFF.
16751740
for (MachineBasicBlock &MBB : MF)
@@ -1683,3 +1748,29 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
16831748
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
16841749
return new RISCVInsertVSETVLI();
16851750
}
1751+
1752+
// Now that all vsetvlis are explicit, go through and do block local
1753+
// DSE and peephole based demanded fields based transforms. Note that
1754+
// this *must* be done outside the main dataflow so long as we allow
1755+
// any cross block analysis within the dataflow. We can't have both
1756+
// demanded fields based mutation and non-local analysis in the
1757+
// dataflow at the same time without introducing inconsistencies.
1758+
bool RISCVCoalesceVSETVLI::runOnMachineFunction(MachineFunction &MF) {
1759+
// Skip if the vector extension is not enabled.
1760+
ST = &MF.getSubtarget<RISCVSubtarget>();
1761+
if (!ST->hasVInstructions())
1762+
return false;
1763+
TII = ST->getInstrInfo();
1764+
MRI = &MF.getRegInfo();
1765+
LIS = &getAnalysis<LiveIntervals>();
1766+
1767+
bool Changed = false;
1768+
for (MachineBasicBlock &MBB : MF)
1769+
Changed |= runOnBasicBlock(MBB);
1770+
1771+
return Changed;
1772+
}
1773+
1774+
FunctionPass *llvm::createRISCVCoalesceVSETVLIPass() {
1775+
return new RISCVCoalesceVSETVLI();
1776+
}

llvm/lib/Target/RISCV/RISCVTargetMachine.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
121121
initializeRISCVExpandPseudoPass(*PR);
122122
initializeRISCVFoldMasksPass(*PR);
123123
initializeRISCVInsertVSETVLIPass(*PR);
124+
initializeRISCVCoalesceVSETVLIPass(*PR);
124125
initializeRISCVInsertReadWriteCSRPass(*PR);
125126
initializeRISCVInsertWriteVXRMPass(*PR);
126127
initializeRISCVDAGToDAGISelPass(*PR);
@@ -394,6 +395,7 @@ FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {
394395
bool RISCVPassConfig::addRegAssignAndRewriteFast() {
395396
if (EnableSplitRegAlloc)
396397
addPass(createRVVRegAllocPass(false));
398+
addPass(createRISCVCoalesceVSETVLIPass());
397399
return TargetPassConfig::addRegAssignAndRewriteFast();
398400
}
399401

@@ -402,6 +404,7 @@ bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
402404
addPass(createRVVRegAllocPass(true));
403405
addPass(createVirtRegRewriter(false));
404406
}
407+
addPass(createRISCVCoalesceVSETVLIPass());
405408
return TargetPassConfig::addRegAssignAndRewriteOptimized();
406409
}
407410

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1407,8 +1407,8 @@ define <8 x float> @buildvec_v8f32_zvl256(float %e0, float %e1, float %e2, float
14071407
; CHECK-NEXT: vfmv.v.f v8, fa4
14081408
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
14091409
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
1410-
; CHECK-NEXT: vmv.v.i v0, 15
14111410
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
1411+
; CHECK-NEXT: vmv.v.i v0, 15
14121412
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
14131413
; CHECK-NEXT: ret
14141414
%v0 = insertelement <8 x float> poison, float %e0, i64 0
@@ -1458,8 +1458,8 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d
14581458
; CHECK-NEXT: vfmv.v.f v8, fa4
14591459
; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
14601460
; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
1461-
; CHECK-NEXT: vmv.v.i v0, 15
14621461
; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
1462+
; CHECK-NEXT: vmv.v.i v0, 15
14631463
; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t
14641464
; CHECK-NEXT: ret
14651465
%v0 = insertelement <8 x double> poison, double %e0, i64 0

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
5757
; RV32-V512-NEXT: vid.v v10
5858
; RV32-V512-NEXT: vsrl.vi v11, v10, 1
5959
; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu
60-
; RV32-V512-NEXT: vmv.v.i v0, 10
6160
; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
61+
; RV32-V512-NEXT: vmv.v.i v0, 10
6262
; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t
6363
; RV32-V512-NEXT: vmv.v.v v8, v10
6464
; RV32-V512-NEXT: ret
@@ -68,8 +68,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
6868
; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu
6969
; RV64-V512-NEXT: vid.v v10
7070
; RV64-V512-NEXT: vsrl.vi v11, v10, 1
71-
; RV64-V512-NEXT: vmv.v.i v0, 10
7271
; RV64-V512-NEXT: vrgather.vv v10, v8, v11
72+
; RV64-V512-NEXT: vmv.v.i v0, 10
7373
; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t
7474
; RV64-V512-NEXT: vmv.v.v v8, v10
7575
; RV64-V512-NEXT: ret

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -395,8 +395,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
395395
; RV32-NEXT: fmin.d fa5, fa5, fa4
396396
; RV32-NEXT: fcvt.w.d a2, fa5, rtz
397397
; RV32-NEXT: and a0, a0, a2
398-
; RV32-NEXT: vmv.v.i v0, 15
399398
; RV32-NEXT: vslide1down.vx v9, v9, a0
399+
; RV32-NEXT: vmv.v.i v0, 15
400400
; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
401401
; RV32-NEXT: vse8.v v9, (a1)
402402
; RV32-NEXT: addi sp, s0, -128
@@ -496,8 +496,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
496496
; RV64-NEXT: fmin.d fa5, fa5, fa4
497497
; RV64-NEXT: fcvt.l.d a2, fa5, rtz
498498
; RV64-NEXT: and a0, a0, a2
499-
; RV64-NEXT: vmv.v.i v0, 15
500499
; RV64-NEXT: vslide1down.vx v9, v9, a0
500+
; RV64-NEXT: vmv.v.i v0, 15
501501
; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
502502
; RV64-NEXT: vse8.v v9, (a1)
503503
; RV64-NEXT: addi sp, s0, -128
@@ -580,8 +580,8 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
580580
; RV32-NEXT: fmax.d fa4, fa4, fa3
581581
; RV32-NEXT: fmin.d fa5, fa4, fa5
582582
; RV32-NEXT: fcvt.wu.d a0, fa5, rtz
583-
; RV32-NEXT: vmv.v.i v0, 15
584583
; RV32-NEXT: vslide1down.vx v9, v9, a0
584+
; RV32-NEXT: vmv.v.i v0, 15
585585
; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
586586
; RV32-NEXT: vse8.v v9, (a1)
587587
; RV32-NEXT: addi sp, s0, -128
@@ -656,8 +656,8 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
656656
; RV64-NEXT: fmax.d fa4, fa4, fa3
657657
; RV64-NEXT: fmin.d fa5, fa4, fa5
658658
; RV64-NEXT: fcvt.lu.d a0, fa5, rtz
659-
; RV64-NEXT: vmv.v.i v0, 15
660659
; RV64-NEXT: vslide1down.vx v9, v9, a0
660+
; RV64-NEXT: vmv.v.i v0, 15
661661
; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
662662
; RV64-NEXT: vse8.v v9, (a1)
663663
; RV64-NEXT: addi sp, s0, -128

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
7070
; RV32-V512-NEXT: vid.v v10
7171
; RV32-V512-NEXT: vsrl.vi v11, v10, 1
7272
; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu
73-
; RV32-V512-NEXT: vmv.v.i v0, 10
7473
; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
74+
; RV32-V512-NEXT: vmv.v.i v0, 10
7575
; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t
7676
; RV32-V512-NEXT: vmv.v.v v8, v10
7777
; RV32-V512-NEXT: ret
@@ -81,8 +81,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
8181
; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu
8282
; RV64-V512-NEXT: vid.v v10
8383
; RV64-V512-NEXT: vsrl.vi v11, v10, 1
84-
; RV64-V512-NEXT: vmv.v.i v0, 10
8584
; RV64-V512-NEXT: vrgather.vv v10, v8, v11
85+
; RV64-V512-NEXT: vmv.v.i v0, 10
8686
; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t
8787
; RV64-V512-NEXT: vmv.v.v v8, v10
8888
; RV64-V512-NEXT: ret
@@ -195,8 +195,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) {
195195
; V128-NEXT: vsetivli zero, 4, e32, m1, ta, mu
196196
; V128-NEXT: vid.v v8
197197
; V128-NEXT: vsrl.vi v8, v8, 1
198-
; V128-NEXT: vmv.v.i v0, 10
199198
; V128-NEXT: vadd.vi v8, v8, 1
199+
; V128-NEXT: vmv.v.i v0, 10
200200
; V128-NEXT: vrgather.vv v10, v9, v8, v0.t
201201
; V128-NEXT: vmv.v.v v8, v10
202202
; V128-NEXT: ret
@@ -210,8 +210,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) {
210210
; V512-NEXT: vsetivli zero, 4, e32, mf2, ta, mu
211211
; V512-NEXT: vid.v v8
212212
; V512-NEXT: vsrl.vi v8, v8, 1
213-
; V512-NEXT: vmv.v.i v0, 10
214213
; V512-NEXT: vadd.vi v8, v8, 1
214+
; V512-NEXT: vmv.v.i v0, 10
215215
; V512-NEXT: vrgather.vv v10, v9, v8, v0.t
216216
; V512-NEXT: vmv1r.v v8, v10
217217
; V512-NEXT: ret

0 commit comments

Comments
 (0)