-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV] Move RISCVInsertVSETVLI to after phi elimination #91440
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Luke Lau (lukel97) Changes: Split off from #70549, this patch moves RISCVInsertVSETVLI to after phi elimination where we exit SSA and need to move to LiveVariables. The motivation for splitting this off is to avoid the large scheduling diffs from moving completely to after regalloc, and instead focus on converting the pass to work on LiveIntervals. The two main changes required are updating VSETVLIInfo to store VNInfos instead of MachineInstrs, which allows us to still check for PHI defs in needVSETVLIPHI, and fixing up the live intervals of any AVL operands after inserting new instructions. On O3 the pass is inserted after the register coalescer, otherwise we end up with a bunch of COPYs around eliminated PHIs that trip up needVSETVLIPHI. Note that this manually fixes up the LiveIntervals instead of recomputing them as is currently done in #70549, since it seems to avoid most of the changes in spills and reloads that we were seeing. Specifically LIS->handleMove seemed to change the SlotIndex of instructions which might have affected regalloc. After this patch moving to post regalloc should be a matter of moving the pass in RISCVTargetMachine without any further changes to RISCVInsertVSETVLI itself. Co-authored-by: Piyou Chen <[email protected]> Patch is 1.73 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/91440.diff 110 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 5f8b610e52338..f2cf40483fcd4 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -47,6 +47,14 @@ static cl::opt<bool> DisableInsertVSETVLPHIOpt(
namespace {
+static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI,
+ const LiveIntervals *LIS) {
+ auto &LI = LIS->getInterval(Reg);
+ SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI);
+ VNInfo *VNI = LI.getVNInfoBefore(SI);
+ return VNI;
+}
+
static unsigned getVLOpNum(const MachineInstr &MI) {
return RISCVII::getVLOpNum(MI.getDesc());
}
@@ -337,9 +345,7 @@ static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
}
/// Return the fields and properties demanded by the provided instruction.
-DemandedFields getDemanded(const MachineInstr &MI,
- const MachineRegisterInfo *MRI,
- const RISCVSubtarget *ST) {
+DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
// Warning: This function has to work on both the lowered (i.e. post
// emitVSETVLIs) and pre-lowering forms. The main implication of this is
// that it can't use the value of a SEW, VL, or Policy operand as they might
@@ -428,7 +434,7 @@ DemandedFields getDemanded(const MachineInstr &MI,
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
struct AVLDef {
- const MachineInstr *DefMI;
+ const VNInfo *AVLVNInfo;
Register DefReg;
};
union {
@@ -467,9 +473,9 @@ class VSETVLIInfo {
void setUnknown() { State = Unknown; }
bool isUnknown() const { return State == Unknown; }
- void setAVLRegDef(const MachineInstr *DefMI, Register AVLReg) {
- assert(DefMI && AVLReg.isVirtual());
- AVLRegDef.DefMI = DefMI;
+ void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) {
+ assert(VNInfo && AVLReg.isVirtual());
+ AVLRegDef.AVLVNInfo = VNInfo;
AVLRegDef.DefReg = AVLReg;
State = AVLIsReg;
}
@@ -495,9 +501,9 @@ class VSETVLIInfo {
assert(hasAVLImm());
return AVLImm;
}
- const MachineInstr &getAVLDefMI() const {
- assert(hasAVLReg() && AVLRegDef.DefMI);
- return *AVLRegDef.DefMI;
+ const VNInfo *getAVLVNInfo() const {
+ assert(hasAVLReg());
+ return AVLRegDef.AVLVNInfo;
}
void setAVL(VSETVLIInfo Info) {
@@ -505,7 +511,7 @@ class VSETVLIInfo {
if (Info.isUnknown())
setUnknown();
else if (Info.hasAVLReg())
- setAVLRegDef(&Info.getAVLDefMI(), Info.getAVLReg());
+ setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg());
else if (Info.hasAVLVLMAX())
setAVLVLMAX();
else if (Info.hasAVLIgnored())
@@ -521,11 +527,13 @@ class VSETVLIInfo {
bool getTailAgnostic() const { return TailAgnostic; }
bool getMaskAgnostic() const { return MaskAgnostic; }
- bool hasNonZeroAVL() const {
+ bool hasNonZeroAVL(const LiveIntervals *LIS) const {
if (hasAVLImm())
return getAVLImm() > 0;
- if (hasAVLReg())
- return isNonZeroLoadImmediate(getAVLDefMI());
+ if (hasAVLReg()) {
+ if (auto *DefMI = LIS->getInstructionFromIndex(getAVLVNInfo()->def))
+ return isNonZeroLoadImmediate(*DefMI);
+ }
if (hasAVLVLMAX())
return true;
if (hasAVLIgnored())
@@ -533,16 +541,17 @@ class VSETVLIInfo {
return false;
}
- bool hasEquallyZeroAVL(const VSETVLIInfo &Other) const {
+ bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
+ const LiveIntervals *LIS) const {
if (hasSameAVL(Other))
return true;
- return (hasNonZeroAVL() && Other.hasNonZeroAVL());
+ return (hasNonZeroAVL(LIS) && Other.hasNonZeroAVL(LIS));
}
bool hasSameAVL(const VSETVLIInfo &Other) const {
if (hasAVLReg() && Other.hasAVLReg())
- return AVLRegDef.DefMI == Other.AVLRegDef.DefMI &&
- AVLRegDef.DefReg == Other.AVLRegDef.DefReg;
+ return getAVLVNInfo()->id == Other.getAVLVNInfo()->id &&
+ getAVLReg() == Other.getAVLReg();
if (hasAVLImm() && Other.hasAVLImm())
return getAVLImm() == Other.getAVLImm();
@@ -622,7 +631,7 @@ class VSETVLIInfo {
// Require are compatible with the previous vsetvli instruction represented
// by this. MI is the instruction whose requirements we're considering.
bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
- const MachineRegisterInfo &MRI) const {
+ const LiveIntervals *LIS) const {
assert(isValid() && Require.isValid() &&
"Can't compare invalid VSETVLIInfos");
assert(!Require.SEWLMULRatioOnly &&
@@ -638,7 +647,7 @@ class VSETVLIInfo {
if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
return false;
- if (Used.VLZeroness && !hasEquallyZeroAVL(Require))
+ if (Used.VLZeroness && !hasEquallyZeroAVL(Require, LIS))
return false;
return hasCompatibleVTYPE(Used, Require);
@@ -767,6 +776,7 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
const RISCVSubtarget *ST;
const TargetInstrInfo *TII;
MachineRegisterInfo *MRI;
+ LiveIntervals *LIS;
std::vector<BlockData> BlockInfo;
std::queue<const MachineBasicBlock *> WorkList;
@@ -779,6 +789,14 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveDebugVariables>();
+ AU.addPreserved<LiveStacks>();
+
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -850,7 +868,7 @@ INITIALIZE_PASS(RISCVCoalesceVSETVLI, "riscv-coalesce-vsetvli",
// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI,
- const MachineRegisterInfo &MRI) {
+ const LiveIntervals *LIS) {
VSETVLIInfo NewInfo;
if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
NewInfo.setAVLImm(MI.getOperand(1).getImm());
@@ -863,7 +881,7 @@ static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI,
if (AVLReg == RISCV::X0)
NewInfo.setAVLVLMAX();
else
- NewInfo.setAVLRegDef(MRI.getUniqueVRegDef(AVLReg), AVLReg);
+ NewInfo.setAVLRegDef(getVNInfoFromReg(AVLReg, MI, LIS), AVLReg);
}
NewInfo.setVTYPE(MI.getOperand(2).getImm());
@@ -882,7 +900,7 @@ static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
const RISCVSubtarget &ST,
- const MachineRegisterInfo *MRI) {
+ const LiveIntervals *LIS) {
VSETVLIInfo InstrInfo;
bool TailAgnostic = true;
@@ -935,7 +953,7 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
else
InstrInfo.setAVLImm(Imm);
} else {
- InstrInfo.setAVLRegDef(MRI->getUniqueVRegDef(VLOp.getReg()),
+ InstrInfo.setAVLRegDef(getVNInfoFromReg(VLOp.getReg(), MI, LIS),
VLOp.getReg());
}
} else {
@@ -957,9 +975,10 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
// register AVLs to avoid extending live ranges without being sure we can
// kill the original source reg entirely.
if (InstrInfo.hasAVLReg()) {
- const MachineInstr &DefMI = InstrInfo.getAVLDefMI();
- if (isVectorConfigInstr(DefMI)) {
- VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(DefMI, *MRI);
+ const MachineInstr *DefMI =
+ LIS->getInstructionFromIndex(InstrInfo.getAVLVNInfo()->def);
+ if (DefMI && isVectorConfigInstr(*DefMI)) {
+ VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI, LIS);
if (DefInstrInfo.hasSameVLMAX(InstrInfo) &&
(DefInstrInfo.hasAVLImm() || DefInstrInfo.hasAVLVLMAX()))
InstrInfo.setAVL(DefInstrInfo);
@@ -985,11 +1004,12 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
// Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
// VLMAX.
if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
- BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
- .addReg(RISCV::X0, RegState::Define | RegState::Dead)
- .addReg(RISCV::X0, RegState::Kill)
- .addImm(Info.encodeVTYPE())
- .addReg(RISCV::VL, RegState::Implicit);
+ auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
+ .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+ .addReg(RISCV::X0, RegState::Kill)
+ .addImm(Info.encodeVTYPE())
+ .addReg(RISCV::VL, RegState::Implicit);
+ LIS->InsertMachineInstrInMaps(*MI);
return;
}
@@ -997,15 +1017,17 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
// it has the same VLMAX we want and the last VL/VTYPE we observed is the
// same, we can use the X0, X0 form.
if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg()) {
- const MachineInstr &DefMI = Info.getAVLDefMI();
- if (isVectorConfigInstr(DefMI)) {
- VSETVLIInfo DefInfo = getInfoForVSETVLI(DefMI, *MRI);
+ const MachineInstr *DefMI =
+ LIS->getInstructionFromIndex(Info.getAVLVNInfo()->def);
+ if (DefMI && isVectorConfigInstr(*DefMI)) {
+ VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI, LIS);
if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
- BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
- .addReg(RISCV::X0, RegState::Define | RegState::Dead)
- .addReg(RISCV::X0, RegState::Kill)
- .addImm(Info.encodeVTYPE())
- .addReg(RISCV::VL, RegState::Implicit);
+ auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
+ .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+ .addReg(RISCV::X0, RegState::Kill)
+ .addImm(Info.encodeVTYPE())
+ .addReg(RISCV::VL, RegState::Implicit);
+ LIS->InsertMachineInstrInMaps(*MI);
return;
}
}
@@ -1013,10 +1035,11 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
}
if (Info.hasAVLImm()) {
- BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
- .addReg(RISCV::X0, RegState::Define | RegState::Dead)
- .addImm(Info.getAVLImm())
- .addImm(Info.encodeVTYPE());
+ auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
+ .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+ .addImm(Info.getAVLImm())
+ .addImm(Info.encodeVTYPE());
+ LIS->InsertMachineInstrInMaps(*MI);
return;
}
@@ -1025,36 +1048,43 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
// the previous vl to become invalid.
if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
Info.hasSameVLMAX(PrevInfo)) {
- BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
- .addReg(RISCV::X0, RegState::Define | RegState::Dead)
- .addReg(RISCV::X0, RegState::Kill)
- .addImm(Info.encodeVTYPE())
- .addReg(RISCV::VL, RegState::Implicit);
+ auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
+ .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+ .addReg(RISCV::X0, RegState::Kill)
+ .addImm(Info.encodeVTYPE())
+ .addReg(RISCV::VL, RegState::Implicit);
+ LIS->InsertMachineInstrInMaps(*MI);
return;
}
// Otherwise use an AVL of 1 to avoid depending on previous vl.
- BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
- .addReg(RISCV::X0, RegState::Define | RegState::Dead)
- .addImm(1)
- .addImm(Info.encodeVTYPE());
+ auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
+ .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+ .addImm(1)
+ .addImm(Info.encodeVTYPE());
+ LIS->InsertMachineInstrInMaps(*MI);
return;
}
if (Info.hasAVLVLMAX()) {
Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
- BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
- .addReg(DestReg, RegState::Define | RegState::Dead)
- .addReg(RISCV::X0, RegState::Kill)
- .addImm(Info.encodeVTYPE());
+ auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
+ .addReg(DestReg, RegState::Define | RegState::Dead)
+ .addReg(RISCV::X0, RegState::Kill)
+ .addImm(Info.encodeVTYPE());
+ LIS->InsertMachineInstrInMaps(*MI);
+ LIS->createAndComputeVirtRegInterval(DestReg);
return;
}
Register AVLReg = Info.getAVLReg();
MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
- BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
- .addReg(RISCV::X0, RegState::Define | RegState::Dead)
- .addReg(AVLReg)
- .addImm(Info.encodeVTYPE());
+ auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLI))
+ .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+ .addReg(AVLReg)
+ .addImm(Info.encodeVTYPE());
+ LIS->InsertMachineInstrInMaps(*MI);
+ LIS->getInterval(AVLReg).extendInBlock(
+ LIS->getMBBStartIdx(&MBB), LIS->getInstructionIndex(*MI).getRegSlot());
}
static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
@@ -1067,12 +1097,12 @@ static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
const VSETVLIInfo &Require,
const VSETVLIInfo &CurInfo) const {
- assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, *ST, MRI));
+ assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, *ST, LIS));
if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
return true;
- DemandedFields Used = getDemanded(MI, MRI, ST);
+ DemandedFields Used = getDemanded(MI, ST);
// A slidedown/slideup with an *undefined* merge op can freely clobber
// elements not copied from the source vector (e.g. masked off, tail, or
@@ -1108,7 +1138,7 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
Used.TailPolicy = false;
}
- if (CurInfo.isCompatible(Used, Require, *MRI))
+ if (CurInfo.isCompatible(Used, Require, LIS))
return false;
// We didn't find a compatible value. If our AVL is a virtual register,
@@ -1116,9 +1146,10 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
// and the last VL/VTYPE we observed is the same, we don't need a
// VSETVLI here.
if (Require.hasAVLReg() && CurInfo.hasCompatibleVTYPE(Used, Require)) {
- const MachineInstr &DefMI = Require.getAVLDefMI();
- if (isVectorConfigInstr(DefMI)) {
- VSETVLIInfo DefInfo = getInfoForVSETVLI(DefMI, *MRI);
+ const MachineInstr *DefMI =
+ LIS->getInstructionFromIndex(Require.getAVLVNInfo()->def);
+ if (DefMI && isVectorConfigInstr(*DefMI)) {
+ VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI, LIS);
if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
return false;
}
@@ -1154,7 +1185,7 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
if (!RISCVII::hasSEWOp(TSFlags))
return;
- const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, *ST, MRI);
+ const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, *ST, LIS);
assert(NewInfo.isValid() && !NewInfo.isUnknown());
if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
return;
@@ -1163,7 +1194,7 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
if (!Info.isValid() || Info.isUnknown())
Info = NewInfo;
- DemandedFields Demanded = getDemanded(MI, MRI, ST);
+ DemandedFields Demanded = getDemanded(MI, ST);
const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);
// If MI only demands that VL has the same zeroness, we only need to set the
@@ -1173,7 +1204,7 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
// variant, so we avoid the transform to prevent extending live range of an
// avl register operand.
// TODO: We can probably relax this for immediates.
- bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo) &&
+ bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, LIS) &&
IncomingInfo.hasSameVLMAX(PrevInfo);
if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
Info.setAVL(IncomingInfo);
@@ -1204,14 +1235,17 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
const MachineInstr &MI) const {
if (isVectorConfigInstr(MI)) {
- Info = getInfoForVSETVLI(MI, *MRI);
+ Info = getInfoForVSETVLI(MI, LIS);
return;
}
if (RISCV::isFaultFirstLoad(MI)) {
// Update AVL to vl-output of the fault first load.
- Info.setAVLRegDef(MRI->getUniqueVRegDef(MI.getOperand(1).getReg()),
- MI.getOperand(1).getReg());
+ assert(MI.getOperand(1).getReg().isVirtual());
+ auto &LI = LIS->getInterval(MI.getOperand(1).getReg());
+ SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
+ VNInfo *VNI = LI.getVNInfoAt(SI);
+ Info.setAVLRegDef(VNI, MI.getOperand(1).getReg());
return;
}
@@ -1295,7 +1329,7 @@ void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
}
// If we weren't able to prove a vsetvli was directly unneeded, it might still
-// be unneeded if the AVL is a phi node where all incoming values are VL
+// be unneeded if the AVL was a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
const MachineBasicBlock &MBB) const {
@@ -1305,26 +1339,27 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
if (!Require.hasAVLReg())
return true;
- // We need the AVL to be produce by a PHI node in this basic block.
- const MachineInstr *PHI = &Require.getAVLDefMI();
- if (PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
+ // We need the AVL to have been produced by a PHI node in this basic block.
+ const VNInfo *Valno = Require.getAVLVNInfo();
+ if (!Valno->isPHIDef() || LIS->getMBBFromIndex(Valno->def) != &MBB)
return true;
- for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
- PHIOp += 2) {
- Register InReg = PHI->getOperand(PHIOp).getReg();
- MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
+ const LiveRange &LR = LIS->getInterval(Require.getAVLReg());
+
+ for (auto *PBB : MBB.predecessors()) {
const VSETVLIInfo &PBBExit = BlockInfo[PBB->getNumber()].Exit;
// We need the PHI input to the be the output of a VSET(I)VLI.
- MachineInstr *DefMI = MRI->getUniqueVRegDef(InReg);
- assert(DefMI);
- if (!isVectorConfigInstr(*DefMI))
+ const VNInfo *Value = LR.getVNInfoBefore(LIS->getMBBEndIdx(PBB));
+ if (!Value)
+ return true;
+ MachineInstr *DefMI = LIS->getInstructionFromIndex(Value->def);
+ if (!DefMI || !isVectorConfigInstr(*DefMI))
return true;
// We found a VSET(I)VLI make sure it matches the output of the
// predecessor block.
- VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI, *MRI);
+ VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI, LIS);
if (DefInfo != PBBExit)
return true;
@@ -1379,19 +1414,28 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
if (VLOp.isR...
[truncated]
|
348b9ea
to
d9891e8
Compare
LGTM. I tried to enable the post-ra path in BeMg@e457944 based on this patch. It seems to work very well. It shares the entire pass both pre-ra and post-ra. |
d9891e8
to
0631979
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I did a pass over the tests - stopping at test/CodeGen/RISCV/rvv/vaaddu.ll.
I see one case I couldn't explain. None of the others concerned me, but there are a ton of changes.
I think we can and should significantly reduce this test diff. The heuristic I'd suggest would be to have the backwards pass at the end move a vsetvli as far backwards in the block as it can (i.e. moving over don't care instructions) provided it doesn't increase register pressure. (I'd start with a version which didn't care about regpressure at all, and see if that even matters - all of the cases I'm seeing look neutral at worst.)
To be clear, we'd add said heuristic in a separate patch, then rebase this one.
; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass | ||
; CHECK-NEXT: RISC-V Insert Write VXRM Pass | ||
; CHECK-NEXT: Init Undef Pass | ||
; CHECK-NEXT: Eliminate PHI nodes for register allocation | ||
; CHECK-NEXT: MachineDominator Tree Construction | ||
; CHECK-NEXT: Slot index numbering |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Minor, but it looks like maybe one of either two address or fast alloc isn't preserving an analysis that it probably could.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hopefully this is just temporary and will go away once we fully move it to after Fast Register Allocator and before RISC-V Coalesce VSETVLI pass, where it can reuse the live interval analysis there.
@@ -1244,10 +1244,10 @@ define <vscale x 16 x i64> @vp_ctlz_nxv16i64(<vscale x 16 x i64> %va, <vscale x | |||
; CHECK-NEXT: sltu a3, a0, a2 | |||
; CHECK-NEXT: addi a3, a3, -1 | |||
; CHECK-NEXT: and a2, a3, a2 | |||
; CHECK-NEXT: fsrmi a3, 1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's an interesting test diff here. Placing the vsetvli inside the frm preserve region (read, and later writeback) causes slightly higher register pressure.
I think this is safely ignorable, but if we cared, we could heuristically move a vsetvli which kills its register AVL operand backwards over previous instructions which are don't-care with respect to VL/VTYPE.
@@ -811,11 +811,11 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { | |||
; CHECK-V-NEXT: addi a0, sp, 16 | |||
; CHECK-V-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload | |||
; CHECK-V-NEXT: vslideup.vi v8, v9, 1 | |||
; CHECK-V-NEXT: vsetivli zero, 4, e64, m2, ta, ma | |||
; CHECK-V-NEXT: csrr a0, vlenb |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This diff doesn't matter, but is another instance where moving the vsetvli maximally backward would remove a test diff.
@@ -1176,9 +1176,9 @@ define void @sink_splat_ashr_scalable(ptr nocapture %a) { | |||
; CHECK-NEXT: andi a3, a1, 1024 | |||
; CHECK-NEXT: xori a1, a3, 1024 | |||
; CHECK-NEXT: slli a4, a4, 1 | |||
; CHECK-NEXT: vsetvli a5, zero, e32, m2, ta, ma | |||
; CHECK-NEXT: mv a5, a0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This one is an interesting example. The destination vreg is presumably dead, and by placing it later in the block we have higher register pressure. My guess is that these moves come from phi elimination? If so, maybe we have a slightly different move backwards to reduce register pressure lurking here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To be explicit, I'm fine with this landing without the backwards heuristic as you've removed the diffs by taking them eagerly in another patch, but I would like to see us follow up on this after this series has landed. I think the scheduling to reduce register pressure is interesting here, and if the scheduler doesn't manage that on its own after the whole patch sequence, we should look into this more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I gave a very quick stab at moving the vsetvli up before the previous GPR def (so the dead def is immediately reused), and we're able to reuse the GPR and remove the spill in this case. lukel97@fe81aa4
But on SPEC 2017 it actually seems to increase the number of spills by 0.2%, both on main and when applied to this PR. So it doesn't look like its actually removing any spills that may have been introduced by this PR.
My guess is that since we have to update LiveIntervals that it's perturbing something in regalloc ever so slightly, or it might actually be moving the vsetvli up into a higher pressure region and backfiring.
This increase in register pressure seems small though (it's limited to just the single instruction slot and it's in the scalar domain, so hopefully less likely to spill than a vector), are we considering it something that needs to be fixed in this PR?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Whoops, your reply came in as I was typing my comment so I didn't see it. I agree we should follow up on this. I did try inducing a spill in a local test case and I was surprised to see that the scheduler doesn't actually move the vsetvli to avoid the spill. And I presume this is not specific to vsetvlis but applies to any dead def.
This further splits off llvm#91440 to inch RISCVInsertVSETVLI closer to post vector regalloc. As noted in llvm#91440, most of the diffs are from moving vsetvli insertion after the vxrm/csr insertion passes, but these are getting conflated with the changes from moving to LiveIntervals. One idea was that we could try and remove some of these diffs by manually moving back the vsetvlis past the vxrm/csr instructions. But this meant having to touch up the LiveIntervals again which seemed to lead to even more diffs. This instead just moves RISCVInsertVSETVLI after RISCVInsertReadWriteCSR and RISCVInsertWriteVXRM so we can isolate those changes.
This further splits off #91440 to inch RISCVInsertVSETVLI closer to post vector regalloc. As noted in #91440, most of the diffs are from moving vsetvli insertion after the vxrm/csr insertion passes, but these are getting conflated with the changes from moving to LiveIntervals. One idea was that we could try and remove some of these diffs by manually moving back the vsetvlis past the vxrm/csr instructions. But this meant having to touch up the LiveIntervals again which seemed to lead to even more diffs. This instead just moves RISCVInsertVSETVLI after RISCVInsertReadWriteCSR and RISCVInsertWriteVXRM so we can isolate those changes.
Split off from llvm#70549, this patch moves RISCVInsertVSETVLI to after phi elimination where we exit SSA and need to move to LiveVariables. The motivation for splitting this off is to avoid the large scheduling diffs from moving completely to after regalloc, and instead focus on converting the pass to work on LiveIntervals. This limits the test diff to vsetvlis moving between csr and fsr instructions, due to RISCVInsertVSETVLI now taking place after RISCVInsertReadWriteCSR and RISCVInsertWriteVXRM. The two main changes required are updating VSETVLIInfo to store VNInfos instead of MachineInstrs, which allows us to still check for PHI defs in needVSETVLIPHI, and fixing up the live intervals of any AVL operands after inserting new instructions. On O3 the pass is inserted after the register coalescer, otherwise we end up with a bunch of COPYs around eliminated PHIs that trip up needVSETVLIPHI. Note that this manually fixes up the LiveIntervals instead of recomputing them as is currently done in llvm#70549, since it seems to avoid most of the changes in spills and reloads that we were seeing. Specifically LIS->handleMove seemed to change the SlotIndex of instructions which might have affected regalloc. After this patch moving to post regalloc should be a matter of moving the pass in RISCVTargetMachines wihout any further changes to RISCVInsertVSETVLI itself. Co-authored-by: Piyou Chen <[email protected]> Co-authored-by: Luke Lau <[email protected]>
0631979
to
476dd95
Compare
I've separated out the changes around vxrm/csr into #91701 and landed them. This PR is now rebased on top so we should be left with the interesting test diffs now |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
None of the remaining test diffs are concerning, here's a pass of minor comments over the code.
I want to be careful to say that I don't have enough depth on the LiveIntervals/VNInfo stuff to be a knowledgeable reviewer. My comments are mostly surface level.
@@ -426,7 +434,7 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) { | |||
/// values of the VL and VTYPE registers after insertion. | |||
class VSETVLIInfo { | |||
struct AVLDef { | |||
const MachineInstr *DefMI; | |||
const VNInfo *AVLVNInfo; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can this be null? Add a comment please.
* Add getAVLDefMI helper method, add comment that PHI nodes won't have a def * Assert getVNInfoFromReg is always from a virtual register and never returns nullptr * Comment why we need to extend the AVL live interval Also rename AVLDef.AVLVNInfo -> AVLDef.VNInfo, AVL is already in the name.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This is probably a bit of an edge case (note the declare <vscale x 1 x i8> @llvm.riscv.vadd.nxv1i8.nxv1i8(
<vscale x 1 x i8>,
<vscale x 1 x i8>,
<vscale x 1 x i8>,
i64);
define <vscale x 1 x i8> @intrinsic_vadd_vv_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2) nounwind {
entry:
%a = call <vscale x 1 x i8> @llvm.riscv.vadd.nxv1i8.nxv1i8(
<vscale x 1 x i8> %0,
<vscale x 1 x i8> %1,
<vscale x 1 x i8> %2,
i64 undef)
ret <vscale x 1 x i8> %a
}
|
Before #91440 a VSETVLIInfo would have had an IMPLICIT_DEF defining instruction, but now we look up a VNInfo which doesn't exist, which triggers an assertion failure. Mark these undef AVLs as AVLIsIgnored.
As noted in llvm#91440 (comment), if the pass pipeline stops early because of -stop-after any allocated passes added with insertPass will not be freed if they haven't already been added. This was showing up as a failure on the address sanitizer buildbots. We can fix it by instead passing the pass ID instead so that allocation is deferred.
FYI, there appears to be another regression here. When merging these changes downstream, I see many instances of:
I see this for both vmv.s.x and vmv.v.x. It looks like maybe we broke the don't care on tail policy somehow? |
I think I see what happened here. InitUndef is replacing undef/noreg/implicit_def operands with well-defined sources, and we're no longer considering them to have an undef passthrough. The downstream effect of this is magnified by another pass we have downstream, so not your problem. But I think this does explain some of the diffs I called out in the other review. Can we plausibly move InitUndef to just after the new placement here? |
…ter (#92303) As noted in #91440 (comment), if the pass pipeline stops early because of -stop-after any allocated passes added with insertPass will not be freed if they haven't already been added. This was showing up as a failure on the address sanitizer buildbots. We can fix it by instead passing the pass ID instead so that allocation is deferred.
ProcessImplicitDefs should run after InitUndef and add back the undef flag though, otherwise RISCVInsertVSETVLI would never consider any passthrus as undef.
I think so, if some passthrus are slipping past ProcessImplicitDefs and not getting marked as undef, moving it to after PHI elimination might explain some of those diffs. But it's also probably worth mentioning that InitUndef has to run before regalloc by design, which is at odds with moving RISCVInsertVSETVLI after regalloc. So eventually we'll have to move RISCVInsertVSETVLI past InitUndef. |
I confused myself here and forgot about the PseudoRVVInitUndef pseudos. But it turns out we only insert the undef pseudos on non-tied operands, i.e. never the passthru. RISCVInsertVSETVLI conveniently only checks for undef on the passthrus, so we don't need to worry about them. |
Split off from #70549, this patch moves RISCVInsertVSETVLI to after phi elimination where we exit SSA and need to move to LiveVariables.
The motivation for splitting this off is to avoid the large scheduling diffs from moving completely to after regalloc, and instead focus on converting the pass to work on LiveIntervals.
The two main changes required are updating VSETVLIInfo to store VNInfos instead of MachineInstrs, which allows us to still check for PHI defs in needVSETVLIPHI, and fixing up the live intervals of any AVL operands after inserting new instructions.
On O3 the pass is inserted after the register coalescer, otherwise we end up with a bunch of COPYs around eliminated PHIs that trip up needVSETVLIPHI.
Note that this manually fixes up the LiveIntervals instead of recomputing them as is currently done in #70549, since it seems to avoid most of the changes in spills and reloads that we were seeing. Specifically LIS->handleMove seemed to change the SlotIndex of instructions which might have affected regalloc.
After this patch moving to post regalloc should be a matter of moving the pass in RISCVTargetMachine without any further changes to RISCVInsertVSETVLI itself.
Co-authored-by: Piyou Chen [email protected]
Co-authored-by: Luke Lau [email protected]