-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[RISCV] Teach RISCVInsertVSETVLI to work without LiveIntervals #94686
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Philip Reames (preames) ChangesStacked on #94658. We recently moved RISCVInsertVSETVLI from before vector register allocation to after vector register allocation. When doing so, we added an unconditional dependency on LiveIntervals - even at O0 where LiveIntevals hadn't previously run. As reported in #93587, this was apparently not safe to do. This change makes LiveIntervals optional, and adjusts all the update code to only run wen live intervals is present. The only real tricky part of this change is the abstract state tracking in the dataflow. We need to represent a "register w/unknown definition" state - but only when we don't have LiveIntervals. This adjust the abstract state definition so that the AVLIsReg state can represent either a register + valno, or a register + unknown definition. With LiveIntervals, we have an exact definition for each AVL use. Without LiveIntervals, we treat the definition of a register AVL as being unknown. The key semantic change is that we now have a state in the lattice for which something is known about the AVL value, but for which two identical lattice elements do not necessarily represent the same AVL value at runtime. Previously, the only case which could result in such an unknown AVL was the fully unknown state (where VTYPE is also fully unknown). This requires a small adjustment to hasSameAVL and lattice state equality to draw this important distinction. The net effect of this patch is that we remove the LiveIntervals dependency at O0, and O0 code quality will regress for cases involving register AVL values. This patch is an alternative to #93796 and #94340. It is very directly inspired by review conversation around them, and thus should be considered coauthored by Luke. Patch is 99.00 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/94686.diff 12 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index a96768240a933..6585df384eefd 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -52,6 +52,8 @@ namespace {
static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI,
const LiveIntervals *LIS) {
assert(Reg.isVirtual());
+ if (!LIS)
+ return nullptr;
auto &LI = LIS->getInterval(Reg);
SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI);
return LI.getVNInfoBefore(SI);
@@ -397,7 +399,9 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
if (RISCVII::hasSEWOp(TSFlags)) {
Res.demandVTYPE();
if (RISCVII::hasVLOp(TSFlags))
- Res.demandVL();
+ if (const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
+ !VLOp.isReg() || !VLOp.isUndef())
+ Res.demandVL();
// Behavior is independent of mask policy.
if (!RISCVII::usesMaskPolicy(TSFlags))
@@ -503,7 +507,8 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
struct AVLDef {
- // Every AVLDef should have a VNInfo.
+ // Every AVLDef should have a VNInfo, unless we're running without
+ // LiveIntervals in which case this will be nullptr.
const VNInfo *ValNo;
Register DefReg;
};
@@ -517,8 +522,7 @@ class VSETVLIInfo {
AVLIsReg,
AVLIsImm,
AVLIsVLMAX,
- AVLIsIgnored,
- Unknown,
+ Unknown, // AVL and VTYPE are fully unknown
} State = Uninitialized;
// Fields from VTYPE.
@@ -544,7 +548,7 @@ class VSETVLIInfo {
bool isUnknown() const { return State == Unknown; }
void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) {
- assert(VNInfo && AVLReg.isVirtual());
+ assert(AVLReg.isVirtual());
AVLRegDef.ValNo = VNInfo;
AVLRegDef.DefReg = AVLReg;
State = AVLIsReg;
@@ -557,12 +561,9 @@ class VSETVLIInfo {
void setAVLVLMAX() { State = AVLIsVLMAX; }
- void setAVLIgnored() { State = AVLIsIgnored; }
-
bool hasAVLImm() const { return State == AVLIsImm; }
bool hasAVLReg() const { return State == AVLIsReg; }
bool hasAVLVLMAX() const { return State == AVLIsVLMAX; }
- bool hasAVLIgnored() const { return State == AVLIsIgnored; }
Register getAVLReg() const {
assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual());
return AVLRegDef.DefReg;
@@ -577,9 +578,11 @@ class VSETVLIInfo {
}
// Most AVLIsReg infos will have a single defining MachineInstr, unless it was
// a PHI node. In that case getAVLVNInfo()->def will point to the block
- // boundary slot.
+ // boundary slot. If LiveIntervals isn't available, then nullptr is returned.
const MachineInstr *getAVLDefMI(const LiveIntervals *LIS) const {
assert(hasAVLReg());
+ if (!LIS)
+ return nullptr;
auto *MI = LIS->getInstructionFromIndex(getAVLVNInfo()->def);
assert(!(getAVLVNInfo()->isPHIDef() && MI));
return MI;
@@ -593,8 +596,6 @@ class VSETVLIInfo {
setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg());
else if (Info.hasAVLVLMAX())
setAVLVLMAX();
- else if (Info.hasAVLIgnored())
- setAVLIgnored();
else {
assert(Info.hasAVLImm());
setAVLImm(Info.getAVLImm());
@@ -615,8 +616,6 @@ class VSETVLIInfo {
}
if (hasAVLVLMAX())
return true;
- if (hasAVLIgnored())
- return false;
return false;
}
@@ -627,10 +626,15 @@ class VSETVLIInfo {
return (hasNonZeroAVL(LIS) && Other.hasNonZeroAVL(LIS));
}
- bool hasSameAVL(const VSETVLIInfo &Other) const {
- if (hasAVLReg() && Other.hasAVLReg())
+ bool hasSameAVLLatticeValue(const VSETVLIInfo &Other) const {
+ if (hasAVLReg() && Other.hasAVLReg()) {
+ assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
+ "we either have intervals or we don't");
+ if (!getAVLVNInfo())
+ return getAVLReg() == Other.getAVLReg();
return getAVLVNInfo()->id == Other.getAVLVNInfo()->id &&
getAVLReg() == Other.getAVLReg();
+ }
if (hasAVLImm() && Other.hasAVLImm())
return getAVLImm() == Other.getAVLImm();
@@ -638,12 +642,24 @@ class VSETVLIInfo {
if (hasAVLVLMAX())
return Other.hasAVLVLMAX() && hasSameVLMAX(Other);
- if (hasAVLIgnored())
- return Other.hasAVLIgnored();
-
return false;
}
+ // Return true if the two lattice values are guaranteed to have
+ // the same AVL value at runtime.
+ bool hasSameAVL(const VSETVLIInfo &Other) const {
+ // Without LiveIntervals, we don't know which instruction defines a
+ // register. Since a register may be redefined, this means all AVLIsReg
+ // states must be treated as possibly distinct.
+ if (hasAVLReg() && Other.hasAVLReg()) {
+ assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
+ "we either have intervals or we don't");
+ if (!getAVLVNInfo())
+ return false;
+ }
+ return hasSameAVLLatticeValue(Other);
+ }
+
void setVTYPE(unsigned VType) {
assert(isValid() && !isUnknown() &&
"Can't set VTYPE for uninitialized or unknown");
@@ -745,8 +761,8 @@ class VSETVLIInfo {
if (Other.isUnknown())
return isUnknown();
- if (!hasSameAVL(Other))
- return false;
+ if (!hasSameAVLLatticeValue(Other))
+ return false;
// If the SEWLMULRatioOnly bits are different, then they aren't equal.
if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
@@ -816,8 +832,6 @@ class VSETVLIInfo {
OS << "AVLImm=" << (unsigned)AVLImm;
if (hasAVLVLMAX())
OS << "AVLVLMAX";
- if (hasAVLIgnored())
- OS << "AVLIgnored";
OS << ", "
<< "VLMul=" << (unsigned)VLMul << ", "
<< "SEW=" << (unsigned)SEW << ", "
@@ -855,6 +869,7 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
const RISCVSubtarget *ST;
const TargetInstrInfo *TII;
MachineRegisterInfo *MRI;
+ // Possibly null!
LiveIntervals *LIS;
std::vector<BlockData> BlockInfo;
@@ -869,9 +884,9 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<LiveIntervals>();
+ AU.addUsedIfAvailable<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
- AU.addRequired<SlotIndexes>();
+ AU.addUsedIfAvailable<SlotIndexes>();
AU.addPreserved<SlotIndexes>();
AU.addPreserved<LiveDebugVariables>();
AU.addPreserved<LiveStacks>();
@@ -932,11 +947,12 @@ RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const {
"Can't handle X0, X0 vsetvli yet");
if (AVLReg == RISCV::X0)
NewInfo.setAVLVLMAX();
- else if (VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS))
- NewInfo.setAVLRegDef(VNI, AVLReg);
+ else if (MI.getOperand(1).isUndef())
+ // Otherwise use an AVL of 1 to avoid depending on previous vl.
+ NewInfo.setAVLImm(1);
else {
- assert(MI.getOperand(1).isUndef());
- NewInfo.setAVLIgnored();
+ VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS);
+ NewInfo.setAVLRegDef(VNI, AVLReg);
}
}
NewInfo.setVTYPE(MI.getOperand(2).getImm());
@@ -1008,18 +1024,18 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const {
}
else
InstrInfo.setAVLImm(Imm);
- } else if (VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS)) {
- InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
+ } else if (VLOp.isUndef()) {
+ // Otherwise use an AVL of 1 to avoid depending on previous vl.
+ InstrInfo.setAVLImm(1);
} else {
- assert(VLOp.isUndef());
- InstrInfo.setAVLIgnored();
+ VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS);
+ InstrInfo.setAVLRegDef(VNI, VLOp.getReg());
}
} else {
assert(isScalarExtractInstr(MI));
- // TODO: If we are more clever about x0,x0 insertion then we should be able
- // to deduce that the VL is ignored based off of DemandedFields, and remove
- // the AVLIsIgnored state. Then we can just use an arbitrary immediate AVL.
- InstrInfo.setAVLIgnored();
+ // Pick a random value for state tracking purposes, will be ignored via
+ // the demanded fields mechanism
+ InstrInfo.setAVLImm(1);
}
#ifndef NDEBUG
if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
@@ -1066,7 +1082,8 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
.addReg(RISCV::X0, RegState::Kill)
.addImm(Info.encodeVTYPE())
.addReg(RISCV::VL, RegState::Implicit);
- LIS->InsertMachineInstrInMaps(*MI);
+ if (LIS)
+ LIS->InsertMachineInstrInMaps(*MI);
return;
}
@@ -1083,7 +1100,8 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
.addReg(RISCV::X0, RegState::Kill)
.addImm(Info.encodeVTYPE())
.addReg(RISCV::VL, RegState::Implicit);
- LIS->InsertMachineInstrInMaps(*MI);
+ if (LIS)
+ LIS->InsertMachineInstrInMaps(*MI);
return;
}
}
@@ -1095,29 +1113,8 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addImm(Info.getAVLImm())
.addImm(Info.encodeVTYPE());
- LIS->InsertMachineInstrInMaps(*MI);
- return;
- }
-
- if (Info.hasAVLIgnored()) {
- // We can only use x0, x0 if there's no chance of the vtype change causing
- // the previous vl to become invalid.
- if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
- Info.hasSameVLMAX(PrevInfo)) {
- auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
- .addReg(RISCV::X0, RegState::Define | RegState::Dead)
- .addReg(RISCV::X0, RegState::Kill)
- .addImm(Info.encodeVTYPE())
- .addReg(RISCV::VL, RegState::Implicit);
+ if (LIS)
LIS->InsertMachineInstrInMaps(*MI);
- return;
- }
- // Otherwise use an AVL of 1 to avoid depending on previous vl.
- auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
- .addReg(RISCV::X0, RegState::Define | RegState::Dead)
- .addImm(1)
- .addImm(Info.encodeVTYPE());
- LIS->InsertMachineInstrInMaps(*MI);
return;
}
@@ -1127,8 +1124,10 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
.addReg(DestReg, RegState::Define | RegState::Dead)
.addReg(RISCV::X0, RegState::Kill)
.addImm(Info.encodeVTYPE());
- LIS->InsertMachineInstrInMaps(*MI);
- LIS->createAndComputeVirtRegInterval(DestReg);
+ if (LIS) {
+ LIS->InsertMachineInstrInMaps(*MI);
+ LIS->createAndComputeVirtRegInterval(DestReg);
+ }
return;
}
@@ -1138,12 +1137,14 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addReg(AVLReg)
.addImm(Info.encodeVTYPE());
- LIS->InsertMachineInstrInMaps(*MI);
- // Normally the AVL's live range will already extend past the inserted vsetvli
- // because the pseudos below will already use the AVL. But this isn't always
- // the case, e.g. PseudoVMV_X_S doesn't have an AVL operand.
- LIS->getInterval(AVLReg).extendInBlock(
- LIS->getMBBStartIdx(&MBB), LIS->getInstructionIndex(*MI).getRegSlot());
+ if (LIS) {
+ LIS->InsertMachineInstrInMaps(*MI);
+ // Normally the AVL's live range will already extend past the inserted vsetvli
+ // because the pseudos below will already use the AVL. But this isn't always
+ // the case, e.g. PseudoVMV_X_S doesn't have an AVL operand.
+ LIS->getInterval(AVLReg).extendInBlock(
+ LIS->getMBBStartIdx(&MBB), LIS->getInstructionIndex(*MI).getRegSlot());
+ }
}
/// Return true if a VSETVLI is required to transition from CurInfo to Require
@@ -1257,10 +1258,13 @@ void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
if (RISCV::isFaultFirstLoad(MI)) {
// Update AVL to vl-output of the fault first load.
assert(MI.getOperand(1).getReg().isVirtual());
- auto &LI = LIS->getInterval(MI.getOperand(1).getReg());
- SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
- VNInfo *VNI = LI.getVNInfoAt(SI);
- Info.setAVLRegDef(VNI, MI.getOperand(1).getReg());
+ if (LIS) {
+ auto &LI = LIS->getInterval(MI.getOperand(1).getReg());
+ SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
+ VNInfo *VNI = LI.getVNInfoAt(SI);
+ Info.setAVLRegDef(VNI, MI.getOperand(1).getReg());
+ } else
+ Info.setAVLRegDef(nullptr, MI.getOperand(1).getReg());
return;
}
@@ -1354,6 +1358,9 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
if (!Require.hasAVLReg())
return true;
+ if (!LIS)
+ return true;
+
// We need the AVL to have been produced by a PHI node in this basic block.
const VNInfo *Valno = Require.getAVLVNInfo();
if (!Valno->isPHIDef() || LIS->getMBBFromIndex(Valno->def) != &MBB)
@@ -1429,27 +1436,29 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
if (VLOp.isReg()) {
Register Reg = VLOp.getReg();
- LiveInterval &LI = LIS->getInterval(Reg);
// Erase the AVL operand from the instruction.
VLOp.setReg(RISCV::NoRegister);
VLOp.setIsKill(false);
- SmallVector<MachineInstr *> DeadMIs;
- LIS->shrinkToUses(&LI, &DeadMIs);
- // We might have separate components that need split due to
- // needVSETVLIPHI causing us to skip inserting a new VL def.
- SmallVector<LiveInterval *> SplitLIs;
- LIS->splitSeparateComponents(LI, SplitLIs);
-
- // If the AVL was an immediate > 31, then it would have been emitted
- // as an ADDI. However, the ADDI might not have been used in the
- // vsetvli, or a vsetvli might not have been emitted, so it may be
- // dead now.
- for (MachineInstr *DeadMI : DeadMIs) {
- if (!TII->isAddImmediate(*DeadMI, Reg))
- continue;
- LIS->RemoveMachineInstrFromMaps(*DeadMI);
- DeadMI->eraseFromParent();
+ if (LIS) {
+ LiveInterval &LI = LIS->getInterval(Reg);
+ SmallVector<MachineInstr *> DeadMIs;
+ LIS->shrinkToUses(&LI, &DeadMIs);
+ // We might have separate components that need split due to
+ // needVSETVLIPHI causing us to skip inserting a new VL def.
+ SmallVector<LiveInterval *> SplitLIs;
+ LIS->splitSeparateComponents(LI, SplitLIs);
+
+ // If the AVL was an immediate > 31, then it would have been emitted
+ // as an ADDI. However, the ADDI might not have been used in the
+ // vsetvli, or a vsetvli might not have been emitted, so it may be
+ // dead now.
+ for (MachineInstr *DeadMI : DeadMIs) {
+ if (!TII->isAddImmediate(*DeadMI, Reg))
+ continue;
+ LIS->RemoveMachineInstrFromMaps(*DeadMI);
+ DeadMI->eraseFromParent();
+ }
}
}
MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
@@ -1506,6 +1515,9 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
if (!UnavailablePred || !AvailableInfo.isValid())
return;
+ if (!LIS)
+ return;
+
// If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
// the unavailable pred.
if (AvailableInfo.hasSEWLMULRatioOnly())
@@ -1529,11 +1541,6 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
return;
}
- // If the AVL isn't used in its predecessors then bail, since we have no AVL
- // to insert a vsetvli with.
- if (AvailableInfo.hasAVLIgnored())
- return;
-
// Model the effect of changing the input state of the block MBB to
// AvailableInfo. We're looking for two issues here; one legality,
// one profitability.
@@ -1657,7 +1664,7 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
// The def of DefReg moved to MI, so extend the LiveInterval up to
// it.
- if (DefReg.isVirtual()) {
+ if (DefReg.isVirtual() && LIS) {
LiveInterval &DefLI = LIS->getInterval(DefReg);
SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
@@ -1686,13 +1693,15 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
if (OldVLReg && OldVLReg.isVirtual()) {
// NextMI no longer uses OldVLReg so shrink its LiveInterval.
- LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
+ if (LIS)
+ LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
MRI->use_nodbg_empty(OldVLReg)) {
VLOpDef->eraseFromParent();
- LIS->removeInterval(OldVLReg);
+ if (LIS)
+ LIS->removeInterval(OldVLReg);
}
}
MI.setDesc(NextMI->getDesc());
@@ -1708,7 +1717,8 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
NumCoalescedVSETVL += ToDelete.size();
for (auto *MI : ToDelete) {
- LIS->RemoveMachineInstrFromMaps(*MI);
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(*MI);
MI->eraseFromParent();
}
}
@@ -1723,12 +1733,14 @@ void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
auto ReadVLMI = BuildMI(MBB, I, MI.getDebugLoc(),
TII->get(RISCV::PseudoReadVL), VLOutput);
// Move the LiveInterval's definition down to PseudoReadVL.
- SlotIndex NewDefSI =
+ if (LIS) {
+ SlotIndex NewDefSI =
LIS->InsertMachineInstrInMaps(*ReadVLMI).getRegSlot();
- LiveInterval &DefLI = LIS->getInterval(VLOutput);
- VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
- DefLI.removeSegment(DefLI.beginIndex(), NewDefSI);
- DefVNI->def = NewDefSI;
+ LiveInterval &DefLI = LIS->getInterval(VLOutput);
+ VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
+ DefLI.removeSegment(DefLI.beginIndex(), NewDefSI);
+ DefVNI->def = NewDefSI;
+ }
}
// We don't use the vl output of the VLEFF/VLSEGFF anymore.
MI.getOperand(1).setReg(RISCV::X0);
@@ -1746,7 +1758,7 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
TII = ST->getInstrInfo();
MRI = &MF.getRegInfo();
- LIS = &getAnalysis<LiveIntervals>();
+ LIS = getAnalysisIfAvailable<LiveIntervals>();
assert(BlockInfo.empty() && "Expect empty block infos");
BlockInfo.resize(MF.getNumBlockIDs());
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
index ec49ed302d49d..953eb873b660b 100644
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -47,9 +47,6 @@
; CHECK-NEXT: Eliminate PHI nodes for register allocation
; CHECK-NEXT: Two-Address instruction pass
; CHECK-NEXT: Fast Register Allocator
-; CHECK-NEXT: MachineDominator Tree Construction
-; CHECK-NEXT: Slot index numbering
-; CHECK-NEXT: Live Interval Analysis
; CHECK-NEXT: RISC-V Insert VSETVLI pass
; CHECK-NEXT: Fast Register Allocator
; CHECK-NEXT: Remove Redundant DEBUG_VALUE analysis
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-fp.ll
index 48e820243c957..8b31166e313de 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-fp.ll
@@ -44,15 +44,16 @@ define <2 x half> @expandload_v2f16(ptr %base, <2 x...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, the LIS changes are pretty much what I had locally. Is #94658 strictly required for this though? I would have thought that state would have actually been OK without LiveIntervals
In review around #94686, we had a discussion about a possible O0 specific miscompile case without test coverage. The particular case turned out not be possible to exercise in practice, but improving our test coverage remains a good idea if we're going to have differences in the dataflow with and without live intervals.
We recently moved RISCVInsertVSETVLI from before vector register allocation to after vector register allocation. When doing so, we added an unconditional dependency on LiveIntervals - even at O0 where LiveIntevals hadn't previously run. As reported in llvm#93587, this was apparently not safe to do. This change makes LiveIntervals optional, and adjusts all the update code to only run wen live intervals is present. The only real tricky part of this change is the abstract state tracking in the dataflow. We need to represent a "register w/unknown definition" state - but only when we don't have LiveIntervals. This adjust the abstract state definition so that the AVLIsReg state can represent either a register + valno, or a register + unknown definition. With LiveIntervals, wehave an exact definition for each AVL use. Without LiveIntervals, we treat the definition of a register AVL as being unknown. The key semantic change is that we now have a state in the lattice for which something is known about the AVL value, but for which two identical lattice elements do *not* neccessarily represent the same AVL value at runtime. Previously, the only case which could result in such an unknown AVL was the fully unknown state (where VTYPE is also fully unknown). This requires a small adjustment to hasSameAVL and lattice state equality to draw this important distinction. The net effect of this patch is that we remove the LiveIntervals dependency at O0, and O0 code quality will regress for cases involving register AVL values. In practice, this means we pessimize code written with intrinsics at O0. This patch is an alternative to llvm#93796 and llvm#94340. It is very directly inspired by review conversation around them, and thus should be considered coauthored by Luke.
1ab0542
to
42d2c13
Compare
You can test this locally with the following command:git-clang-format --diff 964c92d04f039a205327cb47c75919096f9fca94 42d2c133716ec51f451b8bf056b6bf341d0d5242 -- llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp View the diff from clang-format here.diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 0537d3e324..f5ed2e0755 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -529,7 +529,7 @@ class VSETVLIInfo {
AVLIsReg,
AVLIsImm,
AVLIsVLMAX,
- Unknown, // AVL and VTYPE are fully unknown
+ Unknown, // AVL and VTYPE are fully unknown
} State = Uninitialized;
// Fields from VTYPE.
@@ -767,7 +767,7 @@ public:
return isUnknown();
if (!hasSameAVLLatticeValue(Other))
- return false;
+ return false;
// If the SEWLMULRatioOnly bits are different, then they aren't equal.
if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
@@ -1144,9 +1144,9 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
.addImm(Info.encodeVTYPE());
if (LIS) {
LIS->InsertMachineInstrInMaps(*MI);
- // Normally the AVL's live range will already extend past the inserted vsetvli
- // because the pseudos below will already use the AVL. But this isn't always
- // the case, e.g. PseudoVMV_X_S doesn't have an AVL operand.
+ // Normally the AVL's live range will already extend past the inserted
+ // vsetvli because the pseudos below will already use the AVL. But this
+ // isn't always the case, e.g. PseudoVMV_X_S doesn't have an AVL operand.
LIS->getInterval(AVLReg).extendInBlock(
LIS->getMBBStartIdx(&MBB), LIS->getInstructionIndex(*MI).getRegSlot());
}
@@ -1265,7 +1265,8 @@ void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
assert(MI.getOperand(1).getReg().isVirtual());
if (LIS) {
auto &LI = LIS->getInterval(MI.getOperand(1).getReg());
- SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
+ SlotIndex SI =
+ LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
VNInfo *VNI = LI.getVNInfoAt(SI);
Info.setAVLRegDef(VNI, MI.getOperand(1).getReg());
} else
@@ -1740,7 +1741,7 @@ void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
// Move the LiveInterval's definition down to PseudoReadVL.
if (LIS) {
SlotIndex NewDefSI =
- LIS->InsertMachineInstrInMaps(*ReadVLMI).getRegSlot();
+ LIS->InsertMachineInstrInMaps(*ReadVLMI).getRegSlot();
LiveInterval &DefLI = LIS->getInterval(VLOutput);
VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
DefLI.removeSegment(DefLI.beginIndex(), NewDefSI);
|
Rebased. There's been some offline discussion here, so let me summarize. Luke identified a potential miscompile in this patch. Specifically, the pass 3 emitVSETVLI codepath was previously assuming that if a state was equal in the data flow that the AVL register must hold the same value at runtime. This was an entirely reasonable assumption with the old data flow (and still is at O3), but becomes unsound after this change. After offline discussion, a cleanup change was landed as #94340 (which was cut down significantly from prior attempts at this cleanup - the other cases were unreachable code). This was NFC for the current data flow, but fixes the potential miscompile in this one. However, after trying to write test cases to illustrate this change, I think the miscompile above is purely conceptual. I could not find a way in practice to get the backend to reuse the same register for two different AVLs which is what would be required to hit this. Worth noting is that fixing this is a severe regression in code quality for the AVLReg case. This case isn't use for either fixed or scalable vectors. (Well, fixed could see if for 32+ elements..) However, intrinsics usage will be hit pretty hard (at O0) for code which uses the vsetvli intrinsic pattern. I don't think this matters, just noting it for the record. |
@lukel97 I just had a thought. We still have MRI after vector regalloc, and while we can't be guaranteed to find a unique definition, MRI does have means for cheaply getting a unique definition if one exists. You see any reason why we can't opportunistically use that in hasNonZeroAVL, and hasSameAVL? We couldn't easily use it during state merging, but off the top of my head I'm not seeing any reason that compatibility style reasoning can't? We don't necessarily need to add that in this patch - mostly thinking about mitigations for perf loss if anyone does care about O0 intrinsic performance. |
If by opportunistically you mean use MachineRegisterInfo::getUniqueVRegDef and then fall back to VNInfo if it's null, I think that could work. We only really need VNInfo for the cases where the AVL came from an eliminated phi. Agreed, sounds like a reasonable route if we want to recover those O0 regressions. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Just a heads up looks like clang-format is complaining, it might need a quick run
@@ -52,6 +52,8 @@ namespace { | |||
static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit, should probably update the comment to mention it will return nullptr if LIS is null too
Stacked on #94658.
We recently moved RISCVInsertVSETVLI from before vector register allocation to after vector register allocation. When doing so, we added an unconditional dependency on LiveIntervals - even at O0 where LiveIntevals hadn't previously run. As reported in #93587, this was apparently not safe to do.
This change makes LiveIntervals optional, and adjusts all the update code to only run wen live intervals is present. The only real tricky part of this change is the abstract state tracking in the dataflow. We need to represent a "register w/unknown definition" state - but only when we don't have LiveIntervals.
This adjust the abstract state definition so that the AVLIsReg state can represent either a register + valno, or a register + unknown definition. With LiveIntervals, we have an exact definition for each AVL use. Without LiveIntervals, we treat the definition of a register AVL as being unknown.
The key semantic change is that we now have a state in the lattice for which something is known about the AVL value, but for which two identical lattice elements do not necessarily represent the same AVL value at runtime. Previously, the only case which could result in such an unknown AVL was the fully unknown state (where VTYPE is also fully unknown). This requires a small adjustment to hasSameAVL and lattice state equality to draw this important distinction.
The net effect of this patch is that we remove the LiveIntervals dependency at O0, and O0 code quality will regress for cases involving register AVL values.
This patch is an alternative to #93796 and #94340. It is very directly inspired by review conversation around them, and thus should be considered coauthored by Luke.