Skip to content

Commit 42d2c13

Browse files
committed
[RISCV] Teach RISCVInsertVSETVLI to work without LiveIntervals
We recently moved RISCVInsertVSETVLI from before vector register allocation to after vector register allocation. When doing so, we added an unconditional dependency on LiveIntervals - even at O0 where LiveIntevals hadn't previously run. As reported in #93587, this was apparently not safe to do. This change makes LiveIntervals optional, and adjusts all the update code to only run wen live intervals is present. The only real tricky part of this change is the abstract state tracking in the dataflow. We need to represent a "register w/unknown definition" state - but only when we don't have LiveIntervals. This adjust the abstract state definition so that the AVLIsReg state can represent either a register + valno, or a register + unknown definition. With LiveIntervals, wehave an exact definition for each AVL use. Without LiveIntervals, we treat the definition of a register AVL as being unknown. The key semantic change is that we now have a state in the lattice for which something is known about the AVL value, but for which two identical lattice elements do *not* neccessarily represent the same AVL value at runtime. Previously, the only case which could result in such an unknown AVL was the fully unknown state (where VTYPE is also fully unknown). This requires a small adjustment to hasSameAVL and lattice state equality to draw this important distinction. The net effect of this patch is that we remove the LiveIntervals dependency at O0, and O0 code quality will regress for cases involving register AVL values. In practice, this means we pessimize code written with intrinsics at O0. This patch is an alternative to #93796 and #94340. It is very directly inspired by review conversation around them, and thus should be considered coauthored by Luke.
1 parent 964c92d commit 42d2c13

File tree

5 files changed

+152
-61
lines changed

5 files changed

+152
-61
lines changed

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

Lines changed: 101 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ namespace {
5252
static VNInfo *getVNInfoFromReg(Register Reg, const MachineInstr &MI,
5353
const LiveIntervals *LIS) {
5454
assert(Reg.isVirtual());
55+
if (!LIS)
56+
return nullptr;
5557
auto &LI = LIS->getInterval(Reg);
5658
SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI);
5759
return LI.getVNInfoBefore(SI);
@@ -512,7 +514,8 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) {
512514
/// values of the VL and VTYPE registers after insertion.
513515
class VSETVLIInfo {
514516
struct AVLDef {
515-
// Every AVLDef should have a VNInfo.
517+
// Every AVLDef should have a VNInfo, unless we're running without
518+
// LiveIntervals in which case this will be nullptr.
516519
const VNInfo *ValNo;
517520
Register DefReg;
518521
};
@@ -526,7 +529,7 @@ class VSETVLIInfo {
526529
AVLIsReg,
527530
AVLIsImm,
528531
AVLIsVLMAX,
529-
Unknown,
532+
Unknown, // AVL and VTYPE are fully unknown
530533
} State = Uninitialized;
531534

532535
// Fields from VTYPE.
@@ -552,7 +555,7 @@ class VSETVLIInfo {
552555
bool isUnknown() const { return State == Unknown; }
553556

554557
void setAVLRegDef(const VNInfo *VNInfo, Register AVLReg) {
555-
assert(VNInfo && AVLReg.isVirtual());
558+
assert(AVLReg.isVirtual());
556559
AVLRegDef.ValNo = VNInfo;
557560
AVLRegDef.DefReg = AVLReg;
558561
State = AVLIsReg;
@@ -582,9 +585,11 @@ class VSETVLIInfo {
582585
}
583586
// Most AVLIsReg infos will have a single defining MachineInstr, unless it was
584587
// a PHI node. In that case getAVLVNInfo()->def will point to the block
585-
// boundary slot.
588+
// boundary slot. If LiveIntervals isn't available, then nullptr is returned.
586589
const MachineInstr *getAVLDefMI(const LiveIntervals *LIS) const {
587590
assert(hasAVLReg());
591+
if (!LIS)
592+
return nullptr;
588593
auto *MI = LIS->getInstructionFromIndex(getAVLVNInfo()->def);
589594
assert(!(getAVLVNInfo()->isPHIDef() && MI));
590595
return MI;
@@ -628,10 +633,15 @@ class VSETVLIInfo {
628633
return (hasNonZeroAVL(LIS) && Other.hasNonZeroAVL(LIS));
629634
}
630635

631-
bool hasSameAVL(const VSETVLIInfo &Other) const {
632-
if (hasAVLReg() && Other.hasAVLReg())
636+
bool hasSameAVLLatticeValue(const VSETVLIInfo &Other) const {
637+
if (hasAVLReg() && Other.hasAVLReg()) {
638+
assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
639+
"we either have intervals or we don't");
640+
if (!getAVLVNInfo())
641+
return getAVLReg() == Other.getAVLReg();
633642
return getAVLVNInfo()->id == Other.getAVLVNInfo()->id &&
634643
getAVLReg() == Other.getAVLReg();
644+
}
635645

636646
if (hasAVLImm() && Other.hasAVLImm())
637647
return getAVLImm() == Other.getAVLImm();
@@ -642,6 +652,21 @@ class VSETVLIInfo {
642652
return false;
643653
}
644654

655+
// Return true if the two lattice values are guaranteed to have
656+
// the same AVL value at runtime.
657+
bool hasSameAVL(const VSETVLIInfo &Other) const {
658+
// Without LiveIntervals, we don't know which instruction defines a
659+
// register. Since a register may be redefined, this means all AVLIsReg
660+
// states must be treated as possibly distinct.
661+
if (hasAVLReg() && Other.hasAVLReg()) {
662+
assert(!getAVLVNInfo() == !Other.getAVLVNInfo() &&
663+
"we either have intervals or we don't");
664+
if (!getAVLVNInfo())
665+
return false;
666+
}
667+
return hasSameAVLLatticeValue(Other);
668+
}
669+
645670
void setVTYPE(unsigned VType) {
646671
assert(isValid() && !isUnknown() &&
647672
"Can't set VTYPE for uninitialized or unknown");
@@ -741,8 +766,8 @@ class VSETVLIInfo {
741766
if (Other.isUnknown())
742767
return isUnknown();
743768

744-
if (!hasSameAVL(Other))
745-
return false;
769+
if (!hasSameAVLLatticeValue(Other))
770+
return false;
746771

747772
// If the SEWLMULRatioOnly bits are different, then they aren't equal.
748773
if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
@@ -849,6 +874,7 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
849874
const RISCVSubtarget *ST;
850875
const TargetInstrInfo *TII;
851876
MachineRegisterInfo *MRI;
877+
// Possibly null!
852878
LiveIntervals *LIS;
853879

854880
std::vector<BlockData> BlockInfo;
@@ -863,9 +889,9 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
863889
void getAnalysisUsage(AnalysisUsage &AU) const override {
864890
AU.setPreservesCFG();
865891

866-
AU.addRequired<LiveIntervals>();
892+
AU.addUsedIfAvailable<LiveIntervals>();
867893
AU.addPreserved<LiveIntervals>();
868-
AU.addRequired<SlotIndexes>();
894+
AU.addUsedIfAvailable<SlotIndexes>();
869895
AU.addPreserved<SlotIndexes>();
870896
AU.addPreserved<LiveDebugVariables>();
871897
AU.addPreserved<LiveStacks>();
@@ -1061,7 +1087,8 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
10611087
.addReg(RISCV::X0, RegState::Kill)
10621088
.addImm(Info.encodeVTYPE())
10631089
.addReg(RISCV::VL, RegState::Implicit);
1064-
LIS->InsertMachineInstrInMaps(*MI);
1090+
if (LIS)
1091+
LIS->InsertMachineInstrInMaps(*MI);
10651092
return;
10661093
}
10671094

@@ -1078,7 +1105,8 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
10781105
.addReg(RISCV::X0, RegState::Kill)
10791106
.addImm(Info.encodeVTYPE())
10801107
.addReg(RISCV::VL, RegState::Implicit);
1081-
LIS->InsertMachineInstrInMaps(*MI);
1108+
if (LIS)
1109+
LIS->InsertMachineInstrInMaps(*MI);
10821110
return;
10831111
}
10841112
}
@@ -1090,7 +1118,8 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
10901118
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
10911119
.addImm(Info.getAVLImm())
10921120
.addImm(Info.encodeVTYPE());
1093-
LIS->InsertMachineInstrInMaps(*MI);
1121+
if (LIS)
1122+
LIS->InsertMachineInstrInMaps(*MI);
10941123
return;
10951124
}
10961125

@@ -1100,8 +1129,10 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
11001129
.addReg(DestReg, RegState::Define | RegState::Dead)
11011130
.addReg(RISCV::X0, RegState::Kill)
11021131
.addImm(Info.encodeVTYPE());
1103-
LIS->InsertMachineInstrInMaps(*MI);
1104-
LIS->createAndComputeVirtRegInterval(DestReg);
1132+
if (LIS) {
1133+
LIS->InsertMachineInstrInMaps(*MI);
1134+
LIS->createAndComputeVirtRegInterval(DestReg);
1135+
}
11051136
return;
11061137
}
11071138

@@ -1111,12 +1142,14 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
11111142
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
11121143
.addReg(AVLReg)
11131144
.addImm(Info.encodeVTYPE());
1114-
LIS->InsertMachineInstrInMaps(*MI);
1115-
// Normally the AVL's live range will already extend past the inserted vsetvli
1116-
// because the pseudos below will already use the AVL. But this isn't always
1117-
// the case, e.g. PseudoVMV_X_S doesn't have an AVL operand.
1118-
LIS->getInterval(AVLReg).extendInBlock(
1119-
LIS->getMBBStartIdx(&MBB), LIS->getInstructionIndex(*MI).getRegSlot());
1145+
if (LIS) {
1146+
LIS->InsertMachineInstrInMaps(*MI);
1147+
// Normally the AVL's live range will already extend past the inserted vsetvli
1148+
// because the pseudos below will already use the AVL. But this isn't always
1149+
// the case, e.g. PseudoVMV_X_S doesn't have an AVL operand.
1150+
LIS->getInterval(AVLReg).extendInBlock(
1151+
LIS->getMBBStartIdx(&MBB), LIS->getInstructionIndex(*MI).getRegSlot());
1152+
}
11201153
}
11211154

11221155
/// Return true if a VSETVLI is required to transition from CurInfo to Require
@@ -1230,10 +1263,13 @@ void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
12301263
if (RISCV::isFaultFirstLoad(MI)) {
12311264
// Update AVL to vl-output of the fault first load.
12321265
assert(MI.getOperand(1).getReg().isVirtual());
1233-
auto &LI = LIS->getInterval(MI.getOperand(1).getReg());
1234-
SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
1235-
VNInfo *VNI = LI.getVNInfoAt(SI);
1236-
Info.setAVLRegDef(VNI, MI.getOperand(1).getReg());
1266+
if (LIS) {
1267+
auto &LI = LIS->getInterval(MI.getOperand(1).getReg());
1268+
SlotIndex SI = LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
1269+
VNInfo *VNI = LI.getVNInfoAt(SI);
1270+
Info.setAVLRegDef(VNI, MI.getOperand(1).getReg());
1271+
} else
1272+
Info.setAVLRegDef(nullptr, MI.getOperand(1).getReg());
12371273
return;
12381274
}
12391275

@@ -1327,6 +1363,9 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
13271363
if (!Require.hasAVLReg())
13281364
return true;
13291365

1366+
if (!LIS)
1367+
return true;
1368+
13301369
// We need the AVL to have been produced by a PHI node in this basic block.
13311370
const VNInfo *Valno = Require.getAVLVNInfo();
13321371
if (!Valno->isPHIDef() || LIS->getMBBFromIndex(Valno->def) != &MBB)
@@ -1402,27 +1441,29 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
14021441
MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
14031442
if (VLOp.isReg()) {
14041443
Register Reg = VLOp.getReg();
1405-
LiveInterval &LI = LIS->getInterval(Reg);
14061444

14071445
// Erase the AVL operand from the instruction.
14081446
VLOp.setReg(RISCV::NoRegister);
14091447
VLOp.setIsKill(false);
1410-
SmallVector<MachineInstr *> DeadMIs;
1411-
LIS->shrinkToUses(&LI, &DeadMIs);
1412-
// We might have separate components that need split due to
1413-
// needVSETVLIPHI causing us to skip inserting a new VL def.
1414-
SmallVector<LiveInterval *> SplitLIs;
1415-
LIS->splitSeparateComponents(LI, SplitLIs);
1416-
1417-
// If the AVL was an immediate > 31, then it would have been emitted
1418-
// as an ADDI. However, the ADDI might not have been used in the
1419-
// vsetvli, or a vsetvli might not have been emitted, so it may be
1420-
// dead now.
1421-
for (MachineInstr *DeadMI : DeadMIs) {
1422-
if (!TII->isAddImmediate(*DeadMI, Reg))
1423-
continue;
1424-
LIS->RemoveMachineInstrFromMaps(*DeadMI);
1425-
DeadMI->eraseFromParent();
1448+
if (LIS) {
1449+
LiveInterval &LI = LIS->getInterval(Reg);
1450+
SmallVector<MachineInstr *> DeadMIs;
1451+
LIS->shrinkToUses(&LI, &DeadMIs);
1452+
// We might have separate components that need split due to
1453+
// needVSETVLIPHI causing us to skip inserting a new VL def.
1454+
SmallVector<LiveInterval *> SplitLIs;
1455+
LIS->splitSeparateComponents(LI, SplitLIs);
1456+
1457+
// If the AVL was an immediate > 31, then it would have been emitted
1458+
// as an ADDI. However, the ADDI might not have been used in the
1459+
// vsetvli, or a vsetvli might not have been emitted, so it may be
1460+
// dead now.
1461+
for (MachineInstr *DeadMI : DeadMIs) {
1462+
if (!TII->isAddImmediate(*DeadMI, Reg))
1463+
continue;
1464+
LIS->RemoveMachineInstrFromMaps(*DeadMI);
1465+
DeadMI->eraseFromParent();
1466+
}
14261467
}
14271468
}
14281469
MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
@@ -1479,6 +1520,9 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
14791520
if (!UnavailablePred || !AvailableInfo.isValid())
14801521
return;
14811522

1523+
if (!LIS)
1524+
return;
1525+
14821526
// If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
14831527
// the unavailable pred.
14841528
if (AvailableInfo.hasSEWLMULRatioOnly())
@@ -1625,7 +1669,7 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
16251669

16261670
// The def of DefReg moved to MI, so extend the LiveInterval up to
16271671
// it.
1628-
if (DefReg.isVirtual()) {
1672+
if (DefReg.isVirtual() && LIS) {
16291673
LiveInterval &DefLI = LIS->getInterval(DefReg);
16301674
SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
16311675
VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
@@ -1654,13 +1698,15 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
16541698

16551699
if (OldVLReg && OldVLReg.isVirtual()) {
16561700
// NextMI no longer uses OldVLReg so shrink its LiveInterval.
1657-
LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
1701+
if (LIS)
1702+
LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
16581703

16591704
MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
16601705
if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
16611706
MRI->use_nodbg_empty(OldVLReg)) {
16621707
VLOpDef->eraseFromParent();
1663-
LIS->removeInterval(OldVLReg);
1708+
if (LIS)
1709+
LIS->removeInterval(OldVLReg);
16641710
}
16651711
}
16661712
MI.setDesc(NextMI->getDesc());
@@ -1676,7 +1722,8 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
16761722

16771723
NumCoalescedVSETVL += ToDelete.size();
16781724
for (auto *MI : ToDelete) {
1679-
LIS->RemoveMachineInstrFromMaps(*MI);
1725+
if (LIS)
1726+
LIS->RemoveMachineInstrFromMaps(*MI);
16801727
MI->eraseFromParent();
16811728
}
16821729
}
@@ -1691,12 +1738,14 @@ void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
16911738
auto ReadVLMI = BuildMI(MBB, I, MI.getDebugLoc(),
16921739
TII->get(RISCV::PseudoReadVL), VLOutput);
16931740
// Move the LiveInterval's definition down to PseudoReadVL.
1694-
SlotIndex NewDefSI =
1741+
if (LIS) {
1742+
SlotIndex NewDefSI =
16951743
LIS->InsertMachineInstrInMaps(*ReadVLMI).getRegSlot();
1696-
LiveInterval &DefLI = LIS->getInterval(VLOutput);
1697-
VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
1698-
DefLI.removeSegment(DefLI.beginIndex(), NewDefSI);
1699-
DefVNI->def = NewDefSI;
1744+
LiveInterval &DefLI = LIS->getInterval(VLOutput);
1745+
VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
1746+
DefLI.removeSegment(DefLI.beginIndex(), NewDefSI);
1747+
DefVNI->def = NewDefSI;
1748+
}
17001749
}
17011750
// We don't use the vl output of the VLEFF/VLSEGFF anymore.
17021751
MI.getOperand(1).setReg(RISCV::X0);
@@ -1714,7 +1763,7 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
17141763

17151764
TII = ST->getInstrInfo();
17161765
MRI = &MF.getRegInfo();
1717-
LIS = &getAnalysis<LiveIntervals>();
1766+
LIS = getAnalysisIfAvailable<LiveIntervals>();
17181767

17191768
assert(BlockInfo.empty() && "Expect empty block infos");
17201769
BlockInfo.resize(MF.getNumBlockIDs());

llvm/test/CodeGen/RISCV/O0-pipeline.ll

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,6 @@
4747
; CHECK-NEXT: Eliminate PHI nodes for register allocation
4848
; CHECK-NEXT: Two-Address instruction pass
4949
; CHECK-NEXT: Fast Register Allocator
50-
; CHECK-NEXT: MachineDominator Tree Construction
51-
; CHECK-NEXT: Slot index numbering
52-
; CHECK-NEXT: Live Interval Analysis
5350
; CHECK-NEXT: RISC-V Insert VSETVLI pass
5451
; CHECK-NEXT: Fast Register Allocator
5552
; CHECK-NEXT: Remove Redundant DEBUG_VALUE analysis
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=riscv64 -O0 < %s | FileCheck %s
3+
4+
; Make sure we don't run LiveIntervals at O0, otherwise it will crash when
5+
; running on this unreachable block.
6+
7+
define i16 @f() {
8+
; CHECK-LABEL: f:
9+
; CHECK: # %bb.0: # %BB
10+
; CHECK-NEXT: addi sp, sp, -16
11+
; CHECK-NEXT: .cfi_def_cfa_offset 16
12+
; CHECK-NEXT: j .LBB0_1
13+
; CHECK-NEXT: .LBB0_1: # %BB1
14+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
15+
; CHECK-NEXT: li a0, 0
16+
; CHECK-NEXT: sd a0, 8(sp) # 8-byte Folded Spill
17+
; CHECK-NEXT: j .LBB0_1
18+
; CHECK-NEXT: # %bb.2: # %BB1
19+
; CHECK-NEXT: li a0, 0
20+
; CHECK-NEXT: bnez a0, .LBB0_1
21+
; CHECK-NEXT: j .LBB0_3
22+
; CHECK-NEXT: .LBB0_3: # %BB2
23+
; CHECK-NEXT: ld a0, 8(sp) # 8-byte Folded Reload
24+
; CHECK-NEXT: addi sp, sp, 16
25+
; CHECK-NEXT: ret
26+
BB:
27+
br label %BB1
28+
29+
BB1:
30+
%A = or i16 0, 0
31+
%B = fcmp true float 0.000000e+00, 0.000000e+00
32+
%C = or i1 %B, false
33+
br i1 %C, label %BB1, label %BB2
34+
35+
BB2:
36+
ret i16 %A
37+
}

0 commit comments

Comments
 (0)