Skip to content

Commit acb7859

Browse files
authored
[MachineSink] Extend loop sinking capability (#117247)
The current MIR cycle sinking capabilities are rather limited. They only support sinking copies into a single successor block while obeying limits. This opt-in feature adds a more aggressive option that is not limited by the above concerns. The feature will try to "sink" by duplicating any top-level preheader instruction (that we are sure is safe to sink) into any user block, and then performs some dead-code cleanup. In particular, this is useful in high register-pressure (RP) situations when loop bodies have control flow.
1 parent 24f177d commit acb7859

File tree

5 files changed

+1530
-168
lines changed

5 files changed

+1530
-168
lines changed

llvm/lib/CodeGen/MachineSink.cpp

Lines changed: 176 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include "llvm/CodeGen/TargetInstrInfo.h"
4646
#include "llvm/CodeGen/TargetPassConfig.h"
4747
#include "llvm/CodeGen/TargetRegisterInfo.h"
48+
#include "llvm/CodeGen/TargetSchedule.h"
4849
#include "llvm/CodeGen/TargetSubtargetInfo.h"
4950
#include "llvm/IR/BasicBlock.h"
5051
#include "llvm/IR/DebugInfoMetadata.h"
@@ -113,6 +114,8 @@ STATISTIC(NumSplit, "Number of critical edges split");
113114
STATISTIC(NumCoalesces, "Number of copies coalesced");
114115
STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");
115116

117+
using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
118+
116119
namespace {
117120

118121
class MachineSinking : public MachineFunctionPass {
@@ -128,6 +131,7 @@ class MachineSinking : public MachineFunctionPass {
128131
const MachineBranchProbabilityInfo *MBPI = nullptr;
129132
AliasAnalysis *AA = nullptr;
130133
RegisterClassInfo RegClassInfo;
134+
TargetSchedModel SchedModel;
131135

132136
// Remember which edges have been considered for breaking.
133137
SmallSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>, 8>
@@ -161,6 +165,8 @@ class MachineSinking : public MachineFunctionPass {
161165
/// would re-order assignments.
162166
using SeenDbgUser = PointerIntPair<MachineInstr *, 1>;
163167

168+
using SinkItem = std::pair<MachineInstr *, MachineBasicBlock *>;
169+
164170
/// Record of DBG_VALUE uses of vregs in a block, so that we can identify
165171
/// debug instructions to sink.
166172
SmallDenseMap<unsigned, TinyPtrVector<SeenDbgUser>> SeenDbgUsers;
@@ -255,7 +261,10 @@ class MachineSinking : public MachineFunctionPass {
255261

256262
void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB,
257263
SmallVectorImpl<MachineInstr *> &Candidates);
258-
bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I);
264+
265+
bool
266+
aggressivelySinkIntoCycle(MachineCycle *Cycle, MachineInstr &I,
267+
DenseMap<SinkItem, MachineInstr *> &SunkInstrs);
259268

260269
bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
261270
MachineBasicBlock *MBB,
@@ -271,11 +280,14 @@ class MachineSinking : public MachineFunctionPass {
271280
GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
272281
AllSuccsCache &AllSuccessors) const;
273282

274-
std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB);
283+
std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB,
284+
bool UseCache = true);
275285

276286
bool registerPressureSetExceedsLimit(unsigned NRegs,
277287
const TargetRegisterClass *RC,
278288
const MachineBasicBlock &MBB);
289+
290+
bool registerPressureExceedsLimit(const MachineBasicBlock &MBB);
279291
};
280292

281293
} // end anonymous namespace
@@ -680,6 +692,10 @@ void MachineSinking::FindCycleSinkCandidates(
680692
SmallVectorImpl<MachineInstr *> &Candidates) {
681693
for (auto &MI : *BB) {
682694
LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI);
695+
if (MI.isMetaInstruction()) {
696+
LLVM_DEBUG(dbgs() << "CycleSink: not sinking meta instruction\n");
697+
continue;
698+
}
683699
if (!TII->shouldSink(MI)) {
684700
LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this "
685701
"target\n");
@@ -775,31 +791,62 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
775791

776792
if (SinkInstsIntoCycle) {
777793
SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_cycles());
778-
for (auto *Cycle : Cycles) {
779-
MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
780-
if (!Preheader) {
781-
LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
782-
continue;
783-
}
784-
SmallVector<MachineInstr *, 8> Candidates;
785-
FindCycleSinkCandidates(Cycle, Preheader, Candidates);
786-
787-
// Walk the candidates in reverse order so that we start with the use
788-
// of a def-use chain, if there is any.
789-
// TODO: Sort the candidates using a cost-model.
790-
unsigned i = 0;
791-
for (MachineInstr *I : llvm::reverse(Candidates)) {
792-
if (i++ == SinkIntoCycleLimit) {
793-
LLVM_DEBUG(dbgs() << "CycleSink: Limit reached of instructions to "
794-
"be analysed.");
795-
break;
794+
SchedModel.init(STI);
795+
bool HasHighPressure;
796+
797+
DenseMap<SinkItem, MachineInstr *> SunkInstrs;
798+
799+
enum CycleSinkStage { COPY, LOW_LATENCY, AGGRESSIVE, END };
800+
for (unsigned Stage = CycleSinkStage::COPY; Stage != CycleSinkStage::END;
801+
++Stage, SunkInstrs.clear()) {
802+
HasHighPressure = false;
803+
804+
for (auto *Cycle : Cycles) {
805+
MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
806+
if (!Preheader) {
807+
LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
808+
continue;
796809
}
810+
SmallVector<MachineInstr *, 8> Candidates;
811+
FindCycleSinkCandidates(Cycle, Preheader, Candidates);
812+
813+
unsigned i = 0;
814+
815+
// Walk the candidates in reverse order so that we start with the use
816+
// of a def-use chain, if there is any.
817+
// TODO: Sort the candidates using a cost-model.
818+
for (MachineInstr *I : llvm::reverse(Candidates)) {
819+
// CycleSinkStage::COPY: Sink a limited number of copies
820+
if (Stage == CycleSinkStage::COPY) {
821+
if (i++ == SinkIntoCycleLimit) {
822+
LLVM_DEBUG(dbgs()
823+
<< "CycleSink: Limit reached of instructions to "
824+
"be analyzed.");
825+
break;
826+
}
827+
828+
if (!I->isCopy())
829+
continue;
830+
}
797831

798-
if (!SinkIntoCycle(Cycle, *I))
799-
break;
800-
EverMadeChange = true;
801-
++NumCycleSunk;
832+
// CycleSinkStage::LOW_LATENCY: sink unlimited number of instructions
833+
// which the target specifies as low-latency
834+
if (Stage == CycleSinkStage::LOW_LATENCY &&
835+
!TII->hasLowDefLatency(SchedModel, *I, 0))
836+
continue;
837+
838+
if (!aggressivelySinkIntoCycle(Cycle, *I, SunkInstrs))
839+
break;
840+
EverMadeChange = true;
841+
++NumCycleSunk;
842+
}
843+
844+
// Recalculate the pressure after sinking
845+
if (!HasHighPressure)
846+
HasHighPressure = registerPressureExceedsLimit(*Preheader);
802847
}
848+
if (!HasHighPressure)
849+
break;
803850
}
804851
}
805852

@@ -1055,13 +1102,15 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
10551102
}
10561103

10571104
std::vector<unsigned> &
1058-
MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
1105+
MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB,
1106+
bool UseCache) {
10591107
// Currently to save compiling time, MBB's register pressure will not change
10601108
// in one ProcessBlock iteration because of CachedRegisterPressure. but MBB's
10611109
// register pressure is changed after sinking any instructions into it.
10621110
// FIXME: need a accurate and cheap register pressure estiminate model here.
1111+
10631112
auto RP = CachedRegisterPressure.find(&MBB);
1064-
if (RP != CachedRegisterPressure.end())
1113+
if (UseCache && RP != CachedRegisterPressure.end())
10651114
return RP->second;
10661115

10671116
RegionPressure Pressure;
@@ -1085,6 +1134,12 @@ MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
10851134
}
10861135

10871136
RPTracker.closeRegion();
1137+
1138+
if (RP != CachedRegisterPressure.end()) {
1139+
CachedRegisterPressure[&MBB] = RPTracker.getPressure().MaxSetPressure;
1140+
return CachedRegisterPressure[&MBB];
1141+
}
1142+
10881143
auto It = CachedRegisterPressure.insert(
10891144
std::make_pair(&MBB, RPTracker.getPressure().MaxSetPressure));
10901145
return It.first->second;
@@ -1103,6 +1158,21 @@ bool MachineSinking::registerPressureSetExceedsLimit(
11031158
return false;
11041159
}
11051160

1161+
// Recalculate RP and check if any pressure set exceeds the set limit.
1162+
bool MachineSinking::registerPressureExceedsLimit(
1163+
const MachineBasicBlock &MBB) {
1164+
std::vector<unsigned> BBRegisterPressure = getBBRegisterPressure(MBB, false);
1165+
1166+
for (unsigned PS = 0; PS < BBRegisterPressure.size(); ++PS) {
1167+
if (BBRegisterPressure[PS] >=
1168+
TRI->getRegPressureSetLimit(*MBB.getParent(), PS)) {
1169+
return true;
1170+
}
1171+
}
1172+
1173+
return false;
1174+
}
1175+
11061176
/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
11071177
bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
11081178
MachineBasicBlock *MBB,
@@ -1581,83 +1651,98 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
15811651
return HasAliasedStore;
15821652
}
15831653

1584-
/// Sink instructions into cycles if profitable. This especially tries to
1585-
/// prevent register spills caused by register pressure if there is little to no
1586-
/// overhead moving instructions into cycles.
1587-
bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) {
1588-
LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I);
1654+
/// Aggressively sink instructions into cycles. This will aggressively try to
1655+
/// sink all instructions in the top-most preheaders in an attempt to reduce RP.
1656+
/// In particular, it will sink into multiple successor blocks without limits
1657+
/// based on the amount of sinking, or the type of ops being sunk (so long as
1658+
/// they are safe to sink).
1659+
bool MachineSinking::aggressivelySinkIntoCycle(
1660+
MachineCycle *Cycle, MachineInstr &I,
1661+
DenseMap<SinkItem, MachineInstr *> &SunkInstrs) {
1662+
// TODO: support instructions with multiple defs
1663+
if (I.getNumDefs() > 1)
1664+
return false;
1665+
1666+
LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Finding sink block for: " << I);
15891667
MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
15901668
assert(Preheader && "Cycle sink needs a preheader block");
1591-
MachineBasicBlock *SinkBlock = nullptr;
1592-
bool CanSink = true;
1593-
const MachineOperand &MO = I.getOperand(0);
1594-
1595-
for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
1596-
LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI);
1597-
if (!Cycle->contains(MI.getParent())) {
1598-
LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n");
1599-
CanSink = false;
1600-
break;
1601-
}
1669+
SmallVector<std::pair<RegSubRegPair, MachineInstr *>> Uses;
16021670

1603-
// FIXME: Come up with a proper cost model that estimates whether sinking
1604-
// the instruction (and thus possibly executing it on every cycle
1605-
// iteration) is more expensive than a register.
1606-
// For now assumes that copies are cheap and thus almost always worth it.
1607-
if (!MI.isCopy()) {
1608-
LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n");
1609-
CanSink = false;
1610-
break;
1671+
MachineOperand &DefMO = I.getOperand(0);
1672+
for (MachineInstr &MI : MRI->use_instructions(DefMO.getReg())) {
1673+
Uses.push_back({{DefMO.getReg(), DefMO.getSubReg()}, &MI});
1674+
}
1675+
1676+
for (std::pair<RegSubRegPair, MachineInstr *> Entry : Uses) {
1677+
MachineInstr *MI = Entry.second;
1678+
LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Analysing use: " << MI);
1679+
if (MI->isPHI()) {
1680+
LLVM_DEBUG(
1681+
dbgs() << "AggressiveCycleSink: Not attempting to sink for PHI.\n");
1682+
continue;
16111683
}
1612-
if (!SinkBlock) {
1613-
SinkBlock = MI.getParent();
1614-
LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: "
1615-
<< printMBBReference(*SinkBlock) << "\n");
1684+
// We cannot sink before the prologue
1685+
if (MI->isPosition() || TII->isBasicBlockPrologue(*MI)) {
1686+
LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Use is BasicBlock prologue, "
1687+
"can't sink.\n");
16161688
continue;
16171689
}
1618-
SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent());
1619-
if (!SinkBlock) {
1620-
LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n");
1621-
CanSink = false;
1622-
break;
1690+
if (!Cycle->contains(MI->getParent())) {
1691+
LLVM_DEBUG(
1692+
dbgs() << "AggressiveCycleSink: Use not in cycle, can't sink.\n");
1693+
continue;
16231694
}
1624-
LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: "
1625-
<< printMBBReference(*SinkBlock) << "\n");
1626-
}
16271695

1628-
if (!CanSink) {
1629-
LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n");
1630-
return false;
1631-
}
1632-
if (!SinkBlock) {
1633-
LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n");
1634-
return false;
1635-
}
1636-
if (SinkBlock == Preheader) {
1637-
LLVM_DEBUG(
1638-
dbgs() << "CycleSink: Not sinking, sink block is the preheader\n");
1639-
return false;
1640-
}
1641-
if (SinkBlock->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold)) {
1642-
LLVM_DEBUG(
1643-
dbgs() << "CycleSink: Not Sinking, block too large to analyse.\n");
1644-
return false;
1645-
}
1696+
MachineBasicBlock *SinkBlock = MI->getParent();
1697+
MachineInstr *NewMI = nullptr;
1698+
SinkItem MapEntry(&I, SinkBlock);
1699+
1700+
auto SI = SunkInstrs.find(MapEntry);
1701+
1702+
// Check for the case in which we have already sunk a copy of this
1703+
// instruction into the user block.
1704+
if (SI != SunkInstrs.end()) {
1705+
LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Already sunk to block: "
1706+
<< printMBBReference(*SinkBlock) << "\n");
1707+
NewMI = SI->second;
1708+
}
16461709

1647-
LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n");
1648-
SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader,
1649-
I);
1710+
// Create a copy of the instruction in the use block.
1711+
if (!NewMI) {
1712+
LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Sinking instruction to block: "
1713+
<< printMBBReference(*SinkBlock) << "\n");
1714+
1715+
NewMI = I.getMF()->CloneMachineInstr(&I);
1716+
if (DefMO.getReg().isVirtual()) {
1717+
const TargetRegisterClass *TRC = MRI->getRegClass(DefMO.getReg());
1718+
Register DestReg = MRI->createVirtualRegister(TRC);
1719+
NewMI->substituteRegister(DefMO.getReg(), DestReg, DefMO.getSubReg(),
1720+
*TRI);
1721+
}
1722+
SinkBlock->insert(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()),
1723+
NewMI);
1724+
SunkInstrs.insert({MapEntry, NewMI});
1725+
}
16501726

1651-
// Conservatively clear any kill flags on uses of sunk instruction
1652-
for (MachineOperand &MO : I.operands()) {
1653-
if (MO.isReg() && MO.readsReg())
1727+
// Conservatively clear any kill flags on uses of sunk instruction
1728+
for (MachineOperand &MO : NewMI->all_uses()) {
1729+
assert(MO.isReg() && MO.isUse());
16541730
RegsToClearKillFlags.insert(MO.getReg());
1655-
}
1731+
}
16561732

1657-
// The instruction is moved from its basic block, so do not retain the
1658-
// debug information.
1659-
assert(!I.isDebugInstr() && "Should not sink debug inst");
1660-
I.setDebugLoc(DebugLoc());
1733+
// The instruction is moved from its basic block, so do not retain the
1734+
// debug information.
1735+
assert(!NewMI->isDebugInstr() && "Should not sink debug inst");
1736+
NewMI->setDebugLoc(DebugLoc());
1737+
1738+
// Replace the use with the newly created virtual register.
1739+
RegSubRegPair &UseReg = Entry.first;
1740+
MI->substituteRegister(UseReg.Reg, NewMI->getOperand(0).getReg(),
1741+
UseReg.SubReg, *TRI);
1742+
}
1743+
// If we have replaced all uses, then delete the dead instruction
1744+
if (I.isDead(*MRI))
1745+
I.eraseFromParent();
16611746
return true;
16621747
}
16631748

0 commit comments

Comments
 (0)