Skip to content

Commit 5dddd84

Browse files
committed
Use stage approach
Change-Id: I4082bd57dd03236e4d578dac4804949544f4dcf2
1 parent 16b16e3 commit 5dddd84

File tree

10 files changed

+1364
-946
lines changed

10 files changed

+1364
-946
lines changed

llvm/include/llvm/CodeGen/TargetInstrInfo.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1806,11 +1806,6 @@ class TargetInstrInfo : public MCInstrInfo {
18061806
unsigned defaultDefLatency(const MCSchedModel &SchedModel,
18071807
const MachineInstr &DefMI) const;
18081808

1809-
/// Return true if this instruction is considered low latency.
1810-
virtual bool isLowLatencyInstruction(const MachineInstr &MI) const {
1811-
return false;
1812-
};
1813-
18141809
/// Return true if this opcode has high latency to its result.
18151810
virtual bool isHighLatencyDef(int opc) const { return false; }
18161811

llvm/lib/CodeGen/MachineSink.cpp

Lines changed: 80 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include "llvm/CodeGen/TargetInstrInfo.h"
4545
#include "llvm/CodeGen/TargetPassConfig.h"
4646
#include "llvm/CodeGen/TargetRegisterInfo.h"
47+
#include "llvm/CodeGen/TargetSchedule.h"
4748
#include "llvm/CodeGen/TargetSubtargetInfo.h"
4849
#include "llvm/IR/BasicBlock.h"
4950
#include "llvm/IR/DebugInfoMetadata.h"
@@ -100,12 +101,6 @@ static cl::opt<bool>
100101
"register spills"),
101102
cl::init(false), cl::Hidden);
102103

103-
static cl::opt<bool> AggressivelySinkInstsIntoCycle(
104-
"aggressive-sink-insts-into-cycles",
105-
cl::desc("Aggressively sink instructions into cycles to avoid "
106-
"register spills"),
107-
cl::init(false), cl::Hidden);
108-
109104
static cl::opt<unsigned> SinkIntoCycleLimit(
110105
"machine-sink-cycle-limit",
111106
cl::desc(
@@ -135,6 +130,7 @@ class MachineSinking : public MachineFunctionPass {
135130
const MachineBranchProbabilityInfo *MBPI = nullptr;
136131
AliasAnalysis *AA = nullptr;
137132
RegisterClassInfo RegClassInfo;
133+
TargetSchedModel SchedModel;
138134

139135
// Remember which edges have been considered for breaking.
140136
SmallSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>, 8>
@@ -262,7 +258,6 @@ class MachineSinking : public MachineFunctionPass {
262258

263259
void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB,
264260
SmallVectorImpl<MachineInstr *> &Candidates);
265-
bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I);
266261

267262
bool isDead(const MachineInstr *MI) const;
268263
bool aggressivelySinkIntoCycle(
@@ -284,11 +279,14 @@ class MachineSinking : public MachineFunctionPass {
284279
GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
285280
AllSuccsCache &AllSuccessors) const;
286281

287-
std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB);
282+
std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB,
283+
bool UseCache = true);
288284

289285
bool registerPressureSetExceedsLimit(unsigned NRegs,
290286
const TargetRegisterClass *RC,
291287
const MachineBasicBlock &MBB);
288+
289+
bool registerPressureExceedsLimit(const MachineBasicBlock &MBB);
292290
};
293291

294292
} // end anonymous namespace
@@ -787,48 +785,63 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
787785
EverMadeChange = true;
788786
}
789787

790-
if (SinkInstsIntoCycle || AggressivelySinkInstsIntoCycle) {
788+
if (SinkInstsIntoCycle) {
791789
SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_cycles());
790+
SchedModel.init(STI);
791+
enum CycleSinkStage { COPY, LOW_LATENCY, AGGRESSIVE, END };
792792

793-
DenseMap<std::pair<MachineInstr *, MachineBasicBlock *>, MachineInstr *>
794-
SunkInstrs;
795-
for (auto *Cycle : Cycles) {
796-
MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
797-
if (!Preheader) {
798-
LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
799-
continue;
800-
}
801-
SmallVector<MachineInstr *, 8> Candidates;
802-
FindCycleSinkCandidates(Cycle, Preheader, Candidates);
803-
804-
// Walk the candidates in reverse order so that we start with the use
805-
// of a def-use chain, if there is any.
806-
// TODO: Sort the candidates using a cost-model.
807-
unsigned i = 0;
808-
809-
for (MachineInstr *I : llvm::reverse(Candidates)) {
810-
// AggressivelySinkInstsIntoCycle sinks a superset of instructions
811-
// relative to regular cycle sinking. Thus, this option supercedes
812-
// captures all sinking opportunites done
813-
if (AggressivelySinkInstsIntoCycle) {
814-
aggressivelySinkIntoCycle(Cycle, *I, SunkInstrs);
815-
EverMadeChange = true;
816-
++NumCycleSunk;
793+
CycleSinkStage Stage = CycleSinkStage::COPY;
794+
bool HasHighPressure;
795+
do {
796+
HasHighPressure = false;
797+
DenseMap<std::pair<MachineInstr *, MachineBasicBlock *>, MachineInstr *>
798+
SunkInstrs;
799+
for (auto *Cycle : Cycles) {
800+
MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
801+
if (!Preheader) {
802+
LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
817803
continue;
818804
}
805+
SmallVector<MachineInstr *, 8> Candidates;
806+
FindCycleSinkCandidates(Cycle, Preheader, Candidates);
807+
808+
unsigned i = 0;
809+
810+
// Walk the candidates in reverse order so that we start with the use
811+
// of a def-use chain, if there is any.
812+
// TODO: Sort the candidates using a cost-model.
813+
for (MachineInstr *I : llvm::reverse(Candidates)) {
814+
// CycleSinkStage::COPY: Sink a limited number of copies
815+
if (Stage == CycleSinkStage::COPY) {
816+
if (i++ == SinkIntoCycleLimit) {
817+
LLVM_DEBUG(dbgs()
818+
<< "CycleSink: Limit reached of instructions to "
819+
"be analysed.");
820+
break;
821+
}
822+
823+
if (!I->isCopy())
824+
continue;
825+
}
819826

820-
if (i++ == SinkIntoCycleLimit) {
821-
LLVM_DEBUG(dbgs() << "CycleSink: Limit reached of instructions to "
822-
"be analysed.");
823-
break;
827+
// CycleSinkStage::LOW_LATENCY: sink an unlimited number of instructions
828+
// which the target specifies as low-latency
829+
if (Stage == CycleSinkStage::LOW_LATENCY &&
830+
!TII->hasLowDefLatency(SchedModel, *I, 0))
831+
continue;
832+
833+
if (!aggressivelySinkIntoCycle(Cycle, *I, SunkInstrs))
834+
break;
835+
EverMadeChange = true;
836+
++NumCycleSunk;
824837
}
825838

826-
if (!SinkIntoCycle(Cycle, *I))
827-
break;
828-
EverMadeChange = true;
829-
++NumCycleSunk;
839+
// Recalculate the pressure after sinking
840+
if (!HasHighPressure)
841+
HasHighPressure = registerPressureExceedsLimit(*Preheader);
830842
}
831-
}
843+
Stage = (CycleSinkStage)(Stage + 1);
844+
} while (HasHighPressure && Stage < CycleSinkStage::END);
832845
}
833846

834847
HasStoreCache.clear();
@@ -1081,13 +1094,15 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
10811094
}
10821095

10831096
std::vector<unsigned> &
1084-
MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
1097+
MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB,
1098+
bool UseCache) {
10851099
// Currently to save compiling time, MBB's register pressure will not change
10861100
// in one ProcessBlock iteration because of CachedRegisterPressure. But MBB's
10871101
// register pressure is changed after sinking any instructions into it.
10881102
// FIXME: need an accurate and cheap register pressure estimation model here.
1103+
10891104
auto RP = CachedRegisterPressure.find(&MBB);
1090-
if (RP != CachedRegisterPressure.end())
1105+
if (UseCache && RP != CachedRegisterPressure.end())
10911106
return RP->second;
10921107

10931108
RegionPressure Pressure;
@@ -1111,6 +1126,12 @@ MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
11111126
}
11121127

11131128
RPTracker.closeRegion();
1129+
1130+
if (RP != CachedRegisterPressure.end()) {
1131+
CachedRegisterPressure[&MBB] = RPTracker.getPressure().MaxSetPressure;
1132+
return CachedRegisterPressure[&MBB];
1133+
}
1134+
11141135
auto It = CachedRegisterPressure.insert(
11151136
std::make_pair(&MBB, RPTracker.getPressure().MaxSetPressure));
11161137
return It.first->second;
@@ -1129,6 +1150,21 @@ bool MachineSinking::registerPressureSetExceedsLimit(
11291150
return false;
11301151
}
11311152

1153+
// Recalculate RP and check if any pressure set exceeds the set limit.
1154+
bool MachineSinking::registerPressureExceedsLimit(
1155+
const MachineBasicBlock &MBB) {
1156+
std::vector<unsigned> BBRegisterPressure = getBBRegisterPressure(MBB, false);
1157+
1158+
for (unsigned PS = 0; PS < BBRegisterPressure.size(); ++PS) {
1159+
if (BBRegisterPressure[PS] >=
1160+
TRI->getRegPressureSetLimit(*MBB.getParent(), PS)) {
1161+
return true;
1162+
}
1163+
}
1164+
1165+
return false;
1166+
}
1167+
11321168
/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
11331169
bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
11341170
MachineBasicBlock *MBB,
@@ -1656,10 +1692,6 @@ bool MachineSinking::aggressivelySinkIntoCycle(
16561692
if (I.getNumDefs() > 1)
16571693
return false;
16581694

1659-
// Only sink instructions which the target considers to be low latency
1660-
if (!TII->isLowLatencyInstruction(I))
1661-
return false;
1662-
16631695
LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Finding sink block for: " << I);
16641696
MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
16651697
assert(Preheader && "Cycle sink needs a preheader block");
@@ -1741,86 +1773,6 @@ bool MachineSinking::aggressivelySinkIntoCycle(
17411773
return true;
17421774
}
17431775

1744-
/// Sink instructions into cycles if profitable. This especially tries to
1745-
/// prevent register spills caused by register pressure if there is little to no
1746-
/// overhead moving instructions into cycles.
1747-
bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) {
1748-
LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I);
1749-
MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
1750-
assert(Preheader && "Cycle sink needs a preheader block");
1751-
MachineBasicBlock *SinkBlock = nullptr;
1752-
bool CanSink = true;
1753-
const MachineOperand &MO = I.getOperand(0);
1754-
1755-
for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
1756-
LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI);
1757-
if (!Cycle->contains(MI.getParent())) {
1758-
LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n");
1759-
CanSink = false;
1760-
break;
1761-
}
1762-
1763-
// FIXME: Come up with a proper cost model that estimates whether sinking
1764-
// the instruction (and thus possibly executing it on every cycle
1765-
// iteration) is more expensive than a register.
1766-
// For now assumes that copies are cheap and thus almost always worth it.
1767-
if (!MI.isCopy()) {
1768-
LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n");
1769-
CanSink = false;
1770-
break;
1771-
}
1772-
if (!SinkBlock) {
1773-
SinkBlock = MI.getParent();
1774-
LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: "
1775-
<< printMBBReference(*SinkBlock) << "\n");
1776-
continue;
1777-
}
1778-
SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent());
1779-
if (!SinkBlock) {
1780-
LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n");
1781-
CanSink = false;
1782-
break;
1783-
}
1784-
LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: "
1785-
<< printMBBReference(*SinkBlock) << "\n");
1786-
}
1787-
1788-
if (!CanSink) {
1789-
LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n");
1790-
return false;
1791-
}
1792-
if (!SinkBlock) {
1793-
LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n");
1794-
return false;
1795-
}
1796-
if (SinkBlock == Preheader) {
1797-
LLVM_DEBUG(
1798-
dbgs() << "CycleSink: Not sinking, sink block is the preheader\n");
1799-
return false;
1800-
}
1801-
if (SinkBlock->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold)) {
1802-
LLVM_DEBUG(
1803-
dbgs() << "CycleSink: Not Sinking, block too large to analyse.\n");
1804-
return false;
1805-
}
1806-
1807-
LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n");
1808-
SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader,
1809-
I);
1810-
1811-
// Conservatively clear any kill flags on uses of sunk instruction
1812-
for (MachineOperand &MO : I.operands()) {
1813-
if (MO.isReg() && MO.readsReg())
1814-
RegsToClearKillFlags.insert(MO.getReg());
1815-
}
1816-
1817-
// The instruction is moved from its basic block, so do not retain the
1818-
// debug information.
1819-
assert(!I.isDebugInstr() && "Should not sink debug inst");
1820-
I.setDebugLoc(DebugLoc());
1821-
return true;
1822-
}
1823-
18241776
/// SinkInstruction - Determine whether it is safe to sink the specified machine
18251777
/// instruction out of its current block into a successor.
18261778
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8676,13 +8676,7 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const {
86768676
bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr &MI) const {
86778677
unsigned Opc = MI.getOpcode();
86788678

8679-
if (MI.isCopy() || isSMRD(Opc))
8680-
return true;
8681-
8682-
if (SchedModel.hasInstrSchedModel())
8683-
return SchedModel.computeInstrLatency(Opc) < 4;
8684-
8685-
return false;
8679+
return isSMRD(Opc);
86868680
}
86878681

86888682
bool SIInstrInfo::isHighLatencyDef(int Opc) const {

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1291,7 +1291,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
12911291
uint64_t getDefaultRsrcDataFormat() const;
12921292
uint64_t getScratchRsrcWords23() const;
12931293

1294-
bool isLowLatencyInstruction(const MachineInstr &MI) const override;
1294+
bool isLowLatencyInstruction(const MachineInstr &MI) const;
12951295
bool isHighLatencyDef(int Opc) const override;
12961296

12971297
/// Return the descriptor of the target-specific machine instruction

0 commit comments

Comments
 (0)