Skip to content

Commit 590830f

Browse files
admitricigcbot
authored and committed
CodeSinking compile time improvements
CodeSinking compile-time improvements: (1) cache the requested register pressure per BB in CodeSinking; (2) invalidate only the affected BBs so that IGCLivenessAnalysis can be recomputed faster after moving instructions in a loop; (3) don't add an instruction's operands to the In set if they are defined in the same block (this is sufficient for the liveness computation).
1 parent aa0886f commit 590830f

File tree

7 files changed

+133
-23
lines changed

7 files changed

+133
-23
lines changed

IGC/Compiler/CISACodeGen/CodeSinking.cpp

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ namespace IGC {
241241
{
242242
MemoizedStoresInLoops.clear();
243243
BlacklistedLoops.clear();
244+
BBPressures.clear();
244245

245246
bool Changed = false;
246247
for (auto &L : LI->getLoopsInPreorder())
@@ -1328,6 +1329,26 @@ namespace IGC {
13281329
return changed;
13291330
}
13301331

1332+
// Implementation of RPE->getMaxRegCountForLoop(*L, SIMD, &WI);
1333+
// with per-BB pressure caching to improve compile-time
1334+
// Returns the largest per-block register pressure over the blocks of L
// (0 if the loop has no blocks). Per-block values are memoized in BBPressures,
// so a block is re-estimated only when it has no cached entry. Callers are
// responsible for erasing entries of blocks whose contents have changed.
uint CodeSinking::getMaxRegCountForLoop(Loop *L)
1335+
{
1336+
IGC_ASSERT(RPE);
1337+
// SIMD width used for the estimate is the estimator's own best guess.
uint SIMD = numLanes(RPE->bestGuessSIMDSize());
1338+
unsigned int Max = 0;
1339+
for (BasicBlock *BB : L->getBlocks())
1340+
{
1341+
// try_emplace inserts a value-initialized entry only when BB is absent;
// the bool in .second reports whether that insertion happened.
auto BBPressureEntry = BBPressures.try_emplace(BB);
1342+
// Reference into the map so the freshly computed value is stored in the cache.
unsigned int &BBPressure = BBPressureEntry.first->second;
1343+
if (BBPressureEntry.second) // BB was not in the set, need to recompute
1344+
{
1345+
BBPressure = RPE->getMaxRegCountForBB(*BB, SIMD, &WI);
1346+
}
1347+
Max = std::max(BBPressure, Max);
1348+
}
1349+
return Max;
1350+
}
1351+
13311352
LoopSinkMode CodeSinking::needLoopSink(Loop *L)
13321353
{
13331354
BasicBlock *Preheader = L->getLoopPreheader();
@@ -1358,7 +1379,7 @@ namespace IGC {
13581379
uint PreheaderDefsSizeInRegs = RPE->bytesToRegisters(PreheaderDefsSizeInBytes);
13591380

13601381
// Estimate max pressure in the loop and the external pressure
1361-
uint MaxLoopPressure = RPE->getMaxRegCountForLoop(*L, SIMD, &WI);
1382+
uint MaxLoopPressure = getMaxRegCountForLoop(L);
13621383
uint FunctionExternalPressure = FRPE ? FRPE->getExternalPressureForFunction(F) : 0;
13631384

13641385
auto isSinkCriteriaMet = [&](uint MaxLoopPressure)
@@ -1428,6 +1449,21 @@ namespace IGC {
14281449
PrintDump("Doing full sink.\n");
14291450
}
14301451

1452+
// We can only affect Preheader and the loop.
1453+
// Collect affected BBs to invalidate cached regpressure
1454+
// and request recomputation of liveness analysis preserving not affected BBs
1455+
BBSet AffectedBBs;
1456+
AffectedBBs.insert(Preheader);
1457+
for (BasicBlock *BB: L->blocks())
1458+
AffectedBBs.insert(BB);
1459+
1460+
auto rerunLiveness = [&]()
1461+
{
1462+
for (BasicBlock *BB: AffectedBBs)
1463+
BBPressures.erase(BB);
1464+
RPE->rerunLivenessAnalysis(*F, &AffectedBBs);
1465+
};
1466+
14311467
bool EverChanged = false;
14321468

14331469
// Find LIs in preheader that would definitely reduce
@@ -1489,6 +1525,7 @@ namespace IGC {
14891525
if (IterChanged)
14901526
{
14911527
EverChanged = true;
1528+
14921529
// Invoke LocalSink() to move def to its first use
14931530
if (LocalBlkSet.size() > 0)
14941531
{
@@ -1501,9 +1538,8 @@ namespace IGC {
15011538
LocalInstSet.clear();
15021539
}
15031540

1504-
uint SIMD = numLanes(RPE->bestGuessSIMDSize());
1505-
RPE->rerunLivenessAnalysis(*F);
1506-
MaxLoopPressure = RPE->getMaxRegCountForLoop(*L, SIMD, &WI);
1541+
rerunLiveness();
1542+
MaxLoopPressure = getMaxRegCountForLoop(L);
15071543
PrintDump("New max loop pressure = " << MaxLoopPressure << "\n");
15081544
if ((MaxLoopPressure < NeededRegpressure)
15091545
&& (Mode == LoopSinkMode::SinkWhileRegpressureIsHigh))
@@ -1554,6 +1590,7 @@ namespace IGC {
15541590
PrintDump(">> Reverting the changes.\n");
15551591

15561592
rollbackSinking(true, Preheader);
1593+
rerunLiveness();
15571594

15581595
return false;
15591596
}

IGC/Compiler/CISACodeGen/CodeSinking.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,9 @@ namespace IGC {
123123
llvm::SmallPtrSet<llvm::Loop*, 8> BlacklistedLoops;
124124
const StoresVec getAllStoresInLoop(llvm::Loop* L);
125125

126+
unsigned getMaxRegCountForLoop(llvm::Loop* L);
127+
llvm::DenseMap<llvm::BasicBlock*, uint> BBPressures;
128+
126129
void appendIfNotExist(InstPair src, std::vector<InstPair> &instMap)
127130
{
128131
if (std::find(instMap.begin(), instMap.end(), src) == instMap.end())

IGC/Compiler/CISACodeGen/IGCFunctionExternalPressure.cpp

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ void IGCFunctionExternalRegPressureAnalysis::combineOut(llvm::BasicBlock *BB, Va
8686
}
8787
}
8888

89-
void IGCFunctionExternalRegPressureAnalysis::addToSet(llvm::Instruction *Inst, ValueSet &Set) {
89+
void IGCFunctionExternalRegPressureAnalysis::addOperandsToSet(llvm::Instruction *Inst, ValueSet &Set) {
9090
for (auto &Op : Inst->operands()) {
9191
llvm::Value *V = Op.get();
9292
// We are counting only instructions right now
@@ -101,6 +101,26 @@ void IGCFunctionExternalRegPressureAnalysis::addToSet(llvm::Instruction *Inst, V
101101
}
102102
}
103103

104+
// Adds to Set the operands of Inst that come from outside Inst's basic block:
// instructions whose parent block differs from Inst's, and function arguments.
// Operands that are instructions in the same block, and all other kinds of
// values, are not added.
void IGCFunctionExternalRegPressureAnalysis::addNonLocalOperandsToSet(llvm::Instruction *Inst, ValueSet &Set) {
105+
for (auto &Op : Inst->operands()) {
106+
llvm::Value *V = Op.get();
107+
// We are counting only instructions right now
108+
// potentially we should also count globals, but
109+
// we definitely shouldn't count:
110+
// br label %bb1 (basic block names)
111+
// call %functionName (function names)
112+
// add %a, 1 (constants)
113+
Instruction *I = dyn_cast<Instruction>(V);
114+
bool IsInstruction = I != nullptr;
115+
// Same-block instruction operands are deliberately skipped here.
bool OperandInDifferentBB = IsInstruction && (I->getParent() != Inst->getParent());
116+
bool IsArgument = !IsInstruction && llvm::isa<llvm::Argument>(V);
117+
if (OperandInDifferentBB || IsArgument)
118+
{
119+
Set.insert(V);
120+
}
121+
}
122+
}
123+
104124

105125
void IGCFunctionExternalRegPressureAnalysis::collectPressureForBB(
106126
llvm::BasicBlock &BB, InsideBlockPressureMap &BBListing,
@@ -118,7 +138,7 @@ void IGCFunctionExternalRegPressureAnalysis::collectPressureForBB(
118138

119139
auto Phi = llvm::dyn_cast<llvm::PHINode>(Inst);
120140
if (!Phi) {
121-
addToSet(Inst, BBSet);
141+
addOperandsToSet(Inst, BBSet);
122142
}
123143

124144
BBListing[Inst] = Size;
@@ -155,7 +175,7 @@ void IGCFunctionExternalRegPressureAnalysis::processBlock(llvm::BasicBlock *BB,
155175
addToPhiSet(Phi, PhiSet);
156176
continue;
157177
}
158-
addToSet(Inst, Set);
178+
addNonLocalOperandsToSet(Inst, Set);
159179
}
160180
}
161181

IGC/Compiler/CISACodeGen/IGCFunctionExternalPressure.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ using namespace IGC;
2222
using namespace llvm;
2323

2424
typedef std::unordered_map<llvm::Value *, unsigned int> InclusionSet;
25-
typedef llvm::SmallPtrSet<llvm::Value *, 16> ValueSet;
25+
typedef llvm::SmallPtrSet<llvm::Value *, 32> ValueSet;
26+
typedef llvm::SmallPtrSet<llvm::BasicBlock *, 32> BBSet;
2627
typedef std::unordered_map<llvm::BasicBlock *, ValueSet> DFSet;
2728
typedef std::unordered_map<llvm::BasicBlock *, ValueSet> PhiSet;
2829
typedef std::unordered_map<llvm::BasicBlock *, PhiSet> InPhiSet;
@@ -63,7 +64,8 @@ class IGCFunctionExternalRegPressureAnalysis : public llvm::ModulePass {
6364
void addToPhiSet(llvm::PHINode *Phi, PhiSet *InPhiSet);
6465
void processBlock(llvm::BasicBlock *BB, ValueSet &Set, PhiSet *PhiSet);
6566
void livenessAnalysis(llvm::Function &F);
66-
void addToSet(llvm::Instruction *Inst, ValueSet &Set);
67+
void addOperandsToSet(llvm::Instruction *Inst, ValueSet &Set);
68+
void addNonLocalOperandsToSet(llvm::Instruction *Inst, ValueSet &Set);
6769

6870

6971
unsigned int registerSizeInBytes() {

IGC/Compiler/CISACodeGen/IGCLivenessAnalysis.cpp

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ void IGCLivenessAnalysis::combineOut(llvm::BasicBlock *BB, ValueSet *Set) {
9595
}
9696
}
9797

98-
void addToSet(llvm::Instruction *Inst, ValueSet &Set) {
98+
void IGCLivenessAnalysis::addOperandsToSet(llvm::Instruction *Inst, ValueSet &Set) {
9999
for (auto &Op : Inst->operands()) {
100100
llvm::Value *V = Op.get();
101101
// We are counting only instructions right now
@@ -110,6 +110,26 @@ void addToSet(llvm::Instruction *Inst, ValueSet &Set) {
110110
}
111111
}
112112

113+
// Adds to Set the operands of Inst that come from outside Inst's basic block:
// instructions whose parent block differs from Inst's, and function arguments.
// Operands that are instructions in the same block, and all other kinds of
// values, are not added.
void IGCLivenessAnalysis::addNonLocalOperandsToSet(llvm::Instruction *Inst, ValueSet &Set) {
114+
for (auto &Op : Inst->operands()) {
115+
llvm::Value *V = Op.get();
116+
// We are counting only instructions right now
117+
// potentially we should also count globals, but
118+
// we definitely shouldn't count:
119+
// br label %bb1 (basic block names)
120+
// call %functionName (function names)
121+
// add %a, 1 (constants)
122+
Instruction *I = dyn_cast<Instruction>(V);
123+
bool IsInstruction = I != nullptr;
124+
// Same-block instruction operands are deliberately skipped here.
bool OperandInDifferentBB = IsInstruction && (I->getParent() != Inst->getParent());
125+
bool IsArgument = !IsInstruction && llvm::isa<llvm::Argument>(V);
126+
if (OperandInDifferentBB || IsArgument)
127+
{
128+
Set.insert(V);
129+
}
130+
}
131+
}
132+
113133
// idea is simple, each predecessor block that converges into our block
114134
// has its own set of PHI values, that it has to deliver
115135
// so we take values that are coming from each block
@@ -126,6 +146,7 @@ void IGCLivenessAnalysis::addToPhiSet(llvm::PHINode *Phi, PhiSet *InPhiSet) {
126146
(*InPhiSet)[BB].insert(ValueFromOurBlock);
127147
}
128148
}
149+
129150
// scan through block in reversed order and add each operand
130151
// into IN block while deleting defined values
131152
void IGCLivenessAnalysis::processBlock(llvm::BasicBlock *BB, ValueSet &Set,
@@ -139,14 +160,27 @@ void IGCLivenessAnalysis::processBlock(llvm::BasicBlock *BB, ValueSet &Set,
139160
addToPhiSet(Phi, PhiSet);
140161
continue;
141162
}
142-
addToSet(Inst, Set);
163+
addNonLocalOperandsToSet(Inst, Set);
143164
}
144165
}
145166

146-
void IGCLivenessAnalysis::livenessAnalysis(llvm::Function &F) {
167+
void IGCLivenessAnalysis::livenessAnalysis(llvm::Function &F, BBSet *StartBBs) {
147168
std::queue<llvm::BasicBlock *> Worklist;
148-
for (BasicBlock &BB : F)
149-
Worklist.push(&BB);
169+
170+
if (StartBBs != nullptr)
171+
{
172+
// If StartBBs are provided we know that only these BBs could be changed
173+
// Add only them to the initial Worklist
174+
for (BasicBlock *BB : *StartBBs)
175+
Worklist.push(BB);
176+
}
177+
else
178+
{
179+
// Start with adding all BBs to the Worklist
180+
// to make sure In set is populated for every BB
181+
for (BasicBlock &BB : F)
182+
Worklist.push(&BB);
183+
}
150184

151185
while (!Worklist.empty()) {
152186

@@ -214,7 +248,7 @@ void IGCLivenessAnalysis::collectPressureForBB(
214248

215249
auto Phi = llvm::dyn_cast<llvm::PHINode>(Inst);
216250
if (!Phi) {
217-
addToSet(Inst, BBSet);
251+
addOperandsToSet(Inst, BBSet);
218252
}
219253

220254
BBListing[Inst] = Size;
@@ -225,7 +259,7 @@ void IGCLivenessAnalysis::collectPressureForBB(
225259
bool IGCLivenessAnalysis::runOnFunction(llvm::Function &F) {
226260

227261
CGCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
228-
livenessAnalysis(F);
262+
livenessAnalysis(F, nullptr);
229263

230264
return true;
231265
}

IGC/Compiler/CISACodeGen/IGCLivenessAnalysis.h

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -133,11 +133,23 @@ class IGCLivenessAnalysis : public llvm::FunctionPass {
133133
// ...
134134
// rerunLivenessAnalysis()
135135
// collectPressureForBB()
136-
void rerunLivenessAnalysis(llvm::Function &F) {
137-
releaseMemory();
138-
livenessAnalysis(F);
136+
// Recomputes liveness for F.
//   BBs == nullptr (default): calls releaseMemory() first and then runs
//     livenessAnalysis() with no start set.
//     NOTE(review): releaseMemory() presumably discards all stored sets - confirm.
//   BBs != nullptr: clears only the In, Out and InPhi entries of the listed
//     blocks and passes the same set to livenessAnalysis() as StartBBs;
//     entries of blocks not in *BBs are not cleared here.
void rerunLivenessAnalysis(llvm::Function &F, BBSet *BBs = nullptr) {
137+
if (BBs != nullptr)
138+
{
139+
for (BasicBlock *BB : *BBs)
140+
{
141+
In[BB].clear();
142+
Out[BB].clear();
143+
InPhi[BB].clear();
144+
}
145+
}
146+
else
147+
{
148+
releaseMemory();
149+
}
150+
livenessAnalysis(F, BBs);
139151
}
140-
void livenessAnalysis(llvm::Function &F);
152+
void livenessAnalysis(llvm::Function &F, BBSet *StartBBs);
141153

142154
static char ID;
143155
llvm::StringRef getPassName() const override { return "IGCLivenessAnalysis"; }
@@ -154,6 +166,8 @@ class IGCLivenessAnalysis : public llvm::FunctionPass {
154166
void mergeSets(ValueSet *OutSet, llvm::BasicBlock *Succ);
155167
void combineOut(llvm::BasicBlock *BB, ValueSet *Set);
156168
void addToPhiSet(llvm::PHINode *Phi, PhiSet *InPhiSet);
169+
void addOperandsToSet(llvm::Instruction *Inst, ValueSet &Set);
170+
void addNonLocalOperandsToSet(llvm::Instruction *Inst, ValueSet &Set);
157171
void processBlock(llvm::BasicBlock *BB, ValueSet &Set, PhiSet *PhiSet);
158172

159173
};

IGC/Compiler/tests/RegPressureEstimator/ExternalPressureEstimator.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; UNSUPPORTED: system-windows
22
; REQUIRES: regkeys
3-
; RUN: igc_opt --igc-pressure-printer -S --disable-output --regkey=RegPressureVerbocity=5 < %s 2>&1 | FileCheck %s
3+
; RUN: igc_opt --igc-pressure-printer -S --disable-output --regkey=RegPressureVerbocity=1 < %s 2>&1 | FileCheck %s
44

55
define spir_func void @baz() nounwind {
66
entry:
@@ -83,8 +83,8 @@ entry:
8383
;CHECK: N: 0 (0) ret void
8484
;CHECK: SIMD: 8, external pressure: 0
8585
;CHECK: block: entry function: main
86-
;CHECK: IN: [ 4 ] %J, %base, %I, %offset,
87-
;CHECK: KILL: [ 4 ] %J, %base, %I, %offset,
86+
;CHECK: IN: [ 4 ]
87+
;CHECK: KILL: [ 4 ]
8888
;CHECK: OUT: [ 0 ]
8989
;CHECK: N: 256 (8) %baseArith = ptrtoint double addrspace(1)* %base to i64
9090
;CHECK: N: 256 (8) %basePtr = mul nuw nsw i64 %offset, 207368

0 commit comments

Comments
 (0)