Skip to content

Commit 0696412

Browse files
bcheng0127igcbot
authored and committed
fast compilation
1 parent 44d7955 commit 0696412

File tree

11 files changed

+196
-19
lines changed

11 files changed

+196
-19
lines changed

IGC/Compiler/CISACodeGen/CheckInstrTypes.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@ CheckInstrTypes::CheckInstrTypes(IGC::SInstrTypes* instrList) : FunctionPass(ID)
8484
instrList->numOfLoop = 0;
8585
instrList->numInsts = 0;
8686
instrList->numAllocaInsts = 0;
87+
instrList->numGlobalInsts = 0;
88+
instrList->numLocalInsts = 0;
8789
instrList->sampleCmpToDiscardOptimizationPossible = false;
8890
instrList->sampleCmpToDiscardOptimizationSlot = 0;
8991
instrList->hasPullBary = false;
@@ -122,11 +124,28 @@ bool CheckInstrTypes::runOnFunction(Function& F)
122124
return false;
123125
}
124126

127+
// Classifies instruction I by where its value is used and increments the
// matching counter in g_InstrTypes:
//   - numGlobalInsts: at least one user sits in a basic block other than the
//     block that contains I;
//   - numLocalInsts:  every user (if any) sits in the same block as I.
// Exactly one of the two counters is incremented per call.
void CheckInstrTypes::checkGlobalLocal(llvm::Instruction& I)
{
    const BasicBlock* defBB = I.getParent();

    for (auto* U : I.users())
    {
        // dyn_cast<> returns nullptr when U is not an Instruction, so the
        // result has to be checked before it is dereferenced.
        const auto* userInst = dyn_cast<Instruction>(U);
        if (userInst == nullptr)
        {
            continue;
        }
        if (userInst->getParent() != defBB)
        {
            // A single cross-block use is enough; no need to scan further.
            g_InstrTypes->numGlobalInsts++;
            return;
        }
    }
    g_InstrTypes->numLocalInsts++;
}
142+
125143
void CheckInstrTypes::visitInstruction(llvm::Instruction& I)
126144
{
127145
if (!llvm::isa<llvm::DbgInfoIntrinsic>(&I))
128146
{
129147
g_InstrTypes->numInsts++;
148+
checkGlobalLocal(I);
130149
}
131150

132151
if (I.getOpcode() == Instruction::FRem)
@@ -144,6 +163,7 @@ void CheckInstrTypes::visitInstruction(llvm::Instruction& I)
144163
void CheckInstrTypes::visitCallInst(CallInst& C)
145164
{
146165
g_InstrTypes->numInsts++;
166+
checkGlobalLocal(C);
147167
g_InstrTypes->hasCall = true;
148168

149169
Function* calledFunc = C.getCalledFunction();
@@ -298,35 +318,41 @@ void CheckInstrTypes::visitCallInst(CallInst& C)
298318
// Branch instruction: add it to the instruction total and to the
// global/local classification counters.
void CheckInstrTypes::visitBranchInst(BranchInst& I)
{
    auto& stats = *g_InstrTypes;
    ++stats.numInsts;
    checkGlobalLocal(I);
}
302323

303324
// Switch instruction: record that a switch was seen, add it to the
// instruction total and classify it as global or local.
void CheckInstrTypes::visitSwitchInst(SwitchInst& I)
{
    auto& stats = *g_InstrTypes;
    stats.hasSwitch = true;
    ++stats.numInsts;
    checkGlobalLocal(I);
}
308330

309331
// Indirect branch: record that an indirect branch was seen, add it to the
// instruction total and classify it as global or local.
void CheckInstrTypes::visitIndirectBrInst(IndirectBrInst& I)
{
    auto& stats = *g_InstrTypes;
    stats.hasIndirectBranch = true;
    ++stats.numInsts;
    checkGlobalLocal(I);
}
314337

315338
// Integer compare: record that a compare was seen, add it to the
// instruction total and classify it as global or local.
void CheckInstrTypes::visitICmpInst(ICmpInst& I)
{
    auto& stats = *g_InstrTypes;
    stats.hasCmp = true;
    ++stats.numInsts;
    checkGlobalLocal(I);
}
320344

321345
// Floating-point compare: record that a compare was seen, add it to the
// instruction total and classify it as global or local.
void CheckInstrTypes::visitFCmpInst(FCmpInst& I)
{
    auto& stats = *g_InstrTypes;
    stats.hasCmp = true;
    ++stats.numInsts;
    checkGlobalLocal(I);
}
326351

327352
void CheckInstrTypes::visitAllocaInst(AllocaInst& I)
328353
{
329354
g_InstrTypes->numInsts++;
355+
checkGlobalLocal(I);
330356
g_InstrTypes->numAllocaInsts++;
331357
if (I.isArrayAllocation() ||
332358
I.getAllocatedType()->isArrayTy() ||
@@ -355,6 +381,7 @@ void CheckInstrTypes::visitAllocaInst(AllocaInst& I)
355381
void CheckInstrTypes::visitLoadInst(LoadInst& I)
356382
{
357383
g_InstrTypes->numInsts++;
384+
checkGlobalLocal(I);
358385
g_InstrTypes->hasLoadStore = true;
359386
uint as = I.getPointerAddressSpace();
360387
switch (as)
@@ -387,6 +414,7 @@ void CheckInstrTypes::visitLoadInst(LoadInst& I)
387414
void CheckInstrTypes::visitStoreInst(StoreInst& I)
388415
{
389416
g_InstrTypes->numInsts++;
417+
checkGlobalLocal(I);
390418
g_InstrTypes->hasLoadStore = true;
391419
uint as = I.getPointerAddressSpace();
392420
if (as != ADDRESS_SPACE_PRIVATE)
@@ -423,18 +451,21 @@ void CheckInstrTypes::visitStoreInst(StoreInst& I)
423451
// PHI node: record that a PHI was seen, add it to the instruction total
// and classify it as global or local.
void CheckInstrTypes::visitPHINode(PHINode& PN)
{
    auto& stats = *g_InstrTypes;
    stats.hasPhi = true;
    ++stats.numInsts;
    checkGlobalLocal(PN);
}
428457

429458
// Select instruction: record that a select was seen, add it to the
// instruction total and classify it as global or local.
void CheckInstrTypes::visitSelectInst(SelectInst& I)
{
    auto& stats = *g_InstrTypes;
    stats.hasSel = true;
    ++stats.numInsts;
    checkGlobalLocal(I);
}
434464

435465
void CheckInstrTypes::visitGetElementPtrInst(llvm::GetElementPtrInst& I)
436466
{
437467
g_InstrTypes->numInsts++;
468+
checkGlobalLocal(I);
438469
if (I.getPointerAddressSpace() == ADDRESS_SPACE_GENERIC)
439470
{
440471
g_InstrTypes->hasGenericAddressSpacePointers = true;

IGC/Compiler/CISACodeGen/CheckInstrTypes.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ namespace IGC
3535

3636
virtual bool runOnFunction(llvm::Function& F) override;
3737

38+
void checkGlobalLocal(llvm::Instruction& I);
39+
3840
virtual llvm::StringRef getPassName() const override
3941
{
4042
return "CheckInstrTypes";

IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,13 @@ static void AddLegalizationPasses(CodeGenContext& ctx, IGCPassManager& mpm, PSSi
358358
bool isOptDisabled = ctx.getModuleMetaData()->compOpt.OptDisable;
359359
bool fastCompile = ctx.getModuleMetaData()->compOpt.FastCompilation;
360360
bool highAllocaPressure = ctx.m_instrTypes.numAllocaInsts > IGC_GET_FLAG_VALUE(AllocaRAPressureThreshold);
361-
361+
bool isPotentialHPCKernel = (ctx.m_instrTypes.numInsts > IGC_GET_FLAG_VALUE(HPCInstNumThreshold)) ||
362+
(ctx.m_instrTypes.numGlobalInsts > IGC_GET_FLAG_VALUE(HPCGlobalInstNumThreshold)) || IGC_GET_FLAG_VALUE(HPCFastCompilation);
363+
if (highAllocaPressure || isPotentialHPCKernel)
364+
{
365+
IGC_SET_FLAG_VALUE(FastCompileRA, 1);
366+
IGC_SET_FLAG_VALUE(HybridRAWithSpill, 1);
367+
}
362368

363369
if (IGC_IS_FLAG_ENABLED(ForceAllPrivateMemoryToSLM) ||
364370
IGC_IS_FLAG_ENABLED(ForcePrivateMemoryToSLMOnBuffers))
@@ -677,16 +683,21 @@ static void AddLegalizationPasses(CodeGenContext& ctx, IGCPassManager& mpm, PSSi
677683
if (!isOptDisabled)
678684
{
679685
// Optimize lower-level IR
680-
if (!fastCompile && !highAllocaPressure)
686+
if (!fastCompile && !highAllocaPressure && !isPotentialHPCKernel)
681687
{
682688
mpm.add(createIGCInstructionCombiningPass());
683689
}
684690
mpm.add(new GenSpecificPattern());
685-
if (!fastCompile && !highAllocaPressure)
691+
if (!fastCompile && !highAllocaPressure && !isPotentialHPCKernel)
686692
{
687693
mpm.add(createEarlyCSEPass());
688694
}
689-
if (!fastCompile && !highAllocaPressure && IGC_IS_FLAG_ENABLED(allowLICM) && ctx.m_retryManager.AllowLICM())
695+
else if (highAllocaPressure || isPotentialHPCKernel)
696+
{
697+
mpm.add(createSinkingPass());
698+
}
699+
if (!fastCompile && !highAllocaPressure && !isPotentialHPCKernel &&
700+
IGC_IS_FLAG_ENABLED(allowLICM) && ctx.m_retryManager.AllowLICM())
690701
{
691702
mpm.add(createLICMPass());
692703
}

IGC/Compiler/CodeGenPublic.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,8 @@ namespace IGC
264264
unsigned int numAllocaInsts;
265265
unsigned int numPsInputs;
266266
bool hasDynamicGenericLoadStore;
267+
unsigned int numGlobalInsts;
268+
unsigned int numLocalInsts;
267269
};
268270

269271
struct SSimplePushInfo

IGC/Compiler/LegalizationPass.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,19 @@ void Legalization::visitInstruction(llvm::Instruction& I)
159159
{
    // Debug-info intrinsics are excluded from the instruction total.
    if (!llvm::isa<llvm::DbgInfoIntrinsic>(&I))
        m_ctx->m_instrTypes.numInsts++;

    // Classify I as "global" (some user is in a different basic block) or
    // "local" (all users, if any, are in I's own block). Note that this
    // classification is applied to every instruction reaching this visitor,
    // including the debug-info intrinsics skipped above.
    BasicBlock* dBB = I.getParent();

    for (auto* U : I.users())
    {
        // dyn_cast<> returns nullptr for a non-Instruction user; check the
        // result before dereferencing it.
        auto* UI = dyn_cast<Instruction>(U);
        if (UI == nullptr)
            continue;
        if (UI->getParent() != dBB)
        {
            // One cross-block use is sufficient; stop scanning.
            m_ctx->m_instrTypes.numGlobalInsts++;
            return;
        }
    }
    m_ctx->m_instrTypes.numLocalInsts++;
}
163176

164177
void Legalization::visitBinaryOperator(llvm::BinaryOperator& I)

IGC/common/igc_flags.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,9 @@ DECLARE_IGC_REGKEY(bool, EnableA64WA, true, "Guarantee A64 load/store addres-hi
459459
DECLARE_IGC_REGKEY(bool, EnableSamplerSplit, true, "Split SIMD8 Sampler message to 2 subspans and SIMD16 to odd and even", false)
460460
DECLARE_IGC_REGKEY(bool, EnableEvaluateSamplerSplit, true, "Split evaluate messages to sampler into either SIMD8 or SIMD1 messages", false)
461461
DECLARE_IGC_REGKEY(DWORD, AllocaRAPressureThreshold, 500, "The threshold for the register pressure potential", false)
462+
// HPC fast-compilation heuristics: a kernel whose counters exceed either threshold, or any kernel when HPCFastCompilation is set, is treated as a potential HPC kernel.
DECLARE_IGC_REGKEY(DWORD, HPCInstNumThreshold, 50000, "The threshold for the total instruction count above which a kernel is treated as a potential HPC kernel", false)
DECLARE_IGC_REGKEY(DWORD, HPCGlobalInstNumThreshold, 5000, "The threshold for the global (cross basic block) instruction count above which a kernel is treated as a potential HPC kernel", false)
DECLARE_IGC_REGKEY(bool, HPCFastCompilation, false, "Force to do fast compilation for HPC kernel", false)
462465
DECLARE_IGC_REGKEY(bool, UseOldSubRoutineAugIntf, false, "Use the old subroutine augmentation code which is slower", false)
463466
DECLARE_IGC_REGKEY(bool, FastCompileRA, false, "Provide the fast compilatoin path for RA, fail safe at first iteration", false)
464467
DECLARE_IGC_REGKEY(bool, HybridRAWithSpill, false, "Did Hybrid RA with Spill", false)

visa/G4_IR.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2833,6 +2833,7 @@ namespace vISA
28332833
bool isSpReg() const { return (reg.phyReg != NULL) && (reg.phyReg->isSpReg()); }
28342834

28352835
bool isRegAllocPartaker() const { return id != UNDEFINED_VAL; }
2836+
// Returns the raw `id` that isRegAllocPartaker() compares against UNDEFINED_VAL.
unsigned getRegAllocPartaker() const { return id; }
28362837
bool isAddress() const { return decl->getRegFile() == G4_ADDRESS; }
28372838
const G4_VarBase* getPhyReg() const { return reg.phyReg; }
28382839
G4_VarBase* getPhyReg() { return reg.phyReg; }

visa/GraphColor.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9855,20 +9855,20 @@ int GlobalRA::coloringRegAlloc()
98559855
bool fastCompile =
98569856
(builder.getOption(vISA_FastCompileRA) || builder.getOption(vISA_HybridRAWithSpill)) &&
98579857
!hasStackCall;
9858+
98589859
if (fastCompile)
98599860
{
98609861
fastCompileIter = 0;
98619862
}
9862-
unsigned failSafeRAIteration = (builder.getOption(vISA_FastSpill) || fastCompile) ? fastCompileIter : FAIL_SAFE_RA_LIMIT;
98639863

9864+
unsigned failSafeRAIteration = (builder.getOption(vISA_FastSpill) || fastCompile) ? fastCompileIter : FAIL_SAFE_RA_LIMIT;
98649865
bool rematDone = false;
98659866
VarSplit splitPass(*this);
98669867
if (kernel.getOption(vISA_SplitGRFAlignedScalar))
98679868
{
98689869
SplitAlignedScalars split(*this);
98699870
split.run();
98709871
}
9871-
98729872
while (iterationNo < maxRAIterations)
98739873
{
98749874
if (builder.getOption(vISA_RATrace))
@@ -10210,7 +10210,6 @@ int GlobalRA::coloringRegAlloc()
1021010210
// it modifies IR
1021110211
regChart->dumpRegChart(std::cerr);
1021210212
}
10213-
1021410213
expandSpillFillIntrinsics(nextSpillOffset);
1021510214
if (builder.getOption(vISA_OptReport))
1021610215
{
@@ -10253,7 +10252,6 @@ int GlobalRA::coloringRegAlloc()
1025310252
break;
1025410253
}
1025510254
}
10256-
1025710255
assignRegForAliasDcl();
1025810256
computePhyReg();
1025910257

visa/LocalRA.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,14 @@ bool LocalRA::localRA()
506506
std::cout << "\t--first-fit " << "RA\n";
507507
}
508508
globalLRSize = 0;
509+
if (builder.getOption(vISA_HybridRAWithSpill))
510+
{
511+
countLiveIntervals();
512+
}
513+
else
514+
{
515+
globalLRSize = 0;
516+
}
509517
evenAlign();
510518
needGlobalRA = localRAPass(false, doSplitLLR);
511519
}

0 commit comments

Comments
 (0)