Skip to content

Commit 49d16a0

Browse files
esukhovigcbot
authored and committed
Force Minimum Dispatch SIMD if register pressure is too high
1. Added a mechanism for publishing and reading register-pressure info through the FunctionInfo metadata. 2. The CodeLoopSinking pass publishes the best register-pressure value known at that moment (instead of recomputing it) to save compile time. 3. If the pressure is too high (at or above the limit set by the IGC_ForceSIMDRPELimit flag), we regress to the minimum dispatch SIMD.
1 parent 42ddbf6 commit 49d16a0

File tree

10 files changed

+268
-16
lines changed

10 files changed

+268
-16
lines changed

IGC/Compiler/CISACodeGen/CodeSinking.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -945,6 +945,20 @@ namespace IGC {
945945
return Max;
946946
}
947947

948+
// this function returns the best known regpressure, not the up-to-date regpressure
949+
// it was implemented this way to cut compilation time costs
950+
uint CodeLoopSinking::getMaxRegCountForFunction(Function *F)
951+
{
952+
unsigned int MaxPressure = 0;
953+
for (auto BB : BBPressures)
954+
{
955+
if (BB.getFirst()->getParent() != F)
956+
continue;
957+
MaxPressure = std::max(BB.getSecond(), MaxPressure);
958+
}
959+
return MaxPressure;
960+
}
961+
948962
// Find the loops with too high regpressure and sink the instructions from
949963
// preheaders into them
950964
bool CodeLoopSinking::loopSink(Function &F)
@@ -958,6 +972,9 @@ namespace IGC {
958972
if (SinkMode != LoopSinkMode::NoSink)
959973
Changed |= loopSink(L, SinkMode);
960974
}
975+
976+
unsigned int MaxPressure = getMaxRegCountForFunction(&F);
977+
RPE->publishRegPressureMetadata(F, MaxPressure + FRPE->getExternalPressureForFunction(&F));
961978
return Changed;
962979
}
963980

IGC/Compiler/CISACodeGen/CodeSinking.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ namespace IGC {
165165
llvm::DenseMap<llvm::BasicBlock*, uint> BBPressures;
166166
LoopSinkMode needLoopSink(llvm::Loop* L);
167167
unsigned getMaxRegCountForLoop(llvm::Loop* L);
168+
unsigned getMaxRegCountForFunction(llvm::Function *F);
168169
};
169170

170171
void initializeCodeLoopSinkingPass(llvm::PassRegistry&);

IGC/Compiler/CISACodeGen/IGCLivenessAnalysis.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,7 @@ void IGCRegisterPressurePrinter::printIntraBlock(llvm::BasicBlock &BB,
369369
}
370370
unsigned int SizeInBytes = BBListing[Inst];
371371
unsigned int AmountOfRegistersRoundUp = RPE->bytesToRegisters(SizeInBytes);
372-
MaxPressureInKernel = std::max(MaxPressureInKernel, AmountOfRegistersRoundUp);
372+
MaxPressureInFunction = std::max(MaxPressureInFunction, AmountOfRegistersRoundUp);
373373
Output += std::to_string(SizeInBytes) + " (" +
374374
std::to_string(AmountOfRegistersRoundUp) + ")" + " \t";
375375
printInstruction(Inst, Output);
@@ -419,7 +419,7 @@ void IGCRegisterPressurePrinter::dumpRegPressure(llvm::Function &F,
419419
}
420420

421421
OutputFile << "==============================================" << "\n";
422-
OutputFile << "MaxPressure In Kernel: " << MaxPressureInKernel << "\n";
422+
OutputFile << "MaxPressure In Function: " << MaxPressureInFunction << "\n";
423423

424424
OutputFile.close();
425425
}
@@ -554,7 +554,7 @@ bool IGCRegisterPressurePrinter::runOnFunction(llvm::Function &F) {
554554
RPE = &getAnalysis<IGCLivenessAnalysis>();
555555
WI = &getAnalysis<WIAnalysis>();
556556
CGCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
557-
MaxPressureInKernel = 0;
557+
MaxPressureInFunction = 0;
558558

559559
unsigned int SIMD = numLanes(RPE->bestGuessSIMDSize(&F));
560560

@@ -565,13 +565,15 @@ bool IGCRegisterPressurePrinter::runOnFunction(llvm::Function &F) {
565565
// basically only for LIT testing
566566
std::string Output;
567567
// no particular reason behind this, just big enough power of 2
568-
// helps to reduce printing time, by preemptively allocating
569-
// memory
568+
// helps to reduce printing time, by preemptively allocating memory
570569
Output.reserve(32768);
571570
Output += "SIMD: " + std::to_string(SIMD) + ", external pressure: " + std::to_string(ExternalPressure) + "\n";
572571
for (BasicBlock &BB : F) {
573572
printSets(&BB, Output, SIMD);
574573
}
574+
Output += "\n";
575+
Output += "==============================================\n";
576+
Output += "MaxPressure In Function: " + F.getName().str() + " --> " + std::to_string(MaxPressureInFunction) + "\n";
575577
PRINT(Output);
576578
Output.clear();
577579
}

IGC/Compiler/CISACodeGen/IGCLivenessAnalysis.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,6 @@ namespace IGC {
8585
class IGCLivenessAnalysis : public llvm::FunctionPass, public IGCLivenessAnalysisBase {
8686
public:
8787

88-
89-
9088
unsigned int getMaxRegCountForBB(llvm::BasicBlock &BB, unsigned int SIMD, WIAnalysisRunner* WI = nullptr) {
9189
InsideBlockPressureMap PressureMap;
9290
collectPressureForBB(BB, PressureMap, SIMD, WI);
@@ -98,6 +96,13 @@ class IGCLivenessAnalysis : public llvm::FunctionPass, public IGCLivenessAnalysi
9896
return bytesToRegisters(MaxSizeInBytes);
9997
}
10098

99+
void publishRegPressureMetadata(llvm::Function& F, unsigned int MaxPressure) {
100+
if (MDUtils->findFunctionsInfoItem(&F) != MDUtils->end_FunctionsInfo()) {
101+
IGC::IGCMD::FunctionInfoMetaDataHandle funcInfoMD = MDUtils->getFunctionsInfoItem(&F);
102+
funcInfoMD->getMaxRegPressure()->setMaxPressure(MaxPressure);
103+
}
104+
}
105+
101106
// be aware: for now, it doesn't properly account for nested functions and their
102107
// register pressure
103108
unsigned int getMaxRegCountForFunction(llvm::Function &F,
@@ -255,7 +260,7 @@ class IGCRegisterPressurePrinter : public llvm::FunctionPass {
255260
unsigned int PrinterType = IGC_GET_FLAG_VALUE(RegPressureVerbocity);
256261
// maximum potential calling context pressure of a function
257262
unsigned int ExternalPressure = 0;
258-
unsigned int MaxPressureInKernel = 0;
263+
unsigned int MaxPressureInFunction = 0;
259264

260265
void intraBlock(llvm::BasicBlock &BB, std::string &Output, unsigned int SIMD);
261266
void dumpRegPressure(llvm::Function &F, unsigned int SIMD);

IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.cpp

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3660,6 +3660,7 @@ namespace IGC
36603660
FunctionInfoMetaDataHandle funcInfoMD = pMdUtils->getFunctionsInfoItem(&F);
36613661
int simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize();
36623662
bool hasSubGroupForce = hasSubGroupIntrinsicPVC(F);
3663+
unsigned int max_pressure = funcInfoMD->getMaxRegPressure()->getMaxPressure();
36633664

36643665
// Finds the kernel and get the group simd size from the kernel
36653666
if (m_FGA)
@@ -3669,6 +3670,7 @@ namespace IGC
36693670
Kernel = FG->getHead();
36703671
funcInfoMD = pMdUtils->getFunctionsInfoItem(Kernel);
36713672
simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize();
3673+
max_pressure = funcInfoMD->getMaxRegPressure()->getMaxPressure();
36723674
}
36733675

36743676
auto FG = m_FGA ? m_FGA->getGroup(&F) : nullptr;
@@ -3677,8 +3679,16 @@ namespace IGC
36773679
bool hasSubroutine = FG && !FG->isSingle() && !hasStackCall && !isIndirectGroup;
36783680
bool forceLowestSIMDForStackCalls = IGC_IS_FLAG_ENABLED(ForceLowestSIMDForStackCalls) && (hasStackCall || isIndirectGroup);
36793681

3682+
36803683
if (simd_size == 0)
36813684
{
3685+
if (max_pressure >= IGC_GET_FLAG_VALUE(ForceSIMDRPELimit) &&
3686+
simdMode != SIMDMode::SIMD16)
3687+
{
3688+
pCtx->SetSIMDInfo(SIMD_SKIP_HW, simdMode, ShaderDispatchMode::NOT_APPLICABLE);
3689+
funcInfoMD->getSubGroupSize()->setSIMDSize(16);
3690+
return SIMDStatus::SIMD_FUNC_FAIL;
3691+
}
36823692
if (hasSubroutine &&
36833693
simdMode != SIMDMode::SIMD16)
36843694
{
@@ -3751,6 +3761,11 @@ namespace IGC
37513761
{
37523762
return SIMDStatus::SIMD_FUNC_FAIL;
37533763
}
3764+
if (simdMode == SIMDMode::SIMD16 && !hasSubGroupForce && !forceLowestSIMDForStackCalls && !hasSubroutine)
3765+
{
3766+
pCtx->SetSIMDInfo(SIMD_SKIP_PERF, simdMode, ShaderDispatchMode::NOT_APPLICABLE);
3767+
return SIMDStatus::SIMD_FUNC_FAIL;
3768+
}
37543769

37553770
// Check if we force code generation for the current SIMD size.
37563771
// Note that for SIMD8, we always force it!
@@ -3760,12 +3775,6 @@ namespace IGC
37603775
return SIMDStatus::SIMD_PASS;
37613776
}
37623777

3763-
if (simdMode == SIMDMode::SIMD16 && !hasSubGroupForce && !forceLowestSIMDForStackCalls && !hasSubroutine)
3764-
{
3765-
pCtx->SetSIMDInfo(SIMD_SKIP_PERF, simdMode, ShaderDispatchMode::NOT_APPLICABLE);
3766-
return SIMDStatus::SIMD_FUNC_FAIL;
3767-
}
3768-
37693778
if (simdMode == SIMDMode::SIMD32 && hasSubGroupForce)
37703779
{
37713780
pCtx->SetSIMDInfo(SIMD_SKIP_PERF, simdMode, ShaderDispatchMode::NOT_APPLICABLE);
@@ -3816,6 +3825,7 @@ namespace IGC
38163825
ModuleMetaData* modMD = pCtx->getModuleMetaData();
38173826
FunctionInfoMetaDataHandle funcInfoMD = pMdUtils->getFunctionsInfoItem(&F);
38183827
int simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize();
3828+
unsigned int max_pressure = funcInfoMD->getMaxRegPressure()->getMaxPressure();
38193829

38203830
// Finds the kernel and get the group simd size from the kernel
38213831
if (m_FGA)
@@ -3825,6 +3835,7 @@ namespace IGC
38253835
Kernel = FG->getHead();
38263836
funcInfoMD = pMdUtils->getFunctionsInfoItem(Kernel);
38273837
simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize();
3838+
max_pressure = funcInfoMD->getMaxRegPressure()->getMaxPressure();
38283839
}
38293840

38303841
// For simd variant functions, detect which SIMD sizes are needed
@@ -3883,6 +3894,14 @@ namespace IGC
38833894
return SIMDStatus::SIMD_FUNC_FAIL;
38843895
}
38853896

3897+
if (max_pressure >= IGC_GET_FLAG_VALUE(ForceSIMDRPELimit) &&
3898+
simdMode != SIMDMode::SIMD8)
3899+
{
3900+
pCtx->SetSIMDInfo(SIMD_SKIP_HW, simdMode, ShaderDispatchMode::NOT_APPLICABLE);
3901+
funcInfoMD->getSubGroupSize()->setSIMDSize(8);
3902+
return SIMDStatus::SIMD_FUNC_FAIL;
3903+
}
3904+
38863905
// Just subroutines and subgroup size is not set, default to SIMD8
38873906
if (hasSubroutine &&
38883907
simdMode != SIMDMode::SIMD8)

IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -744,8 +744,6 @@ void AddLegalizationPasses(CodeGenContext& ctx, IGCPassManager& mpm, PSSignature
744744
if (IGC_IS_FLAG_ENABLED(EnableAdvMemOpt))
745745
mpm.add(createAdvMemOptPass());
746746

747-
if(IGC_IS_FLAG_SET(DumpRegPressureEstimate)) mpm.add(new IGCRegisterPressurePrinter("final"));
748-
749747
if (doLdStCombine(&ctx)) {
750748
// Once it is stable, no split 64bit store/load anymore.
751749
mpm.add(createLdStCombinePass());

IGC/Compiler/MetaDataApi/MetaDataApi.cpp

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,66 @@ namespace IGC::IGCMD {
183183
m_ArgDependency.save(context, getNumberedNode(pNode, 1));
184184
}
185185

186+
MaxRegPressureMetaData::MaxRegPressureMetaData(const llvm::MDNode* pNode, bool hasId) :
187+
_Mybase(pNode, hasId),
188+
m_MaxPressure(getNumberedNode(pNode, 0)),
189+
m_pNode(pNode)
190+
{}
191+
192+
MaxRegPressureMetaData::MaxRegPressureMetaData() :
193+
m_pNode(nullptr)
194+
{}
195+
196+
MaxRegPressureMetaData::MaxRegPressureMetaData(const char* name) :
197+
_Mybase(name),
198+
m_pNode(nullptr)
199+
{}
200+
201+
bool MaxRegPressureMetaData::hasValue() const
202+
{
203+
return m_MaxPressure.hasValue() ||
204+
nullptr != m_pNode ||
205+
dirty();
206+
}
207+
208+
bool MaxRegPressureMetaData::dirty() const
209+
{
210+
return m_MaxPressure.dirty();
211+
}
212+
213+
void MaxRegPressureMetaData::discardChanges()
214+
{
215+
m_MaxPressure.discardChanges();
216+
}
217+
218+
llvm::Metadata* MaxRegPressureMetaData::generateNode(llvm::LLVMContext& context) const
219+
{
220+
llvm::SmallVector<llvm::Metadata*, 5> args;
221+
222+
llvm::Metadata* pIDNode = IMetaDataObject::generateNode(context);
223+
if (nullptr != pIDNode)
224+
{
225+
args.push_back(pIDNode);
226+
}
227+
228+
args.push_back(m_MaxPressure.generateNode(context));
229+
230+
return llvm::MDNode::get(context, args);
231+
}
232+
233+
void MaxRegPressureMetaData::save(llvm::LLVMContext& context, llvm::MDNode* pNode) const
234+
{
235+
IGC_ASSERT_MESSAGE(nullptr != pNode, "The target node should be valid pointer");
236+
237+
// we assume that underlying metadata node has not changed under our foot
238+
if (pNode == m_pNode && !dirty())
239+
{
240+
return;
241+
}
242+
243+
m_MaxPressure.save(context, getNumberedNode(pNode, 0));
244+
}
245+
186246
SubGroupSizeMetaData::SubGroupSizeMetaData(const llvm::MDNode* pNode, bool hasId) :
187247
_Mybase(pNode, hasId),
188248
m_SIMDSize(getNumberedNode(pNode, 0)),
@@ -389,6 +449,7 @@ namespace IGC::IGCMD {
389449
m_ThreadGroupSize(new ThreadGroupSizeMetaData(getNamedNode(pNode, "thread_group_size"), true)),
390450
m_ThreadGroupSizeHint(new ThreadGroupSizeMetaData(getNamedNode(pNode, "thread_group_size_hint"), true)),
391451
m_SubGroupSize(new SubGroupSizeMetaData(getNamedNode(pNode, "sub_group_size"), true)),
452+
m_MaxRegPressure(new MaxRegPressureMetaData(getNamedNode(pNode, "max_reg_pressure"), true)),
392453
m_OpenCLVectorTypeHint(new VectorTypeHintMetaData(getNamedNode(pNode, "opencl_vec_type_hint"), true)),
393454
m_pNode(pNode)
394455
{}
@@ -400,6 +461,7 @@ namespace IGC::IGCMD {
400461
m_ThreadGroupSize(new ThreadGroupSizeMetaDataHandle::ObjectType("thread_group_size")),
401462
m_ThreadGroupSizeHint(new ThreadGroupSizeMetaDataHandle::ObjectType("thread_group_size_hint")),
402463
m_SubGroupSize(new SubGroupSizeMetaDataHandle::ObjectType("sub_group_size")),
464+
m_MaxRegPressure(new MaxRegPressureMetaDataHandle::ObjectType("max_reg_pressure")),
403465
m_OpenCLVectorTypeHint(new VectorTypeHintMetaDataHandle::ObjectType("opencl_vec_type_hint")),
404466
m_pNode(nullptr)
405467
{}
@@ -411,6 +473,7 @@ namespace IGC::IGCMD {
411473
m_ThreadGroupSize(new ThreadGroupSizeMetaDataHandle::ObjectType("thread_group_size")),
412474
m_ThreadGroupSizeHint(new ThreadGroupSizeMetaDataHandle::ObjectType("thread_group_size_hint")),
413475
m_SubGroupSize(new SubGroupSizeMetaDataHandle::ObjectType("sub_group_size")),
476+
m_MaxRegPressure(new MaxRegPressureMetaDataHandle::ObjectType("max_reg_pressure")),
414477
m_OpenCLVectorTypeHint(new VectorTypeHintMetaDataHandle::ObjectType("opencl_vec_type_hint")),
415478
m_pNode(nullptr)
416479
{}
@@ -423,6 +486,7 @@ namespace IGC::IGCMD {
423486
m_ThreadGroupSize->hasValue() ||
424487
m_ThreadGroupSizeHint->hasValue() ||
425488
m_SubGroupSize->hasValue() ||
489+
m_MaxRegPressure->hasValue() ||
426490
m_OpenCLVectorTypeHint->hasValue() ||
427491
nullptr != m_pNode ||
428492
dirty();
@@ -436,6 +500,7 @@ namespace IGC::IGCMD {
436500
m_ThreadGroupSize.dirty() ||
437501
m_ThreadGroupSizeHint.dirty() ||
438502
m_SubGroupSize.dirty() ||
503+
m_MaxRegPressure.dirty() ||
439504
m_OpenCLVectorTypeHint.dirty();
440505
}
441506

@@ -447,6 +512,7 @@ namespace IGC::IGCMD {
447512
m_ThreadGroupSize.discardChanges();
448513
m_ThreadGroupSizeHint.discardChanges();
449514
m_SubGroupSize.discardChanges();
515+
m_MaxRegPressure.discardChanges();
450516
m_OpenCLVectorTypeHint.discardChanges();
451517
}
452518

@@ -481,6 +547,10 @@ namespace IGC::IGCMD {
481547
{
482548
args.push_back(m_SubGroupSize.generateNode(context));
483549
}
550+
if (m_MaxRegPressure->hasValue())
551+
{
552+
args.push_back(m_MaxRegPressure.generateNode(context));
553+
}
484554
if (m_OpenCLVectorTypeHint->hasValue())
485555
{
486556
args.push_back(m_OpenCLVectorTypeHint.generateNode(context));
@@ -504,6 +574,7 @@ namespace IGC::IGCMD {
504574
m_ThreadGroupSize.save(context, getNamedNode(pNode, "thread_group_size"));
505575
m_ThreadGroupSizeHint.save(context, getNamedNode(pNode, "thread_group_size_hint"));
506576
m_SubGroupSize.save(context, getNamedNode(pNode, "sub_group_size"));
577+
m_MaxRegPressure.save(context, getNamedNode(pNode, "max_reg_pressure"));
507578
m_OpenCLVectorTypeHint.save(context, getNamedNode(pNode, "opencl_vec_type_hint"));
508579
}
509580
}

0 commit comments

Comments
 (0)