Skip to content

Commit 167f3e8

Browse files
esukhov authored and igcbot committed
Force Minimum Dispatch SIMD if register pressure is too high
1. Added a mechanism for publishing and reading register-pressure info through the FuncInfo metadata. 2. The CodeLoopSinking pass publishes the best-known (not necessarily up-to-date) pressure to save compile time. 3. If the pressure is too high (IGC_ForceSIMDRPELimit flag), we fall back to the minimum dispatch SIMD.
1 parent 43551f4 commit 167f3e8

File tree

10 files changed

+262
-9
lines changed

10 files changed

+262
-9
lines changed

IGC/Compiler/CISACodeGen/CodeSinking.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -945,6 +945,20 @@ namespace IGC {
945945
return Max;
946946
}
947947

948+
// this function returns the best known regpressure, not up-to-date repgressure
949+
// it was implemented this way to cut compilation time costs
950+
uint CodeLoopSinking::getMaxRegCountForFunction(Function *F)
951+
{
952+
unsigned int MaxPressure = 0;
953+
for (auto BB : BBPressures)
954+
{
955+
if (BB.getFirst()->getParent() != F)
956+
continue;
957+
MaxPressure = std::max(BB.getSecond(), MaxPressure);
958+
}
959+
return MaxPressure;
960+
}
961+
948962
// Find the loops with too high regpressure and sink the instructions from
949963
// preheaders into them
950964
bool CodeLoopSinking::loopSink(Function &F)
@@ -958,6 +972,9 @@ namespace IGC {
958972
if (SinkMode != LoopSinkMode::NoSink)
959973
Changed |= loopSink(L, SinkMode);
960974
}
975+
976+
unsigned int MaxPressure = getMaxRegCountForFunction(&F);
977+
RPE->publishRegPressureMetadata(F, MaxPressure + FRPE->getExternalPressureForFunction(&F));
961978
return Changed;
962979
}
963980

IGC/Compiler/CISACodeGen/CodeSinking.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ namespace IGC {
165165
llvm::DenseMap<llvm::BasicBlock*, uint> BBPressures;
166166
LoopSinkMode needLoopSink(llvm::Loop* L);
167167
unsigned getMaxRegCountForLoop(llvm::Loop* L);
168+
unsigned getMaxRegCountForFunction(llvm::Function* F);
168169
};
169170

170171
void initializeCodeLoopSinkingPass(llvm::PassRegistry&);

IGC/Compiler/CISACodeGen/IGCLivenessAnalysis.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,7 @@ void IGCRegisterPressurePrinter::printIntraBlock(llvm::BasicBlock &BB,
369369
}
370370
unsigned int SizeInBytes = BBListing[Inst];
371371
unsigned int AmountOfRegistersRoundUp = RPE->bytesToRegisters(SizeInBytes);
372-
MaxPressureInKernel = std::max(MaxPressureInKernel, AmountOfRegistersRoundUp);
372+
MaxPressureInFunction = std::max(MaxPressureInFunction, AmountOfRegistersRoundUp);
373373
Output += std::to_string(SizeInBytes) + " (" +
374374
std::to_string(AmountOfRegistersRoundUp) + ")" + " \t";
375375
printInstruction(Inst, Output);
@@ -419,7 +419,7 @@ void IGCRegisterPressurePrinter::dumpRegPressure(llvm::Function &F,
419419
}
420420

421421
OutputFile << "==============================================" << "\n";
422-
OutputFile << "MaxPressure In Kernel: " << MaxPressureInKernel << "\n";
422+
OutputFile << "MaxPressure In Function: " << MaxPressureInFunction << "\n";
423423

424424
OutputFile.close();
425425
}
@@ -554,7 +554,7 @@ bool IGCRegisterPressurePrinter::runOnFunction(llvm::Function &F) {
554554
RPE = &getAnalysis<IGCLivenessAnalysis>();
555555
WI = &getAnalysis<WIAnalysis>();
556556
CGCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
557-
MaxPressureInKernel = 0;
557+
MaxPressureInFunction = 0;
558558

559559
unsigned int SIMD = numLanes(RPE->bestGuessSIMDSize(&F));
560560

@@ -565,13 +565,15 @@ bool IGCRegisterPressurePrinter::runOnFunction(llvm::Function &F) {
565565
// basically only for LIT testing
566566
std::string Output;
567567
// no particular reason behind this, just big enough power of 2
568-
// helps to reduce printing time, by preemptively allocating
569-
// memory
568+
// helps to reduce printing time, by preemptively allocating memory
570569
Output.reserve(32768);
571570
Output += "SIMD: " + std::to_string(SIMD) + ", external pressure: " + std::to_string(ExternalPressure) + "\n";
572571
for (BasicBlock &BB : F) {
573572
printSets(&BB, Output, SIMD);
574573
}
574+
Output += "\n";
575+
Output += "==============================================\n";
576+
Output += "MaxPressure In Function: " + F.getName().str() + " --> " + std::to_string(MaxPressureInFunction) + "\n";
575577
PRINT(Output);
576578
Output.clear();
577579
}

IGC/Compiler/CISACodeGen/IGCLivenessAnalysis.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,12 @@ namespace IGC {
8585
class IGCLivenessAnalysis : public llvm::FunctionPass, public IGCLivenessAnalysisBase {
8686
public:
8787

88-
88+
// Stores MaxPressure in the max-reg-pressure entry of F's function-info
// metadata. Does nothing when F has no function-info item.
void publishRegPressureMetadata(llvm::Function& F, unsigned int MaxPressure) {
    const bool HasFuncInfo = MDUtils->findFunctionsInfoItem(&F) != MDUtils->end_FunctionsInfo();
    if (!HasFuncInfo) {
        return;
    }

    IGC::IGCMD::FunctionInfoMetaDataHandle FuncInfo = MDUtils->getFunctionsInfoItem(&F);
    FuncInfo->getMaxRegPressure()->setMaxPressure(MaxPressure);
}
8994

9095
unsigned int getMaxRegCountForBB(llvm::BasicBlock &BB, unsigned int SIMD, WIAnalysisRunner* WI = nullptr) {
9196
InsideBlockPressureMap PressureMap;
@@ -255,7 +260,7 @@ class IGCRegisterPressurePrinter : public llvm::FunctionPass {
255260
unsigned int PrinterType = IGC_GET_FLAG_VALUE(RegPressureVerbocity);
256261
// maximum potential calling context pressure of a function
257262
unsigned int ExternalPressure = 0;
258-
unsigned int MaxPressureInKernel = 0;
263+
unsigned int MaxPressureInFunction = 0;
259264

260265
void intraBlock(llvm::BasicBlock &BB, std::string &Output, unsigned int SIMD);
261266
void dumpRegPressure(llvm::Function &F, unsigned int SIMD);

IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3684,6 +3684,7 @@ namespace IGC
36843684
FunctionInfoMetaDataHandle funcInfoMD = pMdUtils->getFunctionsInfoItem(&F);
36853685
int simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize();
36863686
bool hasSubGroupForce = hasSubGroupIntrinsicPVC(F);
3687+
unsigned int maxPressure = funcInfoMD->getMaxRegPressure()->getMaxPressure();
36873688

36883689
// Finds the kernel and get the group simd size from the kernel
36893690
if (m_FGA)
@@ -3693,6 +3694,7 @@ namespace IGC
36933694
Kernel = FG->getHead();
36943695
funcInfoMD = pMdUtils->getFunctionsInfoItem(Kernel);
36953696
simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize();
3697+
maxPressure = funcInfoMD->getMaxRegPressure()->getMaxPressure();
36963698
}
36973699

36983700
auto FG = m_FGA ? m_FGA->getGroup(&F) : nullptr;
@@ -3701,8 +3703,16 @@ namespace IGC
37013703
bool hasSubroutine = FG && !FG->isSingle() && !hasStackCall && !isIndirectGroup;
37023704
bool forceLowestSIMDForStackCalls = IGC_IS_FLAG_ENABLED(ForceLowestSIMDForStackCalls) && (hasStackCall || isIndirectGroup);
37033705

3706+
37043707
if (simd_size == 0)
37053708
{
3709+
if (maxPressure >= IGC_GET_FLAG_VALUE(ForceSIMDRPELimit) &&
3710+
simdMode != SIMDMode::SIMD16)
3711+
{
3712+
pCtx->SetSIMDInfo(SIMD_SKIP_HW, simdMode, ShaderDispatchMode::NOT_APPLICABLE);
3713+
funcInfoMD->getSubGroupSize()->setSIMDSize(16);
3714+
return SIMDStatus::SIMD_FUNC_FAIL;
3715+
}
37063716
if (hasSubroutine &&
37073717
simdMode != SIMDMode::SIMD16)
37083718
{
@@ -3840,6 +3850,7 @@ namespace IGC
38403850
ModuleMetaData* modMD = pCtx->getModuleMetaData();
38413851
FunctionInfoMetaDataHandle funcInfoMD = pMdUtils->getFunctionsInfoItem(&F);
38423852
int simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize();
3853+
unsigned int maxPressure = funcInfoMD->getMaxRegPressure()->getMaxPressure();
38433854

38443855
// Finds the kernel and get the group simd size from the kernel
38453856
if (m_FGA)
@@ -3849,6 +3860,7 @@ namespace IGC
38493860
Kernel = FG->getHead();
38503861
funcInfoMD = pMdUtils->getFunctionsInfoItem(Kernel);
38513862
simd_size = funcInfoMD->getSubGroupSize()->getSIMDSize();
3863+
maxPressure = funcInfoMD->getMaxRegPressure()->getMaxPressure();
38523864
}
38533865

38543866
// For simd variant functions, detect which SIMD sizes are needed
@@ -3908,6 +3920,14 @@ namespace IGC
39083920
return SIMDStatus::SIMD_FUNC_FAIL;
39093921
}
39103922

3923+
if (maxPressure >= IGC_GET_FLAG_VALUE(ForceSIMDRPELimit) &&
3924+
simdMode != SIMDMode::SIMD8)
3925+
{
3926+
pCtx->SetSIMDInfo(SIMD_SKIP_HW, simdMode, ShaderDispatchMode::NOT_APPLICABLE);
3927+
funcInfoMD->getSubGroupSize()->setSIMDSize(8);
3928+
return SIMDStatus::SIMD_FUNC_FAIL;
3929+
}
3930+
39113931
// Just subroutines and subgroup size is not set, default to SIMD8
39123932
if (hasSubroutine &&
39133933
simdMode != SIMDMode::SIMD8)

IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -751,8 +751,6 @@ void AddLegalizationPasses(CodeGenContext& ctx, IGCPassManager& mpm, PSSignature
751751
if (IGC_IS_FLAG_ENABLED(EnableAdvMemOpt))
752752
mpm.add(createAdvMemOptPass());
753753

754-
if(IGC_IS_FLAG_SET(DumpRegPressureEstimate)) mpm.add(new IGCRegisterPressurePrinter("final"));
755-
756754
if (doLdStCombine(&ctx)) {
757755
// Once it is stable, no split 64bit store/load anymore.
758756
mpm.add(createLdStCombinePass());

IGC/Compiler/MetaDataApi/MetaDataApi.cpp

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,66 @@ namespace IGC::IGCMD {
183183
m_ArgDependency.save(context, getNumberedNode(pNode, 1));
184184
}
185185

186+
// Wraps an existing metadata node: the pressure value is read from numbered
// operand 0 of pNode, and pNode itself is remembered in m_pNode.
MaxRegPressureMetaData::MaxRegPressureMetaData(const llvm::MDNode* pNode, bool hasId)
    : _Mybase(pNode, hasId)
    , m_MaxPressure(getNumberedNode(pNode, 0))
    , m_pNode(pNode)
{
}

// Creates an object that is not backed by any metadata node.
MaxRegPressureMetaData::MaxRegPressureMetaData()
    : m_pNode(nullptr)
{
}

// Creates a named object that is not backed by any metadata node.
MaxRegPressureMetaData::MaxRegPressureMetaData(const char* name)
    : _Mybase(name)
    , m_pNode(nullptr)
{
}
200+
201+
bool MaxRegPressureMetaData::hasValue() const
202+
{
203+
return m_MaxPressure.hasValue() ||
204+
nullptr != m_pNode ||
205+
dirty();
206+
}
207+
208+
bool MaxRegPressureMetaData::dirty() const
209+
{
210+
return m_MaxPressure.dirty();
211+
}
212+
213+
void MaxRegPressureMetaData::discardChanges()
214+
{
215+
m_MaxPressure.discardChanges();
216+
}
217+
218+
// Builds a fresh MDNode for this object. Its operands are the id node (only
// when IMetaDataObject::generateNode returns one) followed by the node
// generated for the pressure field.
llvm::Metadata* MaxRegPressureMetaData::generateNode(llvm::LLVMContext& context) const
{
    llvm::SmallVector<llvm::Metadata*, 5> operands;

    // the id node, if any, always goes first
    if (llvm::Metadata* idNode = IMetaDataObject::generateNode(context))
    {
        operands.push_back(idNode);
    }

    operands.push_back(m_MaxPressure.generateNode(context));

    return llvm::MDNode::get(context, operands);
}
232+
233+
// Writes the pressure field into numbered operand 0 of pNode.
// The write is skipped when pNode is the node this object was loaded from and
// nothing is dirty; this relies on the underlying metadata node not having
// changed under our feet in the meantime.
void MaxRegPressureMetaData::save(llvm::LLVMContext& context, llvm::MDNode* pNode) const
{
    IGC_ASSERT_MESSAGE(nullptr != pNode, "The target node should be valid pointer");

    const bool upToDate = (pNode == m_pNode) && !dirty();
    if (!upToDate)
    {
        m_MaxPressure.save(context, getNumberedNode(pNode, 0));
    }
}
245+
186246
SubGroupSizeMetaData::SubGroupSizeMetaData(const llvm::MDNode* pNode, bool hasId) :
187247
_Mybase(pNode, hasId),
188248
m_SIMDSize(getNumberedNode(pNode, 0)),
@@ -389,6 +449,7 @@ namespace IGC::IGCMD {
389449
m_ThreadGroupSize(new ThreadGroupSizeMetaData(getNamedNode(pNode, "thread_group_size"), true)),
390450
m_ThreadGroupSizeHint(new ThreadGroupSizeMetaData(getNamedNode(pNode, "thread_group_size_hint"), true)),
391451
m_SubGroupSize(new SubGroupSizeMetaData(getNamedNode(pNode, "sub_group_size"), true)),
452+
m_MaxRegPressure(new MaxRegPressureMetaData(getNamedNode(pNode, "max_reg_pressure"), true)),
392453
m_OpenCLVectorTypeHint(new VectorTypeHintMetaData(getNamedNode(pNode, "opencl_vec_type_hint"), true)),
393454
m_pNode(pNode)
394455
{}
@@ -400,6 +461,7 @@ namespace IGC::IGCMD {
400461
m_ThreadGroupSize(new ThreadGroupSizeMetaDataHandle::ObjectType("thread_group_size")),
401462
m_ThreadGroupSizeHint(new ThreadGroupSizeMetaDataHandle::ObjectType("thread_group_size_hint")),
402463
m_SubGroupSize(new SubGroupSizeMetaDataHandle::ObjectType("sub_group_size")),
464+
m_MaxRegPressure(new MaxRegPressureMetaDataHandle::ObjectType("max_reg_pressure")),
403465
m_OpenCLVectorTypeHint(new VectorTypeHintMetaDataHandle::ObjectType("opencl_vec_type_hint")),
404466
m_pNode(nullptr)
405467
{}
@@ -411,6 +473,7 @@ namespace IGC::IGCMD {
411473
m_ThreadGroupSize(new ThreadGroupSizeMetaDataHandle::ObjectType("thread_group_size")),
412474
m_ThreadGroupSizeHint(new ThreadGroupSizeMetaDataHandle::ObjectType("thread_group_size_hint")),
413475
m_SubGroupSize(new SubGroupSizeMetaDataHandle::ObjectType("sub_group_size")),
476+
m_MaxRegPressure(new MaxRegPressureMetaDataHandle::ObjectType("max_reg_pressure")),
414477
m_OpenCLVectorTypeHint(new VectorTypeHintMetaDataHandle::ObjectType("opencl_vec_type_hint")),
415478
m_pNode(nullptr)
416479
{}
@@ -423,6 +486,7 @@ namespace IGC::IGCMD {
423486
m_ThreadGroupSize->hasValue() ||
424487
m_ThreadGroupSizeHint->hasValue() ||
425488
m_SubGroupSize->hasValue() ||
489+
m_MaxRegPressure->hasValue() ||
426490
m_OpenCLVectorTypeHint->hasValue() ||
427491
nullptr != m_pNode ||
428492
dirty();
@@ -436,6 +500,7 @@ namespace IGC::IGCMD {
436500
m_ThreadGroupSize.dirty() ||
437501
m_ThreadGroupSizeHint.dirty() ||
438502
m_SubGroupSize.dirty() ||
503+
m_MaxRegPressure.dirty() ||
439504
m_OpenCLVectorTypeHint.dirty();
440505
}
441506

@@ -447,6 +512,7 @@ namespace IGC::IGCMD {
447512
m_ThreadGroupSize.discardChanges();
448513
m_ThreadGroupSizeHint.discardChanges();
449514
m_SubGroupSize.discardChanges();
515+
m_MaxRegPressure.discardChanges();
450516
m_OpenCLVectorTypeHint.discardChanges();
451517
}
452518

@@ -481,6 +547,10 @@ namespace IGC::IGCMD {
481547
{
482548
args.push_back(m_SubGroupSize.generateNode(context));
483549
}
550+
if (m_MaxRegPressure->hasValue())
551+
{
552+
args.push_back(m_MaxRegPressure.generateNode(context));
553+
}
484554
if (m_OpenCLVectorTypeHint->hasValue())
485555
{
486556
args.push_back(m_OpenCLVectorTypeHint.generateNode(context));
@@ -504,6 +574,7 @@ namespace IGC::IGCMD {
504574
m_ThreadGroupSize.save(context, getNamedNode(pNode, "thread_group_size"));
505575
m_ThreadGroupSizeHint.save(context, getNamedNode(pNode, "thread_group_size_hint"));
506576
m_SubGroupSize.save(context, getNamedNode(pNode, "sub_group_size"));
577+
m_MaxRegPressure.save(context, getNamedNode(pNode, "max_reg_pressure"));
507578
m_OpenCLVectorTypeHint.save(context, getNamedNode(pNode, "opencl_vec_type_hint"));
508579
}
509580
}

0 commit comments

Comments
 (0)