Skip to content

Commit 5eacf9f

Browse files
mkhoshza, igcbot
authored and committed
UMD support for loop unroll threshold
UMD support for loop unroll threshold
1 parent a2320c9 commit 5eacf9f

File tree

2 files changed

+65
-52
lines changed

2 files changed

+65
-52
lines changed

IGC/Compiler/GenTTI.cpp

Lines changed: 63 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ namespace llvm {
7171

7272
GenISAIntrinsic::ID ID = inst->getIntrinsicID();
7373
if (/*ID == llvm::GenISAIntrinsic::GenISA_typedwrite ||
74-
ID == llvm::GenISAIntrinsic::GenISA_typedread ||*/
74+
ID == llvm::GenISAIntrinsic::GenISA_typedread ||*/
7575
ID == llvm::GenISAIntrinsic::GenISA_URBRead ||
7676
isURBWriteIntrinsic(inst) ||
7777
ID == llvm::GenISAIntrinsic::GenISA_ldstructured)
@@ -89,7 +89,7 @@ namespace llvm {
8989
llvm::BasicBlock* BB = const_cast<llvm::BasicBlock*>(&*BBI);
9090
for (auto II = BB->begin(); II != BB->end(); II++)
9191
{
92-
if (llvm::GenIntrinsicInst * pIntrinsic = llvm::dyn_cast<llvm::GenIntrinsicInst>(II))
92+
if (llvm::GenIntrinsicInst* pIntrinsic = llvm::dyn_cast<llvm::GenIntrinsicInst>(II))
9393
{
9494
if (CheckSendMsg && isSendMessage(pIntrinsic))
9595
{
@@ -108,9 +108,9 @@ namespace llvm {
108108

109109
void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop* L,
110110
#if LLVM_VERSION_MAJOR >= 7
111-
ScalarEvolution & SE,
111+
ScalarEvolution& SE,
112112
#endif
113-
TTI::UnrollingPreferences & UP)
113+
TTI::UnrollingPreferences& UP)
114114
{
115115
unsigned LoopUnrollThreshold = ctx->m_DriverInfo.GetLoopUnrollThreshold();
116116

@@ -119,6 +119,17 @@ namespace llvm {
119119
{
120120
LoopUnrollThreshold = IGC_GET_FLAG_VALUE(SetLoopUnrollThreshold);
121121
}
122+
else
123+
{
124+
if (ctx->type == ShaderType::COMPUTE_SHADER && ctx->getModuleMetaData()->csInfo.SetLoopUnrollThreshold > 0)
125+
{
126+
LoopUnrollThreshold = ctx->getModuleMetaData()->csInfo.SetLoopUnrollThreshold;
127+
}
128+
else if (ctx->type == ShaderType::PIXEL_SHADER && ctx->getModuleMetaData()->compOpt.SetLoopUnrollThreshold > 0)
129+
{
130+
LoopUnrollThreshold = ctx->getModuleMetaData()->compOpt.SetLoopUnrollThreshold;
131+
}
132+
}
122133
unsigned totalInstCountInShader = countTotalInstructions(L->getBlocks()[0]->getParent());
123134
uint32_t registerPressureEst = (uint32_t)(IGC_GET_FLAG_VALUE(SetRegisterPressureThresholdForLoopUnroll) * (ctx->getNumGRFPerThread() / 128.0));
124135
bool lowPressure = (this->ctx->m_tempCount < registerPressureEst) && (totalInstCountInShader < LoopUnrollThreshold);
@@ -131,16 +142,16 @@ namespace llvm {
131142
UP.PartialThreshold = LoopUnrollThreshold;
132143
UP.Partial = true;
133144
}
134-
else // for high register pressure shaders, limit the unrolling to small loops and only fully unroll
145+
else // for high register pressure shaders, limit the unrolling to small loops and only fully unroll
135146
{
136-
if(IGC_GET_FLAG_VALUE(SetLoopUnrollThresholdForHighRegPressure) != 0)
147+
if (IGC_GET_FLAG_VALUE(SetLoopUnrollThresholdForHighRegPressure) != 0)
137148
UP.Threshold = IGC_GET_FLAG_VALUE(SetLoopUnrollThresholdForHighRegPressure);
138149
else
139150
UP.Threshold = 200;
140151
}
141152

142153
#if LLVM_VERSION_MAJOR == 4
143-
ScalarEvolution * SE = &dummyPass->getAnalysisIfAvailable<ScalarEvolutionWrapperPass>()->getSE();
154+
ScalarEvolution* SE = &dummyPass->getAnalysisIfAvailable<ScalarEvolutionWrapperPass>()->getSE();
144155
if (!SE)
145156
return;
146157
#endif
@@ -157,7 +168,7 @@ namespace llvm {
157168
if (ExitingBlock) {
158169
if (UP.Partial) {
159170
IGCLLVM::TerminatorInst* Term = ExitingBlock->getTerminator();
160-
if (BranchInst * BI = dyn_cast<BranchInst>(Term))
171+
if (BranchInst* BI = dyn_cast<BranchInst>(Term))
161172
{
162173
if (dyn_cast<FCmpInst>(BI->getCondition()))
163174
{
@@ -295,7 +306,7 @@ namespace llvm {
295306

296307
for (I = instructionList.begin(); I != instructionList.end(); I++)
297308
{
298-
if (llvm::GenIntrinsicInst * pIntrinsic = llvm::dyn_cast<llvm::GenIntrinsicInst>(I))
309+
if (llvm::GenIntrinsicInst* pIntrinsic = llvm::dyn_cast<llvm::GenIntrinsicInst>(I))
299310
{
300311
if (isSendMessage(pIntrinsic))
301312
{
@@ -307,8 +318,8 @@ namespace llvm {
307318
unsigned int estimateUnrolledInstCount = (instCount + sendMessage * 4) * TripCount;
308319
unsigned int unrollLimitInstCount = LoopUnrollThreshold > totalInstCountInShader ? LoopUnrollThreshold - totalInstCountInShader : 0;
309320
bool limitUnrolling = (estimateUnrolledInstCount > unrollLimitInstCount) ||
310-
(TripCount > unrollLimitInstCount) ||
311-
(instCount + sendMessage * 4 > unrollLimitInstCount);
321+
(TripCount > unrollLimitInstCount) ||
322+
(instCount + sendMessage * 4 > unrollLimitInstCount);
312323

313324
// if the loop doesn't have sample, skip the unrolling parameter change
314325
if (!sendMessage)
@@ -349,17 +360,17 @@ namespace llvm {
349360
// The following is only available and required from LLVM 3.7+.
350361
UP.AllowExpensiveTripCount = true;
351362

352-
if (MDNode * LoopID = L->getLoopID())
363+
if (MDNode* LoopID = L->getLoopID())
353364
{
354365
const llvm::StringRef maxIterMetadataNames = "spv.loop.iterations.max";
355366
#if LLVM_VERSION_MAJOR < 11
356367
const llvm::StringRef peelCountMetadataNames = "spv.loop.peel.count";
357368
#endif
358369
for (unsigned i = 0; i < LoopID->getNumOperands(); ++i)
359370
{
360-
if (MDNode * MD = llvm::dyn_cast<MDNode>(LoopID->getOperand(i)))
371+
if (MDNode* MD = llvm::dyn_cast<MDNode>(LoopID->getOperand(i)))
361372
{
362-
if (MDString * S = llvm::dyn_cast<MDString>(MD->getOperand(0)))
373+
if (MDString* S = llvm::dyn_cast<MDString>(MD->getOperand(0)))
363374
{
364375
if (maxIterMetadataNames.equals(S->getString()))
365376
{
@@ -381,21 +392,21 @@ namespace llvm {
381392
}
382393

383394
#if LLVM_VERSION_MAJOR >= 11
384-
// [LLVM-UPGRADE] Peeling information was separated
385-
// https://github.com/llvm/llvm-project/commit/e541e1b757237172c247904b670c9894d6b3759d
395+
// [LLVM-UPGRADE] Peeling information was separated
396+
// https://github.com/llvm/llvm-project/commit/e541e1b757237172c247904b670c9894d6b3759d
386397

387-
void GenIntrinsicsTTIImpl::getPeelingPreferences(Loop* L, ScalarEvolution& SE,
388-
llvm::TargetTransformInfo::PeelingPreferences& PP)
389-
{
390-
if (MDNode * LoopID = L->getLoopID())
398+
void GenIntrinsicsTTIImpl::getPeelingPreferences(Loop* L, ScalarEvolution& SE,
399+
llvm::TargetTransformInfo::PeelingPreferences& PP)
400+
{
401+
if (MDNode* LoopID = L->getLoopID())
391402
{
392403
const llvm::StringRef peelCountMetadataNames = "spv.loop.peel.count";
393404

394405
for (unsigned i = 0; i < LoopID->getNumOperands(); ++i)
395406
{
396-
if (MDNode * MD = llvm::dyn_cast<MDNode>(LoopID->getOperand(i)))
407+
if (MDNode* MD = llvm::dyn_cast<MDNode>(LoopID->getOperand(i)))
397408
{
398-
if (MDString * S = llvm::dyn_cast<MDString>(MD->getOperand(0)))
409+
if (MDString* S = llvm::dyn_cast<MDString>(MD->getOperand(0)))
399410
{
400411
if (peelCountMetadataNames.equals(S->getString()))
401412
{
@@ -408,12 +419,12 @@ namespace llvm {
408419
}
409420
}
410421

411-
}
422+
}
412423
#endif
413424

414425
bool GenIntrinsicsTTIImpl::isProfitableToHoist(Instruction* I)
415426
{
416-
if (auto * CI = dyn_cast<CallInst>(I))
427+
if (auto* CI = dyn_cast<CallInst>(I))
417428
{
418429
if (CI->isConvergent() &&
419430
#if LLVM_VERSION_MAJOR >= 7
@@ -432,7 +443,7 @@ namespace llvm {
432443
#if LLVM_VERSION_MAJOR <= 10
433444
unsigned GenIntrinsicsTTIImpl::getCallCost(const Function* F, ArrayRef<const Value*> Arguments
434445
#if LLVM_VERSION_MAJOR >= 9
435-
, const User * U
446+
, const User* U
436447
#endif
437448
) {
438449
IGC::CodeGenContext* CGC = this->ctx;
@@ -458,37 +469,37 @@ namespace llvm {
458469
);
459470
}
460471
#else
461-
// [LLVM-UPGRADE] moved from getCallCost to getUserCost
462-
// https://github.com/llvm/llvm-project/commit/2641a19981e71c887bece92074e00d1af3e716c9#diff-dd4bd65dc55d754674d9a945a0d22911
472+
// [LLVM-UPGRADE] moved from getCallCost to getUserCost
473+
// https://github.com/llvm/llvm-project/commit/2641a19981e71c887bece92074e00d1af3e716c9#diff-dd4bd65dc55d754674d9a945a0d22911
463474

464-
#if LLVM_VERSION_MAJOR <= 12
465-
int GenIntrinsicsTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands, TTI::TargetCostKind CostKind)
475+
#if LLVM_VERSION_MAJOR <= 12
476+
int GenIntrinsicsTTIImpl::getUserCost(const User* U, ArrayRef<const Value*> Operands, TTI::TargetCostKind CostKind)
466477
#else
467-
llvm::InstructionCost GenIntrinsicsTTIImpl::getUserCost(const User* U, ArrayRef<const Value*> Operands, TTI::TargetCostKind CostKind)
478+
llvm::InstructionCost GenIntrinsicsTTIImpl::getUserCost(const User* U, ArrayRef<const Value*> Operands, TTI::TargetCostKind CostKind)
468479
#endif
469-
{
470-
const Function* F = dyn_cast<Function>(U);
471-
if(F != nullptr)
472-
{
473-
IGC::CodeGenContext* CGC = this->ctx;
474-
if (!CGC->enableFunctionCall() && !GenISAIntrinsic::isIntrinsic(F) &&
475-
!F->isIntrinsic()) {
476-
// If subroutine calls are not enabled but we still see a function call,
477-
// it was not inlined (e.g. due to two-phase inlining). Return the function
478-
// size instead, to avoid under-estimating the cost of the function call.
479-
//
480-
// FIXME: We need to collect the cost following the call graph. However,
481-
// as LLVM's inliner only supports bottom-up inlining currently, that's
482-
// not a big issue so far.
483-
//
484-
// FIXME: We also need to consider the case where sub-routine call is
485-
// enabled.
486-
unsigned FuncSize = countTotalInstructions(F, false);
487-
return TargetTransformInfo::TCC_Basic * FuncSize;
480+
{
481+
const Function* F = dyn_cast<Function>(U);
482+
if (F != nullptr)
483+
{
484+
IGC::CodeGenContext* CGC = this->ctx;
485+
if (!CGC->enableFunctionCall() && !GenISAIntrinsic::isIntrinsic(F) &&
486+
!F->isIntrinsic()) {
487+
// If subroutine calls are not enabled but we still see a function call,
488+
// it was not inlined (e.g. due to two-phase inlining). Return the function
489+
// size instead, to avoid under-estimating the cost of the function call.
490+
//
491+
// FIXME: We need to collect the cost following the call graph. However,
492+
// as LLVM's inliner only supports bottom-up inlining currently, that's
493+
// not a big issue so far.
494+
//
495+
// FIXME: We also need to consider the case where sub-routine call is
496+
// enabled.
497+
unsigned FuncSize = countTotalInstructions(F, false);
498+
return TargetTransformInfo::TCC_Basic * FuncSize;
499+
}
488500
}
489-
}
490-
return BaseT::getUserCost(U, Operands, CostKind);
491-
}
501+
return BaseT::getUserCost(U, Operands, CostKind);
502+
}
492503
#endif
493504

494505
} // namespace llvm

IGC/common/MDFrameWork.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,7 @@ namespace IGC
246246
unsigned FloatCvtIntRoundingMode = IGC::ROUND_TO_ZERO;
247247

248248
unsigned VISAPreSchedRPThreshold = 0;
249+
unsigned SetLoopUnrollThreshold = 0;
249250
bool UnsafeMathOptimizations = false;
250251
bool FiniteMathOnly = false;
251252
bool FastRelaxedMath = false;
@@ -306,6 +307,7 @@ namespace IGC
306307
unsigned char forcedSIMDSize = 0; // 0 means not forced
307308
unsigned int forceTotalGRFNum = 0; // 0 means not forced
308309
unsigned int VISAPreSchedRPThreshold = 0; // 0 means use the default
310+
unsigned int SetLoopUnrollThreshold = 0; // 0 means use the default
309311
bool forcedVISAPreRAScheduler = false;
310312
// disables dispatch along y and tiled order optimizations
311313
bool disableLocalIdOrderOptimizations = false;

0 commit comments

Comments
 (0)