@@ -71,7 +71,7 @@ namespace llvm {
71
71
72
72
GenISAIntrinsic::ID ID = inst->getIntrinsicID ();
73
73
if (/* ID == llvm::GenISAIntrinsic::GenISA_typedwrite ||
74
- ID == llvm::GenISAIntrinsic::GenISA_typedread ||*/
74
+ ID == llvm::GenISAIntrinsic::GenISA_typedread ||*/
75
75
ID == llvm::GenISAIntrinsic::GenISA_URBRead ||
76
76
isURBWriteIntrinsic (inst) ||
77
77
ID == llvm::GenISAIntrinsic::GenISA_ldstructured)
@@ -89,7 +89,7 @@ namespace llvm {
89
89
llvm::BasicBlock* BB = const_cast <llvm::BasicBlock*>(&*BBI);
90
90
for (auto II = BB->begin (); II != BB->end (); II++)
91
91
{
92
- if (llvm::GenIntrinsicInst * pIntrinsic = llvm::dyn_cast<llvm::GenIntrinsicInst>(II))
92
+ if (llvm::GenIntrinsicInst* pIntrinsic = llvm::dyn_cast<llvm::GenIntrinsicInst>(II))
93
93
{
94
94
if (CheckSendMsg && isSendMessage (pIntrinsic))
95
95
{
@@ -108,9 +108,9 @@ namespace llvm {
108
108
109
109
void GenIntrinsicsTTIImpl::getUnrollingPreferences (Loop* L,
110
110
#if LLVM_VERSION_MAJOR >= 7
111
- ScalarEvolution & SE,
111
+ ScalarEvolution& SE,
112
112
#endif
113
- TTI::UnrollingPreferences & UP)
113
+ TTI::UnrollingPreferences& UP)
114
114
{
115
115
unsigned LoopUnrollThreshold = ctx->m_DriverInfo .GetLoopUnrollThreshold ();
116
116
@@ -119,6 +119,17 @@ namespace llvm {
119
119
{
120
120
LoopUnrollThreshold = IGC_GET_FLAG_VALUE (SetLoopUnrollThreshold);
121
121
}
122
+ else
123
+ {
124
+ if (ctx->type == ShaderType::COMPUTE_SHADER && ctx->getModuleMetaData ()->csInfo .SetLoopUnrollThreshold > 0 )
125
+ {
126
+ LoopUnrollThreshold = ctx->getModuleMetaData ()->csInfo .SetLoopUnrollThreshold ;
127
+ }
128
+ else if (ctx->type == ShaderType::PIXEL_SHADER && ctx->getModuleMetaData ()->compOpt .SetLoopUnrollThreshold > 0 )
129
+ {
130
+ LoopUnrollThreshold = ctx->getModuleMetaData ()->compOpt .SetLoopUnrollThreshold ;
131
+ }
132
+ }
122
133
unsigned totalInstCountInShader = countTotalInstructions (L->getBlocks ()[0 ]->getParent ());
123
134
uint32_t registerPressureEst = (uint32_t )(IGC_GET_FLAG_VALUE (SetRegisterPressureThresholdForLoopUnroll) * (ctx->getNumGRFPerThread () / 128.0 ));
124
135
bool lowPressure = (this ->ctx ->m_tempCount < registerPressureEst) && (totalInstCountInShader < LoopUnrollThreshold);
@@ -131,16 +142,16 @@ namespace llvm {
131
142
UP.PartialThreshold = LoopUnrollThreshold;
132
143
UP.Partial = true ;
133
144
}
134
- else // for high registry pressure shaders, limit the unrolling to small loops and only fully unroll
145
+ else // for high register pressure shaders, limit the unrolling to small loops and only fully unroll
135
146
{
136
- if (IGC_GET_FLAG_VALUE (SetLoopUnrollThresholdForHighRegPressure) != 0 )
147
+ if (IGC_GET_FLAG_VALUE (SetLoopUnrollThresholdForHighRegPressure) != 0 )
137
148
UP.Threshold = IGC_GET_FLAG_VALUE (SetLoopUnrollThresholdForHighRegPressure);
138
149
else
139
150
UP.Threshold = 200 ;
140
151
}
141
152
142
153
#if LLVM_VERSION_MAJOR == 4
143
- ScalarEvolution * SE = &dummyPass->getAnalysisIfAvailable <ScalarEvolutionWrapperPass>()->getSE ();
154
+ ScalarEvolution* SE = &dummyPass->getAnalysisIfAvailable <ScalarEvolutionWrapperPass>()->getSE ();
144
155
if (!SE)
145
156
return ;
146
157
#endif
@@ -157,7 +168,7 @@ namespace llvm {
157
168
if (ExitingBlock) {
158
169
if (UP.Partial ) {
159
170
IGCLLVM::TerminatorInst* Term = ExitingBlock->getTerminator ();
160
- if (BranchInst * BI = dyn_cast<BranchInst>(Term))
171
+ if (BranchInst* BI = dyn_cast<BranchInst>(Term))
161
172
{
162
173
if (dyn_cast<FCmpInst>(BI->getCondition ()))
163
174
{
@@ -295,7 +306,7 @@ namespace llvm {
295
306
296
307
for (I = instructionList.begin (); I != instructionList.end (); I++)
297
308
{
298
- if (llvm::GenIntrinsicInst * pIntrinsic = llvm::dyn_cast<llvm::GenIntrinsicInst>(I))
309
+ if (llvm::GenIntrinsicInst* pIntrinsic = llvm::dyn_cast<llvm::GenIntrinsicInst>(I))
299
310
{
300
311
if (isSendMessage (pIntrinsic))
301
312
{
@@ -307,8 +318,8 @@ namespace llvm {
307
318
unsigned int estimateUnrolledInstCount = (instCount + sendMessage * 4 ) * TripCount;
308
319
unsigned int unrollLimitInstCount = LoopUnrollThreshold > totalInstCountInShader ? LoopUnrollThreshold - totalInstCountInShader : 0 ;
309
320
bool limitUnrolling = (estimateUnrolledInstCount > unrollLimitInstCount) ||
310
- (TripCount > unrollLimitInstCount) ||
311
- (instCount + sendMessage * 4 > unrollLimitInstCount);
321
+ (TripCount > unrollLimitInstCount) ||
322
+ (instCount + sendMessage * 4 > unrollLimitInstCount);
312
323
313
324
// if the loop doesn't have sample, skip the unrolling parameter change
314
325
if (!sendMessage)
@@ -349,17 +360,17 @@ namespace llvm {
349
360
// The following is only available and required from LLVM 3.7+.
350
361
UP.AllowExpensiveTripCount = true ;
351
362
352
- if (MDNode * LoopID = L->getLoopID ())
363
+ if (MDNode* LoopID = L->getLoopID ())
353
364
{
354
365
const llvm::StringRef maxIterMetadataNames = " spv.loop.iterations.max" ;
355
366
#if LLVM_VERSION_MAJOR < 11
356
367
const llvm::StringRef peelCountMetadataNames = " spv.loop.peel.count" ;
357
368
#endif
358
369
for (unsigned i = 0 ; i < LoopID->getNumOperands (); ++i)
359
370
{
360
- if (MDNode * MD = llvm::dyn_cast<MDNode>(LoopID->getOperand (i)))
371
+ if (MDNode* MD = llvm::dyn_cast<MDNode>(LoopID->getOperand (i)))
361
372
{
362
- if (MDString * S = llvm::dyn_cast<MDString>(MD->getOperand (0 )))
373
+ if (MDString* S = llvm::dyn_cast<MDString>(MD->getOperand (0 )))
363
374
{
364
375
if (maxIterMetadataNames.equals (S->getString ()))
365
376
{
@@ -381,21 +392,21 @@ namespace llvm {
381
392
}
382
393
383
394
#if LLVM_VERSION_MAJOR >= 11
384
- // [LLVM-UPGRADE] Peeling information was separated
385
- // https://github.com/llvm/llvm-project/commit/e541e1b757237172c247904b670c9894d6b3759d
395
+ // [LLVM-UPGRADE] Peeling information was separated
396
+ // https://github.com/llvm/llvm-project/commit/e541e1b757237172c247904b670c9894d6b3759d
386
397
387
- void GenIntrinsicsTTIImpl::getPeelingPreferences (Loop* L, ScalarEvolution& SE,
388
- llvm::TargetTransformInfo::PeelingPreferences& PP)
389
- {
390
- if (MDNode * LoopID = L->getLoopID ())
398
+ void GenIntrinsicsTTIImpl::getPeelingPreferences (Loop* L, ScalarEvolution& SE,
399
+ llvm::TargetTransformInfo::PeelingPreferences& PP)
400
+ {
401
+ if (MDNode* LoopID = L->getLoopID ())
391
402
{
392
403
const llvm::StringRef peelCountMetadataNames = " spv.loop.peel.count" ;
393
404
394
405
for (unsigned i = 0 ; i < LoopID->getNumOperands (); ++i)
395
406
{
396
- if (MDNode * MD = llvm::dyn_cast<MDNode>(LoopID->getOperand (i)))
407
+ if (MDNode* MD = llvm::dyn_cast<MDNode>(LoopID->getOperand (i)))
397
408
{
398
- if (MDString * S = llvm::dyn_cast<MDString>(MD->getOperand (0 )))
409
+ if (MDString* S = llvm::dyn_cast<MDString>(MD->getOperand (0 )))
399
410
{
400
411
if (peelCountMetadataNames.equals (S->getString ()))
401
412
{
@@ -408,12 +419,12 @@ namespace llvm {
408
419
}
409
420
}
410
421
411
- }
422
+ }
412
423
#endif
413
424
414
425
bool GenIntrinsicsTTIImpl::isProfitableToHoist (Instruction* I)
415
426
{
416
- if (auto * CI = dyn_cast<CallInst>(I))
427
+ if (auto * CI = dyn_cast<CallInst>(I))
417
428
{
418
429
if (CI->isConvergent () &&
419
430
#if LLVM_VERSION_MAJOR >= 7
@@ -432,7 +443,7 @@ namespace llvm {
432
443
#if LLVM_VERSION_MAJOR <= 10
433
444
unsigned GenIntrinsicsTTIImpl::getCallCost (const Function* F, ArrayRef<const Value*> Arguments
434
445
#if LLVM_VERSION_MAJOR >= 9
435
- , const User * U
446
+ , const User* U
436
447
#endif
437
448
) {
438
449
IGC::CodeGenContext* CGC = this ->ctx ;
@@ -458,37 +469,37 @@ namespace llvm {
458
469
);
459
470
}
460
471
#else
461
- // [LLVM-UPGRADE] moved from getCallCost to getUserCost
462
- // https://github.com/llvm/llvm-project/commit/2641a19981e71c887bece92074e00d1af3e716c9#diff-dd4bd65dc55d754674d9a945a0d22911
472
+ // [LLVM-UPGRADE] moved from getCallCost to getUserCost
473
+ // https://github.com/llvm/llvm-project/commit/2641a19981e71c887bece92074e00d1af3e716c9#diff-dd4bd65dc55d754674d9a945a0d22911
463
474
464
- #if LLVM_VERSION_MAJOR <= 12
465
- int GenIntrinsicsTTIImpl::getUserCost (const User * U, ArrayRef<const Value *> Operands, TTI::TargetCostKind CostKind)
475
+ #if LLVM_VERSION_MAJOR <= 12
476
+ int GenIntrinsicsTTIImpl::getUserCost (const User* U, ArrayRef<const Value*> Operands, TTI::TargetCostKind CostKind)
466
477
#else
467
- llvm::InstructionCost GenIntrinsicsTTIImpl::getUserCost (const User* U, ArrayRef<const Value*> Operands, TTI::TargetCostKind CostKind)
478
+ llvm::InstructionCost GenIntrinsicsTTIImpl::getUserCost (const User* U, ArrayRef<const Value*> Operands, TTI::TargetCostKind CostKind)
468
479
#endif
469
- {
470
- const Function* F = dyn_cast<Function>(U);
471
- if (F != nullptr )
472
- {
473
- IGC::CodeGenContext* CGC = this ->ctx ;
474
- if (!CGC->enableFunctionCall () && !GenISAIntrinsic::isIntrinsic (F) &&
475
- !F->isIntrinsic ()) {
476
- // If subroutine call is not enabled but we have function call. They
477
- // are not inlined. e.g. due to two-phase inlining. Return function
478
- // size instead of to avoid under-estimating the cost of function call.
479
- //
480
- // FIXME: We need to collect the cost following calling graph. However,
481
- // as LLVM's ininer only support bottom-up inlining currently. That's
482
- // not a big issue so far.
483
- //
484
- // FIXME: We also need to consider the case where sub-routine call is
485
- // enabled.
486
- unsigned FuncSize = countTotalInstructions (F, false );
487
- return TargetTransformInfo::TCC_Basic * FuncSize;
480
+ {
481
+ const Function* F = dyn_cast<Function>(U);
482
+ if (F != nullptr )
483
+ {
484
+ IGC::CodeGenContext* CGC = this ->ctx ;
485
+ if (!CGC->enableFunctionCall () && !GenISAIntrinsic::isIntrinsic (F) &&
486
+ !F->isIntrinsic ()) {
487
+ // If subroutine calls are not enabled but we still have function calls, they
488
+ // were not inlined, e.g. due to two-phase inlining. Return the function
489
+ // size instead, to avoid under-estimating the cost of the function call.
490
+ //
491
+ // FIXME: We need to collect the cost following the call graph. However,
492
+ // as LLVM's inliner only supports bottom-up inlining currently, that's
493
+ // not a big issue so far.
494
+ //
495
+ // FIXME: We also need to consider the case where sub-routine call is
496
+ // enabled.
497
+ unsigned FuncSize = countTotalInstructions (F, false );
498
+ return TargetTransformInfo::TCC_Basic * FuncSize;
499
+ }
488
500
}
489
- }
490
- return BaseT::getUserCost (U, Operands, CostKind);
491
- }
501
+ return BaseT::getUserCost (U, Operands, CostKind);
502
+ }
492
503
#endif
493
504
494
505
} // namespace llvm
0 commit comments