17
17
#include " AMDGPUTargetTransformInfo.h"
18
18
#include " AMDGPUTargetMachine.h"
19
19
#include " MCTargetDesc/AMDGPUMCTargetDesc.h"
20
- #include " llvm/Analysis/InlineCost.h"
21
20
#include " llvm/Analysis/LoopInfo.h"
22
21
#include " llvm/Analysis/ValueTracking.h"
23
- #include " llvm/CodeGen/Analysis.h"
24
22
#include " llvm/IR/IRBuilder.h"
25
23
#include " llvm/IR/IntrinsicsAMDGPU.h"
26
24
#include " llvm/IR/PatternMatch.h"
@@ -1169,57 +1167,10 @@ bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
1169
1167
return true ;
1170
1168
}
1171
1169
1172
- static unsigned adjustInliningThresholdUsingCallee (const Function *Callee,
1173
- const SITargetLowering *TLI,
1174
- const GCNTTIImpl *TTIImpl) {
1175
- const int NrOfSGPRUntilSpill = 26 ;
1176
- const int NrOfVGPRUntilSpill = 32 ;
1177
-
1178
- const DataLayout &DL = TTIImpl->getDataLayout ();
1179
-
1180
- unsigned adjustThreshold = 0 ;
1181
- int SGPRsInUse = 0 ;
1182
- int VGPRsInUse = 0 ;
1183
- for (const Argument &A : Callee->args ()) {
1184
- SmallVector<EVT, 4 > ValueVTs;
1185
- ComputeValueVTs (*TLI, DL, A.getType (), ValueVTs);
1186
- for (auto ArgVT : ValueVTs) {
1187
- unsigned CCRegNum = TLI->getNumRegistersForCallingConv (
1188
- A.getContext (), Callee->getCallingConv (), ArgVT);
1189
- if (AMDGPU::isArgPassedInSGPR (&A))
1190
- SGPRsInUse += CCRegNum;
1191
- else
1192
- VGPRsInUse += CCRegNum;
1193
- }
1194
- }
1195
-
1196
- // The cost of passing function arguments through the stack:
1197
- // 1 instruction to put a function argument on the stack in the caller.
1198
- // 1 instruction to take a function argument from the stack in callee.
1199
- // 1 instruction is explicitly take care of data dependencies in callee
1200
- // function.
1201
- InstructionCost ArgStackCost (1 );
1202
- ArgStackCost += const_cast <GCNTTIImpl *>(TTIImpl)->getMemoryOpCost (
1203
- Instruction::Store, Type::getInt32Ty (Callee->getContext ()), Align (4 ),
1204
- AMDGPUAS::PRIVATE_ADDRESS, TTI::TCK_SizeAndLatency);
1205
- ArgStackCost += const_cast <GCNTTIImpl *>(TTIImpl)->getMemoryOpCost (
1206
- Instruction::Load, Type::getInt32Ty (Callee->getContext ()), Align (4 ),
1207
- AMDGPUAS::PRIVATE_ADDRESS, TTI::TCK_SizeAndLatency);
1208
-
1209
- // The penalty cost is computed relative to the cost of instructions and does
1210
- // not model any storage costs.
1211
- adjustThreshold += std::max (0 , SGPRsInUse - NrOfSGPRUntilSpill) *
1212
- *ArgStackCost.getValue () * InlineConstants::getInstrCost ();
1213
- adjustThreshold += std::max (0 , VGPRsInUse - NrOfVGPRUntilSpill) *
1214
- *ArgStackCost.getValue () * InlineConstants::getInstrCost ();
1215
- return adjustThreshold;
1216
- }
1217
-
1218
1170
unsigned GCNTTIImpl::adjustInliningThreshold (const CallBase *CB) const {
1219
1171
// If we have a pointer to private array passed into a function
1220
1172
// it will not be optimized out, leaving scratch usage.
1221
1173
// Increase the inline threshold to allow inlining in this case.
1222
- unsigned adjustThreshold = 0 ;
1223
1174
uint64_t AllocaSize = 0 ;
1224
1175
SmallPtrSet<const AllocaInst *, 8 > AIVisited;
1225
1176
for (Value *PtrArg : CB->args ()) {
@@ -1241,10 +1192,9 @@ unsigned GCNTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
1241
1192
}
1242
1193
}
1243
1194
}
1244
- adjustThreshold +=
1245
- adjustInliningThresholdUsingCallee (CB->getCalledFunction (), TLI, this );
1246
- adjustThreshold += AllocaSize ? ArgAllocaCost : AllocaSize;
1247
- return adjustThreshold;
1195
+ if (AllocaSize)
1196
+ return ArgAllocaCost;
1197
+ return 0 ;
1248
1198
}
1249
1199
1250
1200
void GCNTTIImpl::getUnrollingPreferences (Loop *L, ScalarEvolution &SE,
0 commit comments