Skip to content

Commit 1beba44

Browse files
committed
Revert "[AMDGPU] Modify adjustInliningThreshold to also consider the cost of passing function arguments through the stack"
This reverts commit 142c28f.
1 parent 28b9277 commit 1beba44

7 files changed

+3 additions, -836 deletions (lines changed)

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 3 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -17,10 +17,8 @@
1717
#include "AMDGPUTargetTransformInfo.h"
1818
#include "AMDGPUTargetMachine.h"
1919
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20-
#include "llvm/Analysis/InlineCost.h"
2120
#include "llvm/Analysis/LoopInfo.h"
2221
#include "llvm/Analysis/ValueTracking.h"
23-
#include "llvm/CodeGen/Analysis.h"
2422
#include "llvm/IR/IRBuilder.h"
2523
#include "llvm/IR/IntrinsicsAMDGPU.h"
2624
#include "llvm/IR/PatternMatch.h"
@@ -1169,57 +1167,10 @@ bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
11691167
return true;
11701168
}
11711169

1172-
static unsigned adjustInliningThresholdUsingCallee(const Function *Callee,
1173-
const SITargetLowering *TLI,
1174-
const GCNTTIImpl *TTIImpl) {
1175-
const int NrOfSGPRUntilSpill = 26;
1176-
const int NrOfVGPRUntilSpill = 32;
1177-
1178-
const DataLayout &DL = TTIImpl->getDataLayout();
1179-
1180-
unsigned adjustThreshold = 0;
1181-
int SGPRsInUse = 0;
1182-
int VGPRsInUse = 0;
1183-
for (const Argument &A : Callee->args()) {
1184-
SmallVector<EVT, 4> ValueVTs;
1185-
ComputeValueVTs(*TLI, DL, A.getType(), ValueVTs);
1186-
for (auto ArgVT : ValueVTs) {
1187-
unsigned CCRegNum = TLI->getNumRegistersForCallingConv(
1188-
A.getContext(), Callee->getCallingConv(), ArgVT);
1189-
if (AMDGPU::isArgPassedInSGPR(&A))
1190-
SGPRsInUse += CCRegNum;
1191-
else
1192-
VGPRsInUse += CCRegNum;
1193-
}
1194-
}
1195-
1196-
// The cost of passing function arguments through the stack:
1197-
// 1 instruction to put a function argument on the stack in the caller.
1198-
// 1 instruction to take a function argument from the stack in callee.
1199-
// 1 instruction is explicitly take care of data dependencies in callee
1200-
// function.
1201-
InstructionCost ArgStackCost(1);
1202-
ArgStackCost += const_cast<GCNTTIImpl *>(TTIImpl)->getMemoryOpCost(
1203-
Instruction::Store, Type::getInt32Ty(Callee->getContext()), Align(4),
1204-
AMDGPUAS::PRIVATE_ADDRESS, TTI::TCK_SizeAndLatency);
1205-
ArgStackCost += const_cast<GCNTTIImpl *>(TTIImpl)->getMemoryOpCost(
1206-
Instruction::Load, Type::getInt32Ty(Callee->getContext()), Align(4),
1207-
AMDGPUAS::PRIVATE_ADDRESS, TTI::TCK_SizeAndLatency);
1208-
1209-
// The penalty cost is computed relative to the cost of instructions and does
1210-
// not model any storage costs.
1211-
adjustThreshold += std::max(0, SGPRsInUse - NrOfSGPRUntilSpill) *
1212-
*ArgStackCost.getValue() * InlineConstants::getInstrCost();
1213-
adjustThreshold += std::max(0, VGPRsInUse - NrOfVGPRUntilSpill) *
1214-
*ArgStackCost.getValue() * InlineConstants::getInstrCost();
1215-
return adjustThreshold;
1216-
}
1217-
12181170
unsigned GCNTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
12191171
// If we have a pointer to private array passed into a function
12201172
// it will not be optimized out, leaving scratch usage.
12211173
// Increase the inline threshold to allow inlining in this case.
1222-
unsigned adjustThreshold = 0;
12231174
uint64_t AllocaSize = 0;
12241175
SmallPtrSet<const AllocaInst *, 8> AIVisited;
12251176
for (Value *PtrArg : CB->args()) {
@@ -1241,10 +1192,9 @@ unsigned GCNTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
12411192
}
12421193
}
12431194
}
1244-
adjustThreshold +=
1245-
adjustInliningThresholdUsingCallee(CB->getCalledFunction(), TLI, this);
1246-
adjustThreshold += AllocaSize ? ArgAllocaCost : AllocaSize;
1247-
return adjustThreshold;
1195+
if (AllocaSize)
1196+
return ArgAllocaCost;
1197+
return 0;
12481198
}
12491199

12501200
void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,

llvm/test/Transforms/Inline/AMDGPU/amdgpu-inline-stack-argument-i64.ll

Lines changed: 0 additions & 100 deletions
This file was deleted.

llvm/test/Transforms/Inline/AMDGPU/amdgpu-inline-stack-argument.ll

Lines changed: 0 additions & 164 deletions
This file was deleted.

0 commit comments

Comments
 (0)