Skip to content

Commit a3b9b6f

Browse files
committed
[AMDGPU] Increase inline threshold when the callee only has one live use
Currently we do not inline a large function even if it has only one live use. This can significantly hurt performance because CSR spills are very expensive. The goal of this PR is to force inlining when there is only one live use by adjusting the inlining threshold, which is a configurable number. The default value is 15000, which is borrowed from `InlineConstants::LastCallToStaticBonus`. I'm not sure whether this is a good number, or whether this is the right way to do it. After making this change, the callee in my local test case can finally be inlined, but the cost is still very close to the threshold: `cost=14010, threshold=170775`. Speaking of testing, how are we going to test this? Do we want to include a giant IR file? Fixes SWDEV-471398.
1 parent e0b840a commit a3b9b6f

File tree

7 files changed

+52
-1
lines changed

7 files changed

+52
-1
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,9 @@ class TargetTransformInfo {
352352
unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const;
353353
unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const;
354354

355+
/// \returns The bonus of inlining the last call to a static function.
356+
int getInliningLastCallToStaticBonus() const;
357+
355358
/// \returns A value to be added to the inlining threshold.
356359
unsigned adjustInliningThreshold(const CallBase *CB) const;
357360

@@ -1822,6 +1825,7 @@ class TargetTransformInfo::Concept {
18221825
virtual unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const = 0;
18231826
virtual unsigned
18241827
getInliningCostBenefitAnalysisProfitableMultiplier() const = 0;
1828+
virtual int getInliningLastCallToStaticBonus() const = 0;
18251829
virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
18261830
virtual int getInlinerVectorBonusPercent() const = 0;
18271831
virtual unsigned getCallerAllocaCost(const CallBase *CB,
@@ -2225,6 +2229,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
22252229
unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override {
22262230
return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
22272231
}
2232+
int getInliningLastCallToStaticBonus() const override {
2233+
return Impl.getInliningLastCallToStaticBonus();
2234+
}
22282235
int getInlinerVectorBonusPercent() const override {
22292236
return Impl.getInlinerVectorBonusPercent();
22302237
}

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@ class TargetTransformInfoImplBase {
7474
unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {
7575
return 8;
7676
}
77+
int getInliningLastCallToStaticBonus() const {
78+
// This is the same as InlineConstants::LastCallToStaticBonus.
79+
return 15000;
80+
}
7781
unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }
7882
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const {
7983
return 0;

llvm/lib/Analysis/InlineCost.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1943,7 +1943,7 @@ void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
19431943
// and the callsite.
19441944
int SingleBBBonusPercent = 50;
19451945
int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
1946-
int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
1946+
int LastCallToStaticBonus = TTI.getInliningLastCallToStaticBonus();
19471947

19481948
// Lambda to set all the above bonus and bonus percentages to 0.
19491949
auto DisallowAllBonuses = [&]() {

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,10 @@ TargetTransformInfo::getInliningCostBenefitAnalysisProfitableMultiplier()
228228
return TTIImpl->getInliningCostBenefitAnalysisProfitableMultiplier();
229229
}
230230

231+
int TargetTransformInfo::getInliningLastCallToStaticBonus() const {
232+
return TTIImpl->getInliningLastCallToStaticBonus();
233+
}
234+
231235
unsigned
232236
TargetTransformInfo::adjustInliningThreshold(const CallBase *CB) const {
233237
return TTIImpl->adjustInliningThreshold(CB);

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,10 @@ static cl::opt<size_t> InlineMaxBB(
7575
cl::desc("Maximum number of BBs allowed in a function after inlining"
7676
" (compile time constraint)"));
7777

78+
static cl::opt<unsigned> InlineThresholdOneLiveUse(
79+
"amdgpu-inline-threshold-one-live-use", cl::Hidden, cl::init(165000),
80+
cl::desc("Threshold added when the callee only has one live use"));
81+
7882
static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
7983
unsigned Depth = 0) {
8084
const Instruction *I = dyn_cast<Instruction>(Cond);
@@ -1299,6 +1303,10 @@ static unsigned getCallArgsTotalAllocaSize(const CallBase *CB,
12991303
return AllocaSize;
13001304
}
13011305

1306+
int GCNTTIImpl::getInliningLastCallToStaticBonus() const {
1307+
return InlineThresholdOneLiveUse;
1308+
}
1309+
13021310
unsigned GCNTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
13031311
unsigned Threshold = adjustInliningThresholdUsingCallee(CB, TLI, this);
13041312

@@ -1307,6 +1315,7 @@ unsigned GCNTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
13071315
unsigned AllocaSize = getCallArgsTotalAllocaSize(CB, DL);
13081316
if (AllocaSize > 0)
13091317
Threshold += ArgAllocaCost;
1318+
13101319
return Threshold;
13111320
}
13121321

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
240240
bool areInlineCompatible(const Function *Caller,
241241
const Function *Callee) const;
242242

243+
int getInliningLastCallToStaticBonus() const;
243244
unsigned getInliningThresholdMultiplier() const { return 11; }
244245
unsigned adjustInliningThreshold(const CallBase *CB) const;
245246
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const;
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=inline -inline-threshold=0 -debug-only=inline-cost %s -o - 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-DEFAULT %s
2+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=inline -inline-threshold=0 -debug-only=inline-cost %s -amdgpu-inline-threshold-one-live-use=1024 -o - 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-USER %s
3+
; REQUIRES: asserts
4+
5+
; CHECK: Analyzing call of callee_not_only_one_live_use... (caller:caller)
6+
; CHECK: Cost: -30
7+
; CHECK: Threshold: 0
8+
; CHECK: Analyzing call of callee_only_one_live_use... (caller:caller)
9+
; CHECK-DEFAULT: Cost: -165030
10+
; CHECK-USER: Cost: -1054
11+
; CHECK: Threshold: 0
12+
13+
define internal void @callee_not_only_one_live_use() {
14+
ret void
15+
}
16+
17+
define internal void @callee_only_one_live_use() {
18+
ret void
19+
}
20+
21+
define void @caller() {
22+
call void @callee_not_only_one_live_use()
23+
call void @callee_not_only_one_live_use()
24+
call void @callee_only_one_live_use()
25+
ret void
26+
}

0 commit comments

Comments
 (0)