Skip to content

Commit 7cde4f2

Browse files
committed
[AMDGPU] Increase inline threshold when the callee only has one live use
Currently we do not inline a large function even if it has only one live use. This can significantly hurt performance because CSR (callee-saved register) spills are very expensive. The goal of this PR is to force inlining when there is only one live use by adjusting the inlining threshold, which is a configurable number. The default value is 15000, borrowed from `InlineConstants::LastCallToStaticBonus`. I'm not sure whether this is a good number, or whether this is the right way to do it. After making this change, the callee in my local test case can finally be inlined, but the cost is still very close to the threshold: `cost=14010, threshold=170775`. Speaking of the test, how are we going to test this? Do we want to include a giant IR file? Fixes SWDEV-471398.
1 parent f0ed31c commit 7cde4f2

File tree

10 files changed

+56
-11
lines changed

10 files changed

+56
-11
lines changed

llvm/include/llvm/Analysis/InlineAdvisor.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,8 @@ getDevelopmentModeAdvisor(Module &M, ModuleAnalysisManager &MAM,
371371
/// using that cost, so we won't do so from this function. Return std::nullopt
372372
/// if inlining should not be attempted.
373373
std::optional<InlineCost>
374-
shouldInline(CallBase &CB, function_ref<InlineCost(CallBase &CB)> GetInlineCost,
374+
shouldInline(CallBase &CB, TargetTransformInfo &CalleeTTI,
375+
function_ref<InlineCost(CallBase &CB)> GetInlineCost,
375376
OptimizationRemarkEmitter &ORE, bool EnableDeferral = true);
376377

377378
/// Emit ORE message.

llvm/include/llvm/Analysis/InlineCost.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ const int OptAggressiveThreshold = 250;
4747
int getInstrCost();
4848
const int IndirectCallThreshold = 100;
4949
const int LoopPenalty = 25;
50-
const int LastCallToStaticBonus = 15000;
5150
const int ColdccPenalty = 2000;
5251
/// Do not inline functions which allocate this many bytes on the stack
5352
/// when the caller is recursive.

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,9 @@ class TargetTransformInfo {
352352
unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const;
353353
unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const;
354354

355+
/// \returns The bonus of inlining the last call to a static function.
356+
int getInliningLastCallToStaticBonus() const;
357+
355358
/// \returns A value to be added to the inlining threshold.
356359
unsigned adjustInliningThreshold(const CallBase *CB) const;
357360

@@ -1840,6 +1843,7 @@ class TargetTransformInfo::Concept {
18401843
virtual unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const = 0;
18411844
virtual unsigned
18421845
getInliningCostBenefitAnalysisProfitableMultiplier() const = 0;
1846+
virtual int getInliningLastCallToStaticBonus() const = 0;
18431847
virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
18441848
virtual int getInlinerVectorBonusPercent() const = 0;
18451849
virtual unsigned getCallerAllocaCost(const CallBase *CB,
@@ -2250,6 +2254,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
22502254
unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override {
22512255
return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
22522256
}
2257+
int getInliningLastCallToStaticBonus() const override {
2258+
return Impl.getInliningLastCallToStaticBonus();
2259+
}
22532260
int getInlinerVectorBonusPercent() const override {
22542261
return Impl.getInlinerVectorBonusPercent();
22552262
}

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,11 @@ class TargetTransformInfoImplBase {
7474
unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {
7575
return 8;
7676
}
77+
int getInliningLastCallToStaticBonus() const {
78+
// This is the value of InlineConstants::LastCallToStaticBonus before this
79+
// function was introduced.
80+
return 15000;
81+
}
7782
unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }
7883
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const {
7984
return 0;

llvm/lib/Analysis/InlineAdvisor.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -151,17 +151,17 @@ std::optional<llvm::InlineCost> static getDefaultInlineAdvice(
151151
return FAM.getResult<TargetLibraryAnalysis>(F);
152152
};
153153

154+
Function &Callee = *CB.getCalledFunction();
155+
auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
154156
auto GetInlineCost = [&](CallBase &CB) {
155-
Function &Callee = *CB.getCalledFunction();
156-
auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
157157
bool RemarksEnabled =
158158
Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
159159
DEBUG_TYPE);
160160
return getInlineCost(CB, Params, CalleeTTI, GetAssumptionCache, GetTLI,
161161
GetBFI, PSI, RemarksEnabled ? &ORE : nullptr);
162162
};
163163
return llvm::shouldInline(
164-
CB, GetInlineCost, ORE,
164+
CB, CalleeTTI, GetInlineCost, ORE,
165165
Params.EnableDeferral.value_or(EnableInlineDeferral));
166166
}
167167

@@ -247,7 +247,8 @@ bool InlineAdvisorAnalysis::Result::tryCreate(
247247
/// \p TotalSecondaryCost will be set to the estimated cost of inlining the
248248
/// caller if \p CB is suppressed for inlining.
249249
static bool
250-
shouldBeDeferred(Function *Caller, InlineCost IC, int &TotalSecondaryCost,
250+
shouldBeDeferred(Function *Caller, TargetTransformInfo &CalleeTTI,
251+
InlineCost IC, int &TotalSecondaryCost,
251252
function_ref<InlineCost(CallBase &CB)> GetInlineCost) {
252253
// For now we only handle local or inline functions.
253254
if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage())
@@ -320,7 +321,7 @@ shouldBeDeferred(Function *Caller, InlineCost IC, int &TotalSecondaryCost,
320321
// be removed entirely. We did not account for this above unless there
321322
// is only one caller of Caller.
322323
if (ApplyLastCallBonus)
323-
TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus;
324+
TotalSecondaryCost -= CalleeTTI.getInliningLastCallToStaticBonus();
324325

325326
// If InlineDeferralScale is negative, then ignore the cost of primary
326327
// inlining -- IC.getCost() multiplied by the number of callers to Caller.
@@ -374,7 +375,7 @@ void llvm::setInlineRemark(CallBase &CB, StringRef Message) {
374375
/// using that cost, so we won't do so from this function. Return std::nullopt
375376
/// if inlining should not be attempted.
376377
std::optional<InlineCost>
377-
llvm::shouldInline(CallBase &CB,
378+
llvm::shouldInline(CallBase &CB, TargetTransformInfo &CalleeTTI,
378379
function_ref<InlineCost(CallBase &CB)> GetInlineCost,
379380
OptimizationRemarkEmitter &ORE, bool EnableDeferral) {
380381
using namespace ore;
@@ -413,8 +414,8 @@ llvm::shouldInline(CallBase &CB,
413414
}
414415

415416
int TotalSecondaryCost = 0;
416-
if (EnableDeferral &&
417-
shouldBeDeferred(Caller, IC, TotalSecondaryCost, GetInlineCost)) {
417+
if (EnableDeferral && shouldBeDeferred(Caller, CalleeTTI, IC,
418+
TotalSecondaryCost, GetInlineCost)) {
418419
LLVM_DEBUG(dbgs() << " NOT Inlining: " << CB
419420
<< " Cost = " << IC.getCost()
420421
<< ", outer Cost = " << TotalSecondaryCost << '\n');

llvm/lib/Analysis/InlineCost.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1943,7 +1943,7 @@ void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
19431943
// and the callsite.
19441944
int SingleBBBonusPercent = 50;
19451945
int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
1946-
int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
1946+
int LastCallToStaticBonus = TTI.getInliningLastCallToStaticBonus();
19471947

19481948
// Lambda to set all the above bonus and bonus percentages to 0.
19491949
auto DisallowAllBonuses = [&]() {

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,10 @@ TargetTransformInfo::getInliningCostBenefitAnalysisProfitableMultiplier()
228228
return TTIImpl->getInliningCostBenefitAnalysisProfitableMultiplier();
229229
}
230230

231+
int TargetTransformInfo::getInliningLastCallToStaticBonus() const {
232+
return TTIImpl->getInliningLastCallToStaticBonus();
233+
}
234+
231235
unsigned
232236
TargetTransformInfo::adjustInliningThreshold(const CallBase *CB) const {
233237
return TTIImpl->adjustInliningThreshold(CB);

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1314,6 +1314,11 @@ static unsigned getCallArgsTotalAllocaSize(const CallBase *CB,
13141314
return AllocaSize;
13151315
}
13161316

1317+
int GCNTTIImpl::getInliningLastCallToStaticBonus() const {
1318+
return BaseT::getInliningLastCallToStaticBonus() *
1319+
getInliningThresholdMultiplier();
1320+
}
1321+
13171322
unsigned GCNTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
13181323
unsigned Threshold = adjustInliningThresholdUsingCallee(CB, TLI, this);
13191324

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
243243
bool areInlineCompatible(const Function *Caller,
244244
const Function *Callee) const;
245245

246+
int getInliningLastCallToStaticBonus() const;
246247
unsigned getInliningThresholdMultiplier() const { return 11; }
247248
unsigned adjustInliningThreshold(const CallBase *CB) const;
248249
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const;
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=inline -inline-threshold=0 -debug-only=inline-cost %s -o - 2>&1 | FileCheck %s
2+
; REQUIRES: asserts
3+
4+
; CHECK: Analyzing call of callee_not_only_one_live_use... (caller:caller)
5+
; CHECK: Cost: -30
6+
; CHECK: Analyzing call of callee_only_one_live_use... (caller:caller)
7+
; CHECK: Cost: -165030
8+
9+
define internal void @callee_not_only_one_live_use() {
10+
ret void
11+
}
12+
13+
define internal void @callee_only_one_live_use() {
14+
ret void
15+
}
16+
17+
define void @caller() {
18+
call void @callee_not_only_one_live_use()
19+
call void @callee_not_only_one_live_use()
20+
call void @callee_only_one_live_use()
21+
ret void
22+
}

0 commit comments

Comments
 (0)