Skip to content

Commit 2ec29b7

Browse files
committed
[InlineCost] Cache collectEphemeralValues() to save compile time
`CallAnalyzer::analyze()` can take several hours to run if the function contains thousands of @llvm.assume() calls and there are thousands of callsites. The time is spent in `collectEphemeralValues()`. This patch adds a cache so that the ephemeral values are collected only once per function.
1 parent 32dffdc commit 2ec29b7

File tree

4 files changed

+64
-26
lines changed

4 files changed

+64
-26
lines changed

llvm/include/llvm/Analysis/InlineAdvisor.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ class DefaultInlineAdvisor : public InlineAdvisor {
236236
std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) override;
237237

238238
InlineParams Params;
239+
EphValuesCacheT EphValuesCache;
239240
};
240241

241242
/// Used for dynamically registering InlineAdvisors as plugins

llvm/include/llvm/Analysis/InlineCost.h

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#define LLVM_ANALYSIS_INLINECOST_H
1515

1616
#include "llvm/ADT/APInt.h"
17+
#include "llvm/ADT/MapVector.h"
1718
#include "llvm/ADT/STLFunctionalExtras.h"
1819
#include "llvm/Analysis/InlineModelFeatureMaps.h"
1920
#include "llvm/IR/PassManager.h"
@@ -31,6 +32,9 @@ class Function;
3132
class ProfileSummaryInfo;
3233
class TargetTransformInfo;
3334
class TargetLibraryInfo;
35+
class Value;
36+
37+
using EphValuesCacheT = MapVector<Function *, SmallPtrSet<const Value *, 32>>;
3438

3539
namespace InlineConstants {
3640
// Various thresholds used by inline cost analysis.
@@ -273,14 +277,13 @@ int getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call,
273277
///
274278
/// Also note that calling this function *dynamically* computes the cost of
275279
/// inlining the callsite. It is an expensive, heavyweight call.
276-
InlineCost
277-
getInlineCost(CallBase &Call, const InlineParams &Params,
278-
TargetTransformInfo &CalleeTTI,
279-
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
280-
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
281-
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
282-
ProfileSummaryInfo *PSI = nullptr,
283-
OptimizationRemarkEmitter *ORE = nullptr);
280+
InlineCost getInlineCost(
281+
CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
282+
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
283+
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
284+
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
285+
ProfileSummaryInfo *PSI = nullptr, OptimizationRemarkEmitter *ORE = nullptr,
286+
EphValuesCacheT *EphValuesCache = nullptr);
284287

285288
/// Get an InlineCost with the callee explicitly specified.
286289
/// This allows you to calculate the cost of inlining a function via a
@@ -294,7 +297,8 @@ getInlineCost(CallBase &Call, Function *Callee, const InlineParams &Params,
294297
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
295298
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
296299
ProfileSummaryInfo *PSI = nullptr,
297-
OptimizationRemarkEmitter *ORE = nullptr);
300+
OptimizationRemarkEmitter *ORE = nullptr,
301+
EphValuesCacheT *EphValuesCache = nullptr);
298302

299303
/// Returns InlineResult::success() if the call site should be always inlined
300304
/// because of user directives, and the inlining is viable. Returns

llvm/lib/Analysis/InlineAdvisor.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,8 @@ void DefaultInlineAdvice::recordInliningImpl() {
133133
}
134134

135135
std::optional<llvm::InlineCost> static getDefaultInlineAdvice(
136-
CallBase &CB, FunctionAnalysisManager &FAM, const InlineParams &Params) {
136+
CallBase &CB, FunctionAnalysisManager &FAM, const InlineParams &Params,
137+
EphValuesCacheT *EphValuesCache = nullptr) {
137138
Function &Caller = *CB.getCaller();
138139
ProfileSummaryInfo *PSI =
139140
FAM.getResult<ModuleAnalysisManagerFunctionProxy>(Caller)
@@ -158,7 +159,8 @@ std::optional<llvm::InlineCost> static getDefaultInlineAdvice(
158159
Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
159160
DEBUG_TYPE);
160161
return getInlineCost(CB, Params, CalleeTTI, GetAssumptionCache, GetTLI,
161-
GetBFI, PSI, RemarksEnabled ? &ORE : nullptr);
162+
GetBFI, PSI, RemarksEnabled ? &ORE : nullptr,
163+
EphValuesCache);
162164
};
163165
return llvm::shouldInline(
164166
CB, CalleeTTI, GetInlineCost, ORE,
@@ -167,7 +169,7 @@ std::optional<llvm::InlineCost> static getDefaultInlineAdvice(
167169

168170
std::unique_ptr<InlineAdvice>
169171
DefaultInlineAdvisor::getAdviceImpl(CallBase &CB) {
170-
auto OIC = getDefaultInlineAdvice(CB, FAM, Params);
172+
auto OIC = getDefaultInlineAdvice(CB, FAM, Params, &EphValuesCache);
171173
return std::make_unique<DefaultInlineAdvice>(
172174
this, CB, OIC,
173175
FAM.getResult<OptimizationRemarkEmitterAnalysis>(*CB.getCaller()));

llvm/lib/Analysis/InlineCost.cpp

Lines changed: 45 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,11 @@ static cl::opt<bool> DisableGEPConstOperand(
173173
"disable-gep-const-evaluation", cl::Hidden, cl::init(false),
174174
cl::desc("Disables evaluation of GetElementPtr with constant operands"));
175175

176+
static cl::opt<unsigned> EphValuesCacheSizeLimit(
177+
"inline-ephvalues-cache-size-limit", cl::Hidden, cl::init(8),
178+
cl::desc(
179+
"Clear the cache of ephemeral values if it grows larger than this"));
180+
176181
namespace llvm {
177182
std::optional<int> getStringFnAttrAsInt(const Attribute &Attr) {
178183
if (Attr.isValid()) {
@@ -269,6 +274,10 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
269274
/// easily cacheable. Instead, use the cover function paramHasAttr.
270275
CallBase &CandidateCall;
271276

277+
/// Collecting the ephemeral values over and over again can be expensive, so
278+
/// cache them.
279+
EphValuesCacheT *EphValuesCache;
280+
272281
/// Extension points for handling callsite features.
273282
// Called before a basic block was analyzed.
274283
virtual void onBlockStart(const BasicBlock *BB) {}
@@ -510,10 +519,11 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
510519
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
511520
function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
512521
ProfileSummaryInfo *PSI = nullptr,
513-
OptimizationRemarkEmitter *ORE = nullptr)
522+
OptimizationRemarkEmitter *ORE = nullptr,
523+
EphValuesCacheT *EphValuesCache = nullptr)
514524
: TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
515525
GetTLI(GetTLI), PSI(PSI), F(Callee), DL(F.getDataLayout()), ORE(ORE),
516-
CandidateCall(Call) {}
526+
CandidateCall(Call), EphValuesCache(EphValuesCache) {}
517527

518528
InlineResult analyze();
519529

@@ -1126,9 +1136,9 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
11261136
function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
11271137
ProfileSummaryInfo *PSI = nullptr,
11281138
OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true,
1129-
bool IgnoreThreshold = false)
1139+
bool IgnoreThreshold = false, EphValuesCacheT *EphValuesCache = nullptr)
11301140
: CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI, PSI,
1131-
ORE),
1141+
ORE, EphValuesCache),
11321142
ComputeFullInlineCost(OptComputeFullInlineCost ||
11331143
Params.ComputeFullInlineCost || ORE ||
11341144
isCostBenefitAnalysisEnabled()),
@@ -2781,11 +2791,27 @@ InlineResult CallAnalyzer::analyze() {
27812791
NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
27822792
NumAllocaArgs = SROAArgValues.size();
27832793

2784-
// FIXME: If a caller has multiple calls to a callee, we end up recomputing
2785-
// the ephemeral values multiple times (and they're completely determined by
2786-
// the callee, so this is purely duplicate work).
2787-
SmallPtrSet<const Value *, 32> EphValues;
2788-
CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
2794+
// Collecting the ephemeral values of `F` can be expensive, so collect them
2795+
// once per function and cache them for future reuse.
2796+
SmallPtrSet<const Value *, 32> EphValuesSet;
2797+
SmallPtrSet<const Value *, 32> *EphValues;
2798+
auto &AC = GetAssumptionCache(F);
2799+
if (EphValuesCache == nullptr) {
2800+
// No cache is being used, so collect the values every time.
2801+
CodeMetrics::collectEphemeralValues(&F, &AC, EphValuesSet);
2802+
EphValues = &EphValuesSet;
2803+
} else {
2804+
// If the ephemeral values for `F` are in the cache, then reuse them. Else
2805+
// collect them from scratch.
2806+
if (EphValuesCache->size() >= EphValuesCacheSizeLimit)
2807+
// If the cache has reached its size limit, remove the first element.
2808+
// NOTE: This is a linear-time operation so keep the cache size small!
2809+
EphValuesCache->erase(EphValuesCache->begin()->first);
2810+
auto Pair = EphValuesCache->insert({&F, {}});
2811+
if (Pair.second)
2812+
CodeMetrics::collectEphemeralValues(&F, &AC, Pair.first->second);
2813+
EphValues = &Pair.first->second;
2814+
}
27892815

27902816
// The worklist of live basic blocks in the callee *after* inlining. We avoid
27912817
// adding basic blocks of the callee which can be proven to be dead for this
@@ -2824,7 +2850,7 @@ InlineResult CallAnalyzer::analyze() {
28242850

28252851
// Analyze the cost of this block. If we blow through the threshold, this
28262852
// returns false, and we can bail on out.
2827-
InlineResult IR = analyzeBlock(BB, EphValues);
2853+
InlineResult IR = analyzeBlock(BB, *EphValues);
28282854
if (!IR.isSuccess())
28292855
return IR;
28302856

@@ -2967,9 +2993,11 @@ InlineCost llvm::getInlineCost(
29672993
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
29682994
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
29692995
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2970-
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2996+
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
2997+
EphValuesCacheT *EphValuesCache) {
29712998
return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
2972-
GetAssumptionCache, GetTLI, GetBFI, PSI, ORE);
2999+
GetAssumptionCache, GetTLI, GetBFI, PSI, ORE,
3000+
EphValuesCache);
29733001
}
29743002

29753003
std::optional<int> llvm::getInliningCostEstimate(
@@ -3089,7 +3117,8 @@ InlineCost llvm::getInlineCost(
30893117
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
30903118
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
30913119
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
3092-
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
3120+
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
3121+
EphValuesCacheT *EphValuesCache) {
30933122

30943123
auto UserDecision =
30953124
llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI);
@@ -3105,7 +3134,9 @@ InlineCost llvm::getInlineCost(
31053134
<< ")\n");
31063135

31073136
InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
3108-
GetAssumptionCache, GetBFI, GetTLI, PSI, ORE);
3137+
GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
3138+
/*BoostIndirect=*/true, /*IgnoreThreshold=*/false,
3139+
EphValuesCache);
31093140
InlineResult ShouldInline = CA.analyze();
31103141

31113142
LLVM_DEBUG(CA.dump());

0 commit comments

Comments
 (0)