Skip to content

Commit 61ca873

Browse files
committed
[InlineCost] Use EphemeralValuesCache to improve compile time
`CallAnalyzer::analyze()` can take several hours to run if the function contains thousands of `@llvm.assume()` calls and there are thousands of callsites. The time is spent in `CodeMetrics::collectEphemeralValues()`. This patch adds caching to InlineCost so that the ephemeral values are collected only once per function.
1 parent d89a6cb commit 61ca873

File tree

5 files changed

+65
-33
lines changed

5 files changed

+65
-33
lines changed

llvm/include/llvm/Analysis/InlineCost.h

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class Function;
3131
class ProfileSummaryInfo;
3232
class TargetTransformInfo;
3333
class TargetLibraryInfo;
34+
class EphemeralValuesCache;
3435

3536
namespace InlineConstants {
3637
// Various thresholds used by inline cost analysis.
@@ -273,28 +274,29 @@ int getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call,
273274
///
274275
/// Also note that calling this function *dynamically* computes the cost of
275276
/// inlining the callsite. It is an expensive, heavyweight call.
276-
InlineCost
277-
getInlineCost(CallBase &Call, const InlineParams &Params,
278-
TargetTransformInfo &CalleeTTI,
279-
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
280-
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
281-
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
282-
ProfileSummaryInfo *PSI = nullptr,
283-
OptimizationRemarkEmitter *ORE = nullptr);
277+
InlineCost getInlineCost(
278+
CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
279+
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
280+
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
281+
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
282+
ProfileSummaryInfo *PSI = nullptr, OptimizationRemarkEmitter *ORE = nullptr,
283+
function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache =
284+
nullptr);
284285

285286
/// Get an InlineCost with the callee explicitly specified.
286287
/// This allows you to calculate the cost of inlining a function via a
287288
/// pointer. This behaves exactly as the version with no explicit callee
288289
/// parameter in all other respects.
289290
//
290-
InlineCost
291-
getInlineCost(CallBase &Call, Function *Callee, const InlineParams &Params,
292-
TargetTransformInfo &CalleeTTI,
293-
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
294-
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
295-
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
296-
ProfileSummaryInfo *PSI = nullptr,
297-
OptimizationRemarkEmitter *ORE = nullptr);
291+
InlineCost getInlineCost(
292+
CallBase &Call, Function *Callee, const InlineParams &Params,
293+
TargetTransformInfo &CalleeTTI,
294+
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
295+
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
296+
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
297+
ProfileSummaryInfo *PSI = nullptr, OptimizationRemarkEmitter *ORE = nullptr,
298+
function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache =
299+
nullptr);
298300

299301
/// Returns InlineResult::success() if the call site should be always inlined
300302
/// because of user directives, and the inlining is viable. Returns

llvm/lib/Analysis/InlineAdvisor.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "llvm/ADT/Statistic.h"
1616
#include "llvm/ADT/StringExtras.h"
1717
#include "llvm/Analysis/AssumptionCache.h"
18+
#include "llvm/Analysis/EphemeralValuesCache.h"
1819
#include "llvm/Analysis/InlineCost.h"
1920
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
2021
#include "llvm/Analysis/ProfileSummaryInfo.h"
@@ -150,6 +151,10 @@ std::optional<llvm::InlineCost> static getDefaultInlineAdvice(
150151
auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
151152
return FAM.getResult<TargetLibraryAnalysis>(F);
152153
};
154+
auto GetEphValuesCache =
155+
[&](Function &F) -> EphemeralValuesAnalysis::Result & {
156+
return FAM.getResult<EphemeralValuesAnalysis>(F);
157+
};
153158

154159
Function &Callee = *CB.getCalledFunction();
155160
auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
@@ -158,7 +163,8 @@ std::optional<llvm::InlineCost> static getDefaultInlineAdvice(
158163
Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
159164
DEBUG_TYPE);
160165
return getInlineCost(CB, Params, CalleeTTI, GetAssumptionCache, GetTLI,
161-
GetBFI, PSI, RemarksEnabled ? &ORE : nullptr);
166+
GetBFI, PSI, RemarksEnabled ? &ORE : nullptr,
167+
GetEphValuesCache);
162168
};
163169
return llvm::shouldInline(
164170
CB, CalleeTTI, GetInlineCost, ORE,

llvm/lib/Analysis/InlineCost.cpp

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "llvm/Analysis/BlockFrequencyInfo.h"
2121
#include "llvm/Analysis/CodeMetrics.h"
2222
#include "llvm/Analysis/ConstantFolding.h"
23+
#include "llvm/Analysis/EphemeralValuesCache.h"
2324
#include "llvm/Analysis/InstructionSimplify.h"
2425
#include "llvm/Analysis/LoopInfo.h"
2526
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -269,6 +270,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
269270
/// easily cacheable. Instead, use the cover function paramHasAttr.
270271
CallBase &CandidateCall;
271272

273+
/// Getter for the cache of ephemeral values.
274+
function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache = nullptr;
275+
272276
/// Extension points for handling callsite features.
273277
// Called before a basic block was analyzed.
274278
virtual void onBlockStart(const BasicBlock *BB) {}
@@ -462,7 +466,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
462466

463467
// Custom analysis routines.
464468
InlineResult analyzeBlock(BasicBlock *BB,
465-
SmallPtrSetImpl<const Value *> &EphValues);
469+
const SmallPtrSetImpl<const Value *> &EphValues);
466470

467471
// Disable several entry points to the visitor so we don't accidentally use
468472
// them by declaring but not defining them here.
@@ -510,10 +514,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
510514
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
511515
function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
512516
ProfileSummaryInfo *PSI = nullptr,
513-
OptimizationRemarkEmitter *ORE = nullptr)
517+
OptimizationRemarkEmitter *ORE = nullptr,
518+
function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache =
519+
nullptr)
514520
: TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
515521
GetTLI(GetTLI), PSI(PSI), F(Callee), DL(F.getDataLayout()), ORE(ORE),
516-
CandidateCall(Call) {}
522+
CandidateCall(Call), GetEphValuesCache(GetEphValuesCache) {}
517523

518524
InlineResult analyze();
519525

@@ -1126,9 +1132,11 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
11261132
function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
11271133
ProfileSummaryInfo *PSI = nullptr,
11281134
OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true,
1129-
bool IgnoreThreshold = false)
1135+
bool IgnoreThreshold = false,
1136+
function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache =
1137+
nullptr)
11301138
: CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI, PSI,
1131-
ORE),
1139+
ORE, GetEphValuesCache),
11321140
ComputeFullInlineCost(OptComputeFullInlineCost ||
11331141
Params.ComputeFullInlineCost || ORE ||
11341142
isCostBenefitAnalysisEnabled()),
@@ -2566,7 +2574,7 @@ bool CallAnalyzer::visitInstruction(Instruction &I) {
25662574
/// viable, and true if inlining remains viable.
25672575
InlineResult
25682576
CallAnalyzer::analyzeBlock(BasicBlock *BB,
2569-
SmallPtrSetImpl<const Value *> &EphValues) {
2577+
const SmallPtrSetImpl<const Value *> &EphValues) {
25702578
for (Instruction &I : *BB) {
25712579
// FIXME: Currently, the number of instructions in a function regardless of
25722580
// our ability to simplify them during inline to constants or dead code,
@@ -2781,11 +2789,15 @@ InlineResult CallAnalyzer::analyze() {
27812789
NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
27822790
NumAllocaArgs = SROAArgValues.size();
27832791

2784-
// FIXME: If a caller has multiple calls to a callee, we end up recomputing
2785-
// the ephemeral values multiple times (and they're completely determined by
2786-
// the callee, so this is purely duplicate work).
2787-
SmallPtrSet<const Value *, 32> EphValues;
2788-
CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
2792+
// Collecting the ephemeral values of `F` can be expensive, so use the
2793+
// ephemeral values cache if available.
2794+
SmallPtrSet<const Value *, 32> EphValuesStorage;
2795+
const SmallPtrSetImpl<const Value *> *EphValues = &EphValuesStorage;
2796+
if (GetEphValuesCache)
2797+
EphValues = &GetEphValuesCache(F).ephValues();
2798+
else
2799+
CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F),
2800+
EphValuesStorage);
27892801

27902802
// The worklist of live basic blocks in the callee *after* inlining. We avoid
27912803
// adding basic blocks of the callee which can be proven to be dead for this
@@ -2824,7 +2836,7 @@ InlineResult CallAnalyzer::analyze() {
28242836

28252837
// Analyze the cost of this block. If we blow through the threshold, this
28262838
// returns false, and we can bail on out.
2827-
InlineResult IR = analyzeBlock(BB, EphValues);
2839+
InlineResult IR = analyzeBlock(BB, *EphValues);
28282840
if (!IR.isSuccess())
28292841
return IR;
28302842

@@ -2967,9 +2979,11 @@ InlineCost llvm::getInlineCost(
29672979
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
29682980
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
29692981
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2970-
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2982+
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
2983+
function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache) {
29712984
return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
2972-
GetAssumptionCache, GetTLI, GetBFI, PSI, ORE);
2985+
GetAssumptionCache, GetTLI, GetBFI, PSI, ORE,
2986+
GetEphValuesCache);
29732987
}
29742988

29752989
std::optional<int> llvm::getInliningCostEstimate(
@@ -3089,7 +3103,8 @@ InlineCost llvm::getInlineCost(
30893103
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
30903104
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
30913105
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
3092-
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
3106+
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
3107+
function_ref<EphemeralValuesCache &(Function &)> GetEphValuesCache) {
30933108

30943109
auto UserDecision =
30953110
llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI);
@@ -3105,7 +3120,9 @@ InlineCost llvm::getInlineCost(
31053120
<< ")\n");
31063121

31073122
InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
3108-
GetAssumptionCache, GetBFI, GetTLI, PSI, ORE);
3123+
GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
3124+
/*BoostIndirect=*/true, /*IgnoreThreshold=*/false,
3125+
GetEphValuesCache);
31093126
InlineResult ShouldInline = CA.analyze();
31103127

31113128
LLVM_DEBUG(CA.dump());

llvm/lib/Transforms/IPO/Inliner.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "llvm/Analysis/BasicAliasAnalysis.h"
2727
#include "llvm/Analysis/BlockFrequencyInfo.h"
2828
#include "llvm/Analysis/CGSCCPassManager.h"
29+
#include "llvm/Analysis/EphemeralValuesCache.h"
2930
#include "llvm/Analysis/InlineAdvisor.h"
3031
#include "llvm/Analysis/InlineCost.h"
3132
#include "llvm/Analysis/LazyCallGraph.h"
@@ -388,6 +389,9 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
388389
Advice->recordUnsuccessfulInlining(IR);
389390
continue;
390391
}
392+
// TODO: Shouldn't we be invalidating all analyses on F here?
393+
// The caller was modified, so invalidate Ephemeral Values.
394+
FAM.getResult<EphemeralValuesAnalysis>(F).clear();
391395

392396
DidInline = true;
393397
InlinedCallees.insert(&Callee);

llvm/test/Transforms/Inline/cgscc-incremental-invalidate.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,18 @@
1212
; CHECK: Invalidating analysis: LoopAnalysis on test1_f
1313
; CHECK: Invalidating analysis: BranchProbabilityAnalysis on test1_f
1414
; CHECK: Invalidating analysis: BlockFrequencyAnalysis on test1_f
15+
; CHECK: Invalidating analysis: EphemeralValuesAnalysis on test1_f
1516
; CHECK: Running analysis: DominatorTreeAnalysis on test1_g
1617
; CHECK: Invalidating analysis: DominatorTreeAnalysis on test1_g
1718
; CHECK: Invalidating analysis: LoopAnalysis on test1_g
1819
; CHECK: Invalidating analysis: BranchProbabilityAnalysis on test1_g
1920
; CHECK: Invalidating analysis: BlockFrequencyAnalysis on test1_g
21+
; CHECK: Invalidating analysis: EphemeralValuesAnalysis on test1_g
2022
; CHECK: Invalidating analysis: DominatorTreeAnalysis on test1_h
2123
; CHECK: Invalidating analysis: LoopAnalysis on test1_h
2224
; CHECK: Invalidating analysis: BranchProbabilityAnalysis on test1_h
2325
; CHECK: Invalidating analysis: BlockFrequencyAnalysis on test1_h
26+
; CHECK: Invalidating analysis: EphemeralValuesAnalysis on test1_h
2427
; CHECK-NOT: Invalidating analysis:
2528
; CHECK: Running pass: DominatorTreeVerifierPass on test1_g
2629
; CHECK-NEXT: Running analysis: DominatorTreeAnalysis on test1_g

0 commit comments

Comments
 (0)