Skip to content

Commit daaa62e

Browse files
committed
[InlineCost] Use EphemeralValuesCache to improve compile time
`CallAnalyzer::analyze()` can take several hours to run if the callee contains thousands of `@llvm.assume()` calls and there are thousands of call sites. The time is spent in `CodeMetrics::collectEphemeralValues()`. This patch adds caching to InlineCost so that the ephemeral values are collected only once per function.
1 parent 47571c3 commit daaa62e

File tree

5 files changed

+62
-26
lines changed

5 files changed

+62
-26
lines changed

llvm/include/llvm/Analysis/InlineCost.h

Lines changed: 12 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@ class Function;
3131
class ProfileSummaryInfo;
3232
class TargetTransformInfo;
3333
class TargetLibraryInfo;
34+
class EphemeralValuesCache;
3435

3536
namespace InlineConstants {
3637
// Various thresholds used by inline cost analysis.
@@ -273,14 +274,14 @@ int getCallsiteCost(const TargetTransformInfo &TTI, const CallBase &Call,
273274
///
274275
/// Also note that calling this function *dynamically* computes the cost of
275276
/// inlining the callsite. It is an expensive, heavyweight call.
276-
InlineCost
277-
getInlineCost(CallBase &Call, const InlineParams &Params,
278-
TargetTransformInfo &CalleeTTI,
279-
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
280-
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
281-
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
282-
ProfileSummaryInfo *PSI = nullptr,
283-
OptimizationRemarkEmitter *ORE = nullptr);
277+
InlineCost getInlineCost(
278+
CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
279+
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
280+
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
281+
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
282+
ProfileSummaryInfo *PSI = nullptr, OptimizationRemarkEmitter *ORE = nullptr,
283+
std::optional<function_ref<EphemeralValuesCache &(Function &)>>
284+
GetEphValuesCache = std::nullopt);
284285

285286
/// Get an InlineCost with the callee explicitly specified.
286287
/// This allows you to calculate the cost of inlining a function via a
@@ -294,7 +295,9 @@ getInlineCost(CallBase &Call, Function *Callee, const InlineParams &Params,
294295
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
295296
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
296297
ProfileSummaryInfo *PSI = nullptr,
297-
OptimizationRemarkEmitter *ORE = nullptr);
298+
OptimizationRemarkEmitter *ORE = nullptr,
299+
std::optional<function_ref<EphemeralValuesCache &(Function &)>>
300+
GetEphValuesCache = std::nullopt);
298301

299302
/// Returns InlineResult::success() if the call site should be always inlined
300303
/// because of user directives, and the inlining is viable. Returns

llvm/lib/Analysis/InlineAdvisor.cpp

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
#include "llvm/ADT/Statistic.h"
1616
#include "llvm/ADT/StringExtras.h"
1717
#include "llvm/Analysis/AssumptionCache.h"
18+
#include "llvm/Analysis/EphemeralValuesCache.h"
1819
#include "llvm/Analysis/InlineCost.h"
1920
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
2021
#include "llvm/Analysis/ProfileSummaryInfo.h"
@@ -150,6 +151,10 @@ std::optional<llvm::InlineCost> static getDefaultInlineAdvice(
150151
auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
151152
return FAM.getResult<TargetLibraryAnalysis>(F);
152153
};
154+
auto GetEphValuesCache =
155+
[&](Function &F) -> EphemeralValuesAnalysis::Result & {
156+
return FAM.getResult<EphemeralValuesAnalysis>(F);
157+
};
153158

154159
Function &Callee = *CB.getCalledFunction();
155160
auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
@@ -158,7 +163,8 @@ std::optional<llvm::InlineCost> static getDefaultInlineAdvice(
158163
Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
159164
DEBUG_TYPE);
160165
return getInlineCost(CB, Params, CalleeTTI, GetAssumptionCache, GetTLI,
161-
GetBFI, PSI, RemarksEnabled ? &ORE : nullptr);
166+
GetBFI, PSI, RemarksEnabled ? &ORE : nullptr,
167+
GetEphValuesCache);
162168
};
163169
return llvm::shouldInline(
164170
CB, CalleeTTI, GetInlineCost, ORE,

llvm/lib/Analysis/InlineCost.cpp

Lines changed: 36 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
#include "llvm/Analysis/BlockFrequencyInfo.h"
2121
#include "llvm/Analysis/CodeMetrics.h"
2222
#include "llvm/Analysis/ConstantFolding.h"
23+
#include "llvm/Analysis/EphemeralValuesCache.h"
2324
#include "llvm/Analysis/InstructionSimplify.h"
2425
#include "llvm/Analysis/LoopInfo.h"
2526
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -269,6 +270,10 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
269270
/// easily cacheable. Instead, use the cover function paramHasAttr.
270271
CallBase &CandidateCall;
271272

273+
/// Getter for the cache of ephemeral values.
274+
std::optional<function_ref<EphemeralValuesCache &(Function &)>>
275+
GetEphValuesCache;
276+
272277
/// Extension points for handling callsite features.
273278
// Called before a basic block was analyzed.
274279
virtual void onBlockStart(const BasicBlock *BB) {}
@@ -462,7 +467,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
462467

463468
// Custom analysis routines.
464469
InlineResult analyzeBlock(BasicBlock *BB,
465-
SmallPtrSetImpl<const Value *> &EphValues);
470+
const SmallPtrSetImpl<const Value *> &EphValues);
466471

467472
// Disable several entry points to the visitor so we don't accidentally use
468473
// them by declaring but not defining them here.
@@ -510,10 +515,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
510515
function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
511516
function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
512517
ProfileSummaryInfo *PSI = nullptr,
513-
OptimizationRemarkEmitter *ORE = nullptr)
518+
OptimizationRemarkEmitter *ORE = nullptr,
519+
std::optional<function_ref<EphemeralValuesCache &(Function &)>>
520+
GetEphValuesCache = std::nullopt)
514521
: TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
515522
GetTLI(GetTLI), PSI(PSI), F(Callee), DL(F.getDataLayout()), ORE(ORE),
516-
CandidateCall(Call) {}
523+
CandidateCall(Call), GetEphValuesCache(GetEphValuesCache) {}
517524

518525
InlineResult analyze();
519526

@@ -1126,9 +1133,11 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
11261133
function_ref<const TargetLibraryInfo &(Function &)> GetTLI = nullptr,
11271134
ProfileSummaryInfo *PSI = nullptr,
11281135
OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true,
1129-
bool IgnoreThreshold = false)
1136+
bool IgnoreThreshold = false,
1137+
std::optional<function_ref<EphemeralValuesCache &(Function &)>>
1138+
GetEphValuesCache = std::nullopt)
11301139
: CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, GetTLI, PSI,
1131-
ORE),
1140+
ORE, GetEphValuesCache),
11321141
ComputeFullInlineCost(OptComputeFullInlineCost ||
11331142
Params.ComputeFullInlineCost || ORE ||
11341143
isCostBenefitAnalysisEnabled()),
@@ -2566,7 +2575,7 @@ bool CallAnalyzer::visitInstruction(Instruction &I) {
25662575
/// viable, and true if inlining remains viable.
25672576
InlineResult
25682577
CallAnalyzer::analyzeBlock(BasicBlock *BB,
2569-
SmallPtrSetImpl<const Value *> &EphValues) {
2578+
const SmallPtrSetImpl<const Value *> &EphValues) {
25702579
for (Instruction &I : *BB) {
25712580
// FIXME: Currently, the number of instructions in a function regardless of
25722581
// our ability to simplify them during inline to constants or dead code,
@@ -2781,11 +2790,15 @@ InlineResult CallAnalyzer::analyze() {
27812790
NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
27822791
NumAllocaArgs = SROAArgValues.size();
27832792

2784-
// FIXME: If a caller has multiple calls to a callee, we end up recomputing
2785-
// the ephemeral values multiple times (and they're completely determined by
2786-
// the callee, so this is purely duplicate work).
2787-
SmallPtrSet<const Value *, 32> EphValues;
2788-
CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
2793+
// Collecting the ephemeral values of `F` can be expensive, so use the
2794+
// ephemeral values cache if available.
2795+
SmallPtrSet<const Value *, 32> EphValuesStorage;
2796+
const SmallPtrSetImpl<const Value *> *EphValues = &EphValuesStorage;
2797+
auto &AC = GetAssumptionCache(F);
2798+
if (GetEphValuesCache)
2799+
EphValues = &(*GetEphValuesCache)(F).ephValues();
2800+
else
2801+
CodeMetrics::collectEphemeralValues(&F, &AC, EphValuesStorage);
27892802

27902803
// The worklist of live basic blocks in the callee *after* inlining. We avoid
27912804
// adding basic blocks of the callee which can be proven to be dead for this
@@ -2824,7 +2837,7 @@ InlineResult CallAnalyzer::analyze() {
28242837

28252838
// Analyze the cost of this block. If we blow through the threshold, this
28262839
// returns false, and we can bail on out.
2827-
InlineResult IR = analyzeBlock(BB, EphValues);
2840+
InlineResult IR = analyzeBlock(BB, *EphValues);
28282841
if (!IR.isSuccess())
28292842
return IR;
28302843

@@ -2967,9 +2980,12 @@ InlineCost llvm::getInlineCost(
29672980
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
29682981
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
29692982
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
2970-
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
2983+
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
2984+
std::optional<function_ref<EphemeralValuesCache &(Function &)>>
2985+
GetEphValuesCache) {
29712986
return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI,
2972-
GetAssumptionCache, GetTLI, GetBFI, PSI, ORE);
2987+
GetAssumptionCache, GetTLI, GetBFI, PSI, ORE,
2988+
GetEphValuesCache);
29732989
}
29742990

29752991
std::optional<int> llvm::getInliningCostEstimate(
@@ -3089,7 +3105,9 @@ InlineCost llvm::getInlineCost(
30893105
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
30903106
function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
30913107
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
3092-
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) {
3108+
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
3109+
std::optional<function_ref<EphemeralValuesCache &(Function &)>>
3110+
GetEphValuesCache) {
30933111

30943112
auto UserDecision =
30953113
llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI);
@@ -3105,7 +3123,9 @@ InlineCost llvm::getInlineCost(
31053123
<< ")\n");
31063124

31073125
InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI,
3108-
GetAssumptionCache, GetBFI, GetTLI, PSI, ORE);
3126+
GetAssumptionCache, GetBFI, GetTLI, PSI, ORE,
3127+
/*BoostIndirect=*/true, /*IgnoreThreshold=*/false,
3128+
GetEphValuesCache);
31093129
InlineResult ShouldInline = CA.analyze();
31103130

31113131
LLVM_DEBUG(CA.dump());

llvm/lib/Transforms/IPO/Inliner.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
#include "llvm/Analysis/BasicAliasAnalysis.h"
2727
#include "llvm/Analysis/BlockFrequencyInfo.h"
2828
#include "llvm/Analysis/CGSCCPassManager.h"
29+
#include "llvm/Analysis/EphemeralValuesCache.h"
2930
#include "llvm/Analysis/InlineAdvisor.h"
3031
#include "llvm/Analysis/InlineCost.h"
3132
#include "llvm/Analysis/LazyCallGraph.h"
@@ -388,6 +389,9 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
388389
Advice->recordUnsuccessfulInlining(IR);
389390
continue;
390391
}
392+
// TODO: Shouldn't we be invalidating all analyses on F here?
393+
// The caller was modified, so invalidate Ephemeral Values.
394+
FAM.getResult<EphemeralValuesAnalysis>(F).clear();
391395

392396
DidInline = true;
393397
InlinedCallees.insert(&Callee);

llvm/test/Transforms/Inline/cgscc-incremental-invalidate.ll

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,15 +12,18 @@
1212
; CHECK: Invalidating analysis: LoopAnalysis on test1_f
1313
; CHECK: Invalidating analysis: BranchProbabilityAnalysis on test1_f
1414
; CHECK: Invalidating analysis: BlockFrequencyAnalysis on test1_f
15+
; CHECK: Invalidating analysis: EphemeralValuesAnalysis on test1_f
1516
; CHECK: Running analysis: DominatorTreeAnalysis on test1_g
1617
; CHECK: Invalidating analysis: DominatorTreeAnalysis on test1_g
1718
; CHECK: Invalidating analysis: LoopAnalysis on test1_g
1819
; CHECK: Invalidating analysis: BranchProbabilityAnalysis on test1_g
1920
; CHECK: Invalidating analysis: BlockFrequencyAnalysis on test1_g
21+
; CHECK: Invalidating analysis: EphemeralValuesAnalysis on test1_g
2022
; CHECK: Invalidating analysis: DominatorTreeAnalysis on test1_h
2123
; CHECK: Invalidating analysis: LoopAnalysis on test1_h
2224
; CHECK: Invalidating analysis: BranchProbabilityAnalysis on test1_h
2325
; CHECK: Invalidating analysis: BlockFrequencyAnalysis on test1_h
26+
; CHECK: Invalidating analysis: EphemeralValuesAnalysis on test1_h
2427
; CHECK-NOT: Invalidating analysis:
2528
; CHECK: Running pass: DominatorTreeVerifierPass on test1_g
2629
; CHECK-NEXT: Running analysis: DominatorTreeAnalysis on test1_g

0 commit comments

Comments
 (0)