Skip to content

Commit f46dd19

Browse files
committed
[mlgo] Incrementally update FunctionPropertiesInfo during inlining
Re-computing FunctionPropertiesInfo after each inlining may be very time consuming in certain cases, e.g. for a large caller with lots of callsites and when the overall IR size doesn't increase (and thus never trips a size bloat threshold). This patch addresses this by incrementally updating FunctionPropertiesInfo. Differential Revision: https://reviews.llvm.org/D125841
1 parent abdf0da commit f46dd19

File tree

5 files changed

+666
-53
lines changed

5 files changed

+666
-53
lines changed

llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,33 @@
1414
#ifndef LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H
1515
#define LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H
1616

17+
#include "llvm/ADT/SmallPtrSet.h"
18+
#include "llvm/ADT/iterator_range.h"
19+
#include "llvm/IR/InstrTypes.h"
1720
#include "llvm/IR/PassManager.h"
1821

1922
namespace llvm {
2023
class Function;
2124
class LoopInfo;
2225

2326
class FunctionPropertiesInfo {
27+
friend class FunctionPropertiesUpdater;
28+
void updateForBB(const BasicBlock &BB, int64_t Direction);
29+
void updateAggregateStats(const Function &F, const LoopInfo &LI);
30+
void reIncludeBB(const BasicBlock &BB, const LoopInfo &LI);
31+
2432
public:
2533
static FunctionPropertiesInfo getFunctionPropertiesInfo(const Function &F,
2634
const LoopInfo &LI);
2735

36+
/// Two FunctionPropertiesInfo objects are equal when their object
/// representations are bytewise identical.
/// NOTE(review): this presumably relies on every member being a plain integer
/// with no padding in between - confirm before adding fields of other types.
/// NOTE(review): std::memcmp is declared in <cstring>; no such include is
/// visible in this header, so it is presumably pulled in transitively - confirm.
bool operator==(const FunctionPropertiesInfo &FPI) const {
37+
return std::memcmp(this, &FPI, sizeof(FunctionPropertiesInfo)) == 0;
38+
}
39+
40+
/// Logical negation of operator==.
bool operator!=(const FunctionPropertiesInfo &FPI) const {
41+
return !(*this == FPI);
42+
}
43+
2844
void print(raw_ostream &OS) const;
2945

3046
/// Number of basic blocks
@@ -57,6 +73,9 @@ class FunctionPropertiesInfo {
5773

5874
// Number of Top Level Loops in the Function
5975
int64_t TopLevelLoopCount = 0;
76+
77+
// All non-debug instructions
78+
int64_t TotalInstructionCount = 0;
6079
};
6180

6281
// Analysis pass
@@ -82,5 +101,24 @@ class FunctionPropertiesPrinterPass
82101
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
83102
};
84103

104+
/// Correctly update FunctionPropertiesInfo post-inlining. A
105+
/// FunctionPropertiesUpdater keeps the state necessary for tracking the changes
106+
/// llvm::InlineFunction makes. The idea is that inlining will at most modify
107+
/// a few BBs of the Caller (maybe the entry BB and definitely the callsite BB)
108+
/// and potentially affect exception handling BBs in the case of invoke
109+
/// inlining.
110+
class FunctionPropertiesUpdater {
111+
public:
112+
/// Records the basic block containing \p CB, that block's function (the
/// caller) and the block's current successors, and subtracts from \p FPI the
/// contribution of the blocks inlining is likely to change.
FunctionPropertiesUpdater(FunctionPropertiesInfo &FPI, const CallBase &CB);
113+
114+
/// Re-adds to the tracked FunctionPropertiesInfo the contribution of the
/// blocks found between the call site block and its recorded successors, then
/// refreshes the aggregate stats using \p LI.
void finish(const LoopInfo &LI);
115+
116+
private:
117+
// The properties being incrementally updated.
FunctionPropertiesInfo &FPI;
118+
// The basic block that contains the call site.
const BasicBlock &CallSiteBB;
119+
// The function that contains CallSiteBB.
const Function &Caller;
120+
121+
// Successors of CallSiteBB as captured by the constructor.
DenseSet<const BasicBlock *> Successors;
122+
};
85123
} // namespace llvm
86124
#endif // LLVM_ANALYSIS_FUNCTIONPROPERTIESANALYSIS_H

llvm/include/llvm/Analysis/MLInlineAdvisor.h

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#ifndef LLVM_ANALYSIS_MLINLINEADVISOR_H
1010
#define LLVM_ANALYSIS_MLINLINEADVISOR_H
1111

12+
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
1213
#include "llvm/Analysis/InlineAdvisor.h"
1314
#include "llvm/Analysis/LazyCallGraph.h"
1415
#include "llvm/Analysis/MLModelRunner.h"
@@ -33,13 +34,17 @@ class MLInlineAdvisor : public InlineAdvisor {
3334
void onPassEntry() override;
3435
void onPassExit(LazyCallGraph::SCC *SCC) override;
3536

36-
int64_t getIRSize(const Function &F) const { return F.getInstructionCount(); }
37+
/// Returns the TotalInstructionCount of the cached FunctionPropertiesInfo
/// for \p F.
int64_t getIRSize(Function &F) const {
38+
return getCachedFPI(F).TotalInstructionCount;
39+
}
3740
void onSuccessfulInlining(const MLInlineAdvice &Advice,
3841
bool CalleeWasDeleted);
3942

4043
bool isForcedToStop() const { return ForceStop; }
4144
int64_t getLocalCalls(Function &F);
4245
const MLModelRunner &getModelRunner() const { return *ModelRunner.get(); }
46+
FunctionPropertiesInfo &getCachedFPI(Function &) const;
47+
const LoopInfo &getLoopInfo(Function &F) const;
4348

4449
protected:
4550
std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) override;
@@ -67,6 +72,8 @@ class MLInlineAdvisor : public InlineAdvisor {
6772
<< "\n";
6873
}
6974

75+
mutable DenseMap<const Function *, FunctionPropertiesInfo> FPICache;
76+
7077
LazyCallGraph &CG;
7178

7279
int64_t NodeCount = 0;
@@ -86,16 +93,7 @@ class MLInlineAdvisor : public InlineAdvisor {
8693
class MLInlineAdvice : public InlineAdvice {
8794
public:
8895
MLInlineAdvice(MLInlineAdvisor *Advisor, CallBase &CB,
89-
OptimizationRemarkEmitter &ORE, bool Recommendation)
90-
: InlineAdvice(Advisor, CB, ORE, Recommendation),
91-
CallerIRSize(Advisor->isForcedToStop() ? 0
92-
: Advisor->getIRSize(*Caller)),
93-
CalleeIRSize(Advisor->isForcedToStop() ? 0
94-
: Advisor->getIRSize(*Callee)),
95-
CallerAndCalleeEdges(Advisor->isForcedToStop()
96-
? 0
97-
: (Advisor->getLocalCalls(*Caller) +
98-
Advisor->getLocalCalls(*Callee))) {}
96+
OptimizationRemarkEmitter &ORE, bool Recommendation);
9997
virtual ~MLInlineAdvice() = default;
10098

10199
void recordInliningImpl() override;
@@ -112,10 +110,14 @@ class MLInlineAdvice : public InlineAdvice {
112110

113111
private:
114112
void reportContextForRemark(DiagnosticInfoOptimizationBase &OR);
115-
113+
void updateCachedCallerFPI();
116114
MLInlineAdvisor *getAdvisor() const {
117115
return static_cast<MLInlineAdvisor *>(Advisor);
118116
};
117+
// Make a copy of the FPI of the caller right before inlining. If inlining
118+
// fails, we can just update the cache with that value.
119+
const FunctionPropertiesInfo PreInlineCallerFPI;
120+
Optional<FunctionPropertiesUpdater> FPU;
119121
};
120122

121123
} // namespace llvm

llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp

Lines changed: 117 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -12,49 +12,75 @@
1212
//===----------------------------------------------------------------------===//
1313

1414
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
15+
#include "llvm/ADT/STLExtras.h"
1516
#include "llvm/Analysis/LoopInfo.h"
17+
#include "llvm/IR/CFG.h"
1618
#include "llvm/IR/Instructions.h"
19+
#include <deque>
1720

1821
using namespace llvm;
1922

20-
FunctionPropertiesInfo
21-
FunctionPropertiesInfo::getFunctionPropertiesInfo(const Function &F,
22-
const LoopInfo &LI) {
23-
24-
FunctionPropertiesInfo FPI;
23+
namespace {
24+
// Returns how many blocks BB's terminator reaches conditionally: the number of
// successors of a conditional branch, or, for a switch, the number of cases
// plus one when a default destination exists. Unconditional branches and any
// other terminator kind yield 0.
int64_t getNrBlocksFromCond(const BasicBlock &BB) {
25+
int64_t Ret = 0;
26+
if (const auto *BI = dyn_cast<BranchInst>(BB.getTerminator())) {
27+
if (BI->isConditional())
28+
Ret += BI->getNumSuccessors();
29+
} else if (const auto *SI = dyn_cast<SwitchInst>(BB.getTerminator())) {
30+
// The bool expression contributes 1 when the default destination is set.
Ret += (SI->getNumCases() + (nullptr != SI->getDefaultDest()));
31+
}
32+
return Ret;
33+
}
2534

26-
FPI.Uses = ((!F.hasLocalLinkage()) ? 1 : 0) + F.getNumUses();
35+
// Returns the number of uses of F, plus one when F does not have local linkage.
// NOTE(review): the extra one presumably stands for potential users outside
// this module - confirm.
int64_t getUses(const Function &F) {
36+
return ((!F.hasLocalLinkage()) ? 1 : 0) + F.getNumUses();
37+
}
38+
} // namespace
2739

28-
for (const auto &BB : F) {
29-
++FPI.BasicBlockCount;
40+
// Adds BB's contribution to the per-block counters (via updateForBB with a
// direction of +1) and raises MaxLoopDepth to BB's loop depth, as reported by
// LI, when that depth is larger than the current maximum.
void FunctionPropertiesInfo::reIncludeBB(const BasicBlock &BB,
41+
const LoopInfo &LI) {
42+
updateForBB(BB, +1);
43+
MaxLoopDepth =
44+
std::max(MaxLoopDepth, static_cast<int64_t>(LI.getLoopDepth(&BB)));
45+
}
3046

31-
if (const auto *BI = dyn_cast<BranchInst>(BB.getTerminator())) {
32-
if (BI->isConditional())
33-
FPI.BlocksReachedFromConditionalInstruction += BI->getNumSuccessors();
34-
} else if (const auto *SI = dyn_cast<SwitchInst>(BB.getTerminator())) {
35-
FPI.BlocksReachedFromConditionalInstruction +=
36-
(SI->getNumCases() + (nullptr != SI->getDefaultDest()));
47+
// Adds (Direction == 1) or removes (Direction == -1) BB's contribution to the
// per-block counters: BasicBlockCount, BlocksReachedFromConditionalInstruction,
// DirectCallsToDefinedFunctions, LoadInstCount, StoreInstCount and
// TotalInstructionCount. Any other Direction value trips the assert.
// A call counts as a direct call to a defined function when its callee is
// known and is neither an intrinsic nor a declaration.
void FunctionPropertiesInfo::updateForBB(const BasicBlock &BB,
48+
int64_t Direction) {
49+
assert(Direction == 1 || Direction == -1);
50+
BasicBlockCount += Direction;
51+
BlocksReachedFromConditionalInstruction +=
52+
(Direction * getNrBlocksFromCond(BB));
53+
for (const auto &I : BB) {
54+
if (auto *CS = dyn_cast<CallBase>(&I)) {
55+
const auto *Callee = CS->getCalledFunction();
56+
if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration())
57+
DirectCallsToDefinedFunctions += Direction;
3758
}
38-
39-
for (const auto &I : BB) {
40-
if (auto *CS = dyn_cast<CallBase>(&I)) {
41-
const auto *Callee = CS->getCalledFunction();
42-
if (Callee && !Callee->isIntrinsic() && !Callee->isDeclaration())
43-
++FPI.DirectCallsToDefinedFunctions;
44-
}
45-
if (I.getOpcode() == Instruction::Load) {
46-
++FPI.LoadInstCount;
47-
} else if (I.getOpcode() == Instruction::Store) {
48-
++FPI.StoreInstCount;
49-
}
59+
if (I.getOpcode() == Instruction::Load) {
60+
LoadInstCount += Direction;
61+
} else if (I.getOpcode() == Instruction::Store) {
62+
StoreInstCount += Direction;
5063
}
51-
// Loop Depth of the Basic Block
52-
int64_t LoopDepth;
53-
LoopDepth = LI.getLoopDepth(&BB);
54-
if (FPI.MaxLoopDepth < LoopDepth)
55-
FPI.MaxLoopDepth = LoopDepth;
5664
}
57-
FPI.TopLevelLoopCount += llvm::size(LI);
65+
TotalInstructionCount += Direction * BB.sizeWithoutDebug();
66+
}
67+
68+
// Overwrites the function-wide (not per-block) features: Uses is recomputed
// from F and TopLevelLoopCount is set to the number of entries LI iterates
// over.
void FunctionPropertiesInfo::updateAggregateStats(const Function &F,
69+
const LoopInfo &LI) {
70+
71+
Uses = getUses(F);
72+
TopLevelLoopCount = llvm::size(LI);
73+
}
74+
75+
// Computes the properties of F from scratch: accumulates every basic block
// that is the entry block or has at least one predecessor, then fills in the
// aggregate stats. Blocks with no predecessors other than the entry block are
// skipped.
FunctionPropertiesInfo
76+
FunctionPropertiesInfo::getFunctionPropertiesInfo(const Function &F,
77+
const LoopInfo &LI) {
78+
79+
FunctionPropertiesInfo FPI;
80+
for (const auto &BB : F)
81+
if (!pred_empty(&BB) || BB.isEntryBlock())
82+
FPI.reIncludeBB(BB, LI);
83+
FPI.updateAggregateStats(F, LI);
5884
return FPI;
5985
}
6086

@@ -68,7 +94,8 @@ void FunctionPropertiesInfo::print(raw_ostream &OS) const {
6894
<< "LoadInstCount: " << LoadInstCount << "\n"
6995
<< "StoreInstCount: " << StoreInstCount << "\n"
7096
<< "MaxLoopDepth: " << MaxLoopDepth << "\n"
71-
<< "TopLevelLoopCount: " << TopLevelLoopCount << "\n\n";
97+
<< "TopLevelLoopCount: " << TopLevelLoopCount << "\n"
98+
<< "TotalInstructionCount: " << TotalInstructionCount << "\n\n";
7299
}
73100

74101
AnalysisKey FunctionPropertiesAnalysis::Key;
@@ -87,3 +114,60 @@ FunctionPropertiesPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
87114
AM.getResult<FunctionPropertiesAnalysis>(F).print(OS);
88115
return PreservedAnalyses::all();
89116
}
117+
118+
// Prepares FPI for the inlining of CB: binds the call site block and its
// function (the caller), records the call site block's current successors, and
// subtracts from FPI the contribution of the call site block, the caller's
// first block and those successors, each counted once.
FunctionPropertiesUpdater::FunctionPropertiesUpdater(
119+
FunctionPropertiesInfo &FPI, const CallBase &CB)
120+
: FPI(FPI), CallSiteBB(*CB.getParent()), Caller(*CallSiteBB.getParent()) {
121+
122+
// For BBs that are likely to change, we subtract from feature totals their
123+
// contribution. Some features, like max loop counts or depths, are left
124+
// invalid, as they will be updated post-inlining.
125+
SmallPtrSet<const BasicBlock *, 4> LikelyToChangeBBs;
126+
// The CB BB will change - it'll either be split or the callee's body (single
127+
// BB) will be pasted in.
128+
LikelyToChangeBBs.insert(&CallSiteBB);
129+
130+
// The caller's entry BB may change due to new alloca instructions.
131+
LikelyToChangeBBs.insert(&*Caller.begin());
132+
133+
// The successors may become unreachable in the case of `invoke` inlining.
134+
// We track successors separately, too, because they form a boundary, together
135+
// with the CB BB ('Entry') between which the inlined callee will be pasted.
136+
Successors.insert(succ_begin(&CallSiteBB), succ_end(&CallSiteBB));
137+
for (const auto *BB : Successors)
138+
LikelyToChangeBBs.insert(BB);
139+
140+
// Commit the change. While some of the BBs accounted for above may play dual
141+
// role - e.g. caller's entry BB may be the same as the callsite BB - set
142+
// insertion semantics make sure we account them once. This needs to be
143+
// followed in `finish`, too.
144+
for (const auto *BB : LikelyToChangeBBs)
145+
FPI.updateForBB(*BB, -1);
146+
}
147+
148+
// Completes the update started by the constructor. Re-adds the caller's first
// block (unless it is the call site block, which the walk handles), then walks
// the CFG breadth-first from the call site block, re-adding each block once
// and not expanding past the successors recorded by the constructor. Finally
// refreshes the aggregate stats and, when assertions are enabled, checks the
// result against a from-scratch recomputation.
void FunctionPropertiesUpdater::finish(const LoopInfo &LI) {
149+
DenseSet<const BasicBlock *> ReIncluded;
150+
std::deque<const BasicBlock *> Worklist;
151+
152+
// The first block was subtracted by the constructor; marking it as
// re-included here keeps the walk below from counting it a second time.
if (&CallSiteBB != &*Caller.begin()) {
153+
FPI.reIncludeBB(*Caller.begin(), LI);
154+
ReIncluded.insert(&*Caller.begin());
155+
}
156+
157+
// Update feature values from the BBs that were copied from the callee, or
158+
// might have been modified because of inlining. The latter have been
159+
// subtracted in the FunctionPropertiesUpdater ctor.
160+
Worklist.push_back(&CallSiteBB);
161+
while (!Worklist.empty()) {
162+
const auto *BB = Worklist.front();
163+
Worklist.pop_front();
164+
if (!ReIncluded.insert(BB).second)
165+
continue;
166+
FPI.reIncludeBB(*BB, LI);
167+
// The recorded successors bound the region being revisited: they are
// re-added themselves, but their own successors are not enqueued.
if (!Successors.contains(BB))
168+
for (const auto *Succ : successors(BB))
169+
Worklist.push_back(Succ);
170+
}
171+
FPI.updateAggregateStats(Caller, LI);
172+
assert(FPI == FunctionPropertiesInfo::getFunctionPropertiesInfo(Caller, LI));
173+
}

0 commit comments

Comments
 (0)