Skip to content

Commit 8f13507

Browse files
committed
[ctx_prof] Simple ICP criteria during module inliner
1 parent 4911235 commit 8f13507

File tree

5 files changed

+145
-24
lines changed

5 files changed

+145
-24
lines changed

llvm/include/llvm/Analysis/CtxProfAnalysis.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#ifndef LLVM_ANALYSIS_CTXPROFANALYSIS_H
1010
#define LLVM_ANALYSIS_CTXPROFANALYSIS_H
1111

12+
#include "llvm/ADT/SetVector.h"
1213
#include "llvm/IR/GlobalValue.h"
1314
#include "llvm/IR/InstrTypes.h"
1415
#include "llvm/IR/IntrinsicInst.h"
@@ -63,6 +64,13 @@ class PGOContextualProfile {
6364
return getDefinedFunctionGUID(F) != 0;
6465
}
6566

67+
/// Look up \p GUID in FuncInfo and return the Name stored in the matching
/// entry. Returns an empty StringRef when FuncInfo has no entry for \p GUID,
/// so callers can test the result with empty() instead of handling a failure.
StringRef getFunctionName(GlobalValue::GUID GUID) const {
68+
auto It = FuncInfo.find(GUID);
69+
if (It == FuncInfo.end())
70+
return "";
71+
return It->second.Name;
72+
}
73+
6674
uint32_t getNumCounters(const Function &F) const {
6775
assert(isFunctionKnown(F));
6876
return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCounterIndex;
@@ -120,6 +128,11 @@ class CtxProfAnalysis : public AnalysisInfoMixin<CtxProfAnalysis> {
120128

121129
/// Get the step instrumentation associated with a `select`
122130
static InstrProfIncrementInstStep *getSelectInstrumentation(SelectInst &SI);
131+
132+
/// Collect indirect-call-promotion candidates for the call site \p IC.
///
/// For every target GUID that \p Profile records at \p IC's callsite index,
/// a pair {&IC, Target} is inserted into \p Candidates when the GUID resolves
/// to a function present in \p IC's module that carries the `alwaysinline`
/// attribute. Nothing is inserted when \p IC has no callsite instrumentation.
/// This only collects candidates; it does not modify \p IC.
// FIXME: refactor to an advisor model, and separate
133+
static void collectIndirectCallPromotionList(
134+
CallBase &IC, Result &Profile,
135+
SetVector<std::pair<CallBase *, Function *>> &Candidates);
123136
};
124137

125138
class CtxProfAnalysisPrinterPass

llvm/lib/Analysis/CtxProfAnalysis.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/Support/CommandLine.h"
2222
#include "llvm/Support/JSON.h"
2323
#include "llvm/Support/MemoryBuffer.h"
24+
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
2425

2526
#define DEBUG_TYPE "ctx_prof"
2627

@@ -309,3 +310,25 @@ const CtxProfFlatProfile PGOContextualProfile::flatten() const {
309310
});
310311
return Flat;
311312
}
313+
314+
/// Gather (call site, target) pairs that qualify for indirect call promotion
/// at \p IC, appending them to \p Candidates.
///
/// A target qualifies when (1) some visited profile context lists its GUID
/// under \p IC's callsite index, (2) \p Profile knows a name for that GUID,
/// (3) a function with that name exists in \p IC's module, and (4) that
/// function has the `alwaysinline` attribute. \p IC itself is left untouched;
/// the actual promotion is up to the caller.
void CtxProfAnalysis::collectIndirectCallPromotionList(
315+
CallBase &IC, Result &Profile,
316+
SetVector<std::pair<CallBase *, Function *>> &Candidates) {
317+
const auto *Instr = CtxProfAnalysis::getCallsiteInstrumentation(IC);
318+
// Without callsite instrumentation there is no index to look up in the
// profile, so there is nothing to collect.
if (!Instr)
319+
return;
320+
Module &M = *IC.getParent()->getModule();
321+
// The instrumentation's index is the key used in each context's callsites().
const uint32_t CallID = Instr->getIndex()->getZExtValue();
322+
// NOTE(review): the second argument presumably restricts the visit to
// contexts belonging to IC's caller - confirm against Profile.visit.
Profile.visit(
323+
[&](const PGOCtxProfContext &Ctx) {
324+
const auto &Targets = Ctx.callsites().find(CallID);
325+
// This context has no data for this callsite index.
if (Targets == Ctx.callsites().end())
326+
return;
327+
// Only the GUID key of each recorded target is needed here; it is
// resolved GUID -> name -> function in this module.
for (const auto &[Guid, _] : Targets->second)
328+
if (auto Name = Profile.getFunctionName(Guid); !Name.empty())
329+
if (auto *Target = M.getFunction(Name))
330+
if (Target->hasFnAttribute(Attribute::AlwaysInline))
331+
Candidates.insert({&IC, Target});
332+
},
333+
IC.getCaller());
334+
}

llvm/lib/Transforms/IPO/ModuleInliner.cpp

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,13 @@ using namespace llvm;
4949
STATISTIC(NumInlined, "Number of functions inlined");
5050
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
5151

52+
// Hidden, off-by-default switch. The module inliner consults it (together
// with the presence of a contextual profile) before collecting indirect call
// promotion candidates whose targets are marked alwaysinline.
// NOTE(review): this option is not declared `static`; confirm whether another
// translation unit references it before narrowing its linkage.
cl::opt<bool> CtxProfPromoteAlwaysInline(
53+
"ctx-prof-promote-alwaysinline", cl::init(false), cl::Hidden,
54+
cl::desc("If using a contextual profile in this module, and an indirect "
55+
"call target is marked as alwaysinline, perform indirect call "
56+
"promotion for that target. If multiple targets for an indirect "
57+
"call site fit this description, they are all promoted."));
58+
5259
/// Return true if the specified inline history ID
5360
/// indicates an inline history that includes the specified function.
5461
static bool inlineHistoryIncludes(
@@ -145,10 +152,11 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
145152
assert(Calls != nullptr && "Expected an initialized InlineOrder");
146153

147154
// Populate the initial list of calls in this module.
155+
SetVector<std::pair<CallBase *, Function *>> ICPCandidates;
148156
for (Function &F : M) {
149157
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
150-
for (Instruction &I : instructions(F))
151-
if (auto *CB = dyn_cast<CallBase>(&I))
158+
for (Instruction &I : instructions(F)) {
159+
if (auto *CB = dyn_cast<CallBase>(&I)) {
152160
if (Function *Callee = CB->getCalledFunction()) {
153161
if (!Callee->isDeclaration())
154162
Calls->push({CB, -1});
@@ -163,7 +171,17 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
163171
<< setIsVerbose();
164172
});
165173
}
174+
} else if (CtxProfPromoteAlwaysInline && CtxProf &&
175+
CB->isIndirectCall()) {
176+
CtxProfAnalysis::collectIndirectCallPromotionList(*CB, CtxProf,
177+
ICPCandidates);
166178
}
179+
}
180+
}
181+
}
182+
for (auto &[CB, Target] : ICPCandidates) {
183+
if (auto *DirectCB = promoteCallWithIfThenElse(*CB, *Target, CtxProf))
184+
Calls->push({DirectCB, -1});
167185
}
168186
if (Calls->empty())
169187
return PreservedAnalyses::all();
@@ -242,13 +260,22 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
242260
// iteration because the next iteration may not happen and we may
243261
// miss inlining it.
244262
// FIXME: enable for ctxprof.
245-
if (!CtxProf)
246-
if (tryPromoteCall(*ICB))
247-
NewCallee = ICB->getCalledFunction();
263+
if (CtxProfPromoteAlwaysInline && CtxProf) {
264+
SetVector<std::pair<CallBase *, Function *>> Candidates;
265+
CtxProfAnalysis::collectIndirectCallPromotionList(*ICB, CtxProf,
266+
Candidates);
267+
for (auto &[DC, _] : Candidates) {
268+
assert(!DC->isIndirectCall());
269+
assert(!DC->getCalledFunction()->isDeclaration() &&
270+
"CtxProf promotes calls to defined targets only");
271+
Calls->push({DC, NewHistoryID});
272+
}
273+
} else if (tryPromoteCall(*ICB)) {
274+
NewCallee = ICB->getCalledFunction();
275+
if (NewCallee && !NewCallee->isDeclaration())
276+
Calls->push({ICB, NewHistoryID});
277+
}
248278
}
249-
if (NewCallee)
250-
if (!NewCallee->isDeclaration())
251-
Calls->push({ICB, NewHistoryID});
252279
}
253280
}
254281

llvm/lib/Transforms/Utils/CallPromotionUtils.cpp

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -623,34 +623,37 @@ CallBase *llvm::promoteCallWithIfThenElse(CallBase &CB, Function &Callee,
623623
// All the ctx-es belonging to a function must have the same size counters.
624624
Ctx.resizeCounters(NewCountersSize);
625625

626-
// Maybe in this context, the indirect callsite wasn't observed at all
626+
// Maybe in this context, the indirect callsite wasn't observed at all. That
627+
// would make both direct and indirect BBs cold - which is what we already
628+
// have from resizing the counters.
627629
if (!Ctx.hasCallsite(CSIndex))
628630
return;
629631
auto &CSData = Ctx.callsite(CSIndex);
630-
auto It = CSData.find(CalleeGUID);
631632

632-
// Maybe we did notice the indirect callsite, but to other targets.
633-
if (It == CSData.end())
634-
return;
635-
636-
assert(CalleeGUID == It->second.guid());
637-
638-
uint32_t DirectCount = It->second.getEntrycount();
639-
uint32_t TotalCount = 0;
633+
uint64_t TotalCount = 0;
640634
for (const auto &[_, V] : CSData)
641635
TotalCount += V.getEntrycount();
636+
uint64_t DirectCount = 0;
637+
// If we called the direct target, update the DirectCount. If we didn't, we
638+
// still want to update the indirect BB (to which the TotalCount goes, in
639+
// that case).
640+
if (auto It = CSData.find(CalleeGUID); It != CSData.end()) {
641+
assert(CalleeGUID == It->second.guid());
642+
DirectCount = It->second.getEntrycount();
643+
// This direct target needs to be moved to this caller under the
644+
// newly-allocated callsite index.
645+
assert(Ctx.callsites().count(NewCSID) == 0);
646+
Ctx.ingestContext(NewCSID, std::move(It->second));
647+
CSData.erase(CalleeGUID);
648+
}
649+
642650
assert(TotalCount >= DirectCount);
643-
uint32_t IndirectCount = TotalCount - DirectCount;
651+
uint64_t IndirectCount = TotalCount - DirectCount;
644652
// The ICP's effect is as-if the direct BB would have been taken DirectCount
645653
// times, and the indirect BB, IndirectCount times
646654
Ctx.counters()[DirectID] = DirectCount;
647655
Ctx.counters()[IndirectID] = IndirectCount;
648656

649-
// This particular indirect target needs to be moved to this caller under
650-
// the newly-allocated callsite index.
651-
assert(Ctx.callsites().count(NewCSID) == 0);
652-
Ctx.ingestContext(NewCSID, std::move(It->second));
653-
CSData.erase(CalleeGUID);
654657
};
655658
CtxProf.update(ProfileUpdater, &Caller);
656659
return &DirectCall;
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
; RUN: split-file %s %t
2+
; RUN: llvm-ctxprof-util fromJSON --input %t/profile.json --output %t/profile.ctxprofdata
3+
;
4+
; In the given profile, in one of the contexts the indirect call is taken, the
5+
; target we're trying to ICP - GUID:1000 - doesn't appear at all. That should
6+
; contribute to the count of the "indirect call BB".
7+
; The opt output must be piped into FileCheck; otherwise the CHECK lines below
; are never evaluated and the test passes vacuously.
; RUN: opt %t/test.ll -S -passes='require<ctx-prof-analysis>,module-inline,ctx-prof-flatten' -use-ctx-profile=%t/profile.ctxprofdata -ctx-prof-promote-alwaysinline | FileCheck %s
8+
9+
; CHECK-LABEL: define i32 @caller(ptr %c)
10+
; The capture includes the '%' sigil so the later use matches `br i1 %N`.
; CHECK-NEXT: [[CND:%[0-9]+]] = icmp eq ptr %c, @one
11+
; CHECK-NEXT: br i1 [[CND]], label %{{.*}}, label %{{.*}}, !prof ![[BW:[0-9]+]]
12+
13+
; CHECK: ![[BW]] = !{!"branch_weights", i32 10, i32 10}
14+
15+
;--- test.ll
16+
declare i32 @external(i32 %x)
17+
define i32 @one() #0 !guid !0 {
18+
call void @llvm.instrprof.increment(ptr @one, i64 123, i32 1, i32 0)
19+
call void @llvm.instrprof.callsite(ptr @one, i64 123, i32 1, i32 0, ptr @external)
20+
%ret = call i32 @external(i32 1)
21+
ret i32 %ret
22+
}
23+
24+
define i32 @caller(ptr %c) #1 !guid !1 {
25+
call void @llvm.instrprof.increment(ptr @caller, i64 567, i32 1, i32 0)
26+
call void @llvm.instrprof.callsite(ptr @caller, i64 567, i32 1, i32 0, ptr %c)
27+
%ret = call i32 %c()
28+
ret i32 %ret
29+
}
30+
31+
define i32 @root(ptr %c) !guid !2 {
32+
call void @llvm.instrprof.increment(ptr @root, i64 432, i32 1, i32 0)
33+
call void @llvm.instrprof.callsite(ptr @root, i64 432, i32 2, i32 0, ptr @caller)
34+
%a = call i32 @caller(ptr %c)
35+
call void @llvm.instrprof.callsite(ptr @root, i64 432, i32 2, i32 1, ptr @caller)
36+
%b = call i32 @caller(ptr %c)
37+
%ret = add i32 %a, %b
38+
ret i32 %ret
39+
40+
}
41+
42+
attributes #0 = { alwaysinline }
43+
attributes #1 = { noinline }
44+
!0 = !{i64 1000}
45+
!1 = !{i64 3000}
46+
!2 = !{i64 4000}
47+
48+
;--- profile.json
49+
[ {
50+
"Guid": 4000, "Counters":[10], "Callsites": [
51+
[{"Guid":3000, "Counters":[10], "Callsites":[[{"Guid":1000, "Counters":[10]}]]}],
52+
[{"Guid":3000, "Counters":[10], "Callsites":[[{"Guid":9000, "Counters":[10]}]]}]
53+
]
54+
}
55+
]

0 commit comments

Comments
 (0)