Skip to content

Commit c8365fe

Browse files
authored
[ctx_prof] Simple ICP criteria during module inliner (#109881)
This is mostly for testing: under contextual profiling, we perform ICP for those indirect callsites which have targets marked as `alwaysinline`. This helped uncover a bug in the way the profile was updated upon ICP, where we were skipping the update if the target wasn't called in that context. That resulted in incorrect counts for the indirect BB. Also a drive-by fix to the total/direct count values: they should be 64-bit (as all counters are in the contextual profile).
1 parent 4db0cc4 commit c8365fe

File tree

5 files changed

+130
-18
lines changed

5 files changed

+130
-18
lines changed

llvm/include/llvm/Analysis/CtxProfAnalysis.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#ifndef LLVM_ANALYSIS_CTXPROFANALYSIS_H
1010
#define LLVM_ANALYSIS_CTXPROFANALYSIS_H
1111

12+
#include "llvm/ADT/SetVector.h"
1213
#include "llvm/IR/GlobalValue.h"
1314
#include "llvm/IR/InstrTypes.h"
1415
#include "llvm/IR/IntrinsicInst.h"
@@ -63,6 +64,13 @@ class PGOContextualProfile {
6364
return getDefinedFunctionGUID(F) != 0;
6465
}
6566

67+
/// Returns the name stored in FuncInfo for the function identified by
/// \p GUID, or an empty StringRef when FuncInfo has no entry for that GUID.
StringRef getFunctionName(GlobalValue::GUID GUID) const {
68+
auto It = FuncInfo.find(GUID);
69+
// An unknown GUID is not an error here: report it as an empty name so
// callers can test the result with empty().
if (It == FuncInfo.end())
70+
return "";
71+
return It->second.Name;
72+
}
73+
6674
uint32_t getNumCounters(const Function &F) const {
6775
assert(isFunctionKnown(F));
6876
return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCounterIndex;
@@ -120,6 +128,11 @@ class CtxProfAnalysis : public AnalysisInfoMixin<CtxProfAnalysis> {
120128

121129
/// Get the step instrumentation associated with a `select`
122130
static InstrProfIncrementInstStep *getSelectInstrumentation(SelectInst &SI);
131+
132+
// FIXME: refactor to an advisor model, and separate
133+
static void collectIndirectCallPromotionList(
134+
CallBase &IC, Result &Profile,
135+
SetVector<std::pair<CallBase *, Function *>> &Candidates);
123136
};
124137

125138
class CtxProfAnalysisPrinterPass

llvm/lib/Analysis/CtxProfAnalysis.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/Support/CommandLine.h"
2222
#include "llvm/Support/JSON.h"
2323
#include "llvm/Support/MemoryBuffer.h"
24+
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
2425

2526
#define DEBUG_TYPE "ctx_prof"
2627

@@ -309,3 +310,25 @@ const CtxProfFlatProfile PGOContextualProfile::flatten() const {
309310
});
310311
return Flat;
311312
}
313+
314+
/// Collects indirect-call-promotion candidates for the call \p IC.
///
/// For every profile context passed to the visitor, the targets recorded at
/// this callsite's index are examined; each target whose name is known to
/// \p Profile, which resolves to a function in IC's module, and which carries
/// the `alwaysinline` attribute is added to \p Candidates as the pair
/// (&IC, target). Nothing is added when \p IC has no callsite
/// instrumentation.
///
/// \param IC         the call to collect promotion targets for.
/// \param Profile    the contextual profile to consult.
/// \param Candidates output set receiving (callsite, target) pairs; being a
///                   SetVector, a pair found in several contexts is kept once.
void CtxProfAnalysis::collectIndirectCallPromotionList(
315+
CallBase &IC, Result &Profile,
316+
SetVector<std::pair<CallBase *, Function *>> &Candidates) {
317+
const auto *Instr = CtxProfAnalysis::getCallsiteInstrumentation(IC);
318+
// Without callsite instrumentation there is no index to look up in the
// profile, so there is nothing to collect.
if (!Instr)
319+
return;
320+
Module &M = *IC.getParent()->getModule();
321+
// The instrumentation's index is the key into each context's callsites().
const uint32_t CallID = Instr->getIndex()->getZExtValue();
322+
// NOTE(review): passing IC.getCaller() presumably restricts the visit to
// contexts of the calling function - confirm against Profile.visit.
Profile.visit(
323+
[&](const PGOCtxProfContext &Ctx) {
324+
const auto &Targets = Ctx.callsites().find(CallID);
325+
// This context recorded no targets for the callsite.
if (Targets == Ctx.callsites().end())
326+
return;
327+
// Keep only targets that (1) have a name known to the profile, (2) are
// present in this module, and (3) are marked alwaysinline.
for (const auto &[Guid, _] : Targets->second)
328+
if (auto Name = Profile.getFunctionName(Guid); !Name.empty())
329+
if (auto *Target = M.getFunction(Name))
330+
if (Target->hasFnAttribute(Attribute::AlwaysInline))
331+
Candidates.insert({&IC, Target});
332+
},
333+
IC.getCaller());
334+
}

llvm/lib/Transforms/IPO/ModuleInliner.cpp

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,13 @@ using namespace llvm;
4949
STATISTIC(NumInlined, "Number of functions inlined");
5050
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
5151

52+
/// Hidden command-line flag `-ctx-prof-promote-alwaysinline`, off by default.
/// See the description string below for what enabling it requests.
cl::opt<bool> CtxProfPromoteAlwaysInline(
53+
"ctx-prof-promote-alwaysinline", cl::init(false), cl::Hidden,
54+
cl::desc("If using a contextual profile in this module, and an indirect "
55+
"call target is marked as alwaysinline, perform indirect call "
56+
"promotion for that target. If multiple targets for an indirect "
57+
"call site fit this description, they are all promoted."));
58+
5259
/// Return true if the specified inline history ID
5360
/// indicates an inline history that includes the specified function.
5461
static bool inlineHistoryIncludes(
@@ -145,10 +152,11 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
145152
assert(Calls != nullptr && "Expected an initialized InlineOrder");
146153

147154
// Populate the initial list of calls in this module.
155+
SetVector<std::pair<CallBase *, Function *>> ICPCandidates;
148156
for (Function &F : M) {
149157
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
150-
for (Instruction &I : instructions(F))
151-
if (auto *CB = dyn_cast<CallBase>(&I))
158+
for (Instruction &I : instructions(F)) {
159+
if (auto *CB = dyn_cast<CallBase>(&I)) {
152160
if (Function *Callee = CB->getCalledFunction()) {
153161
if (!Callee->isDeclaration())
154162
Calls->push({CB, -1});
@@ -163,7 +171,17 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
163171
<< setIsVerbose();
164172
});
165173
}
174+
} else if (CtxProfPromoteAlwaysInline && CtxProf &&
175+
CB->isIndirectCall()) {
176+
CtxProfAnalysis::collectIndirectCallPromotionList(*CB, CtxProf,
177+
ICPCandidates);
166178
}
179+
}
180+
}
181+
}
182+
for (auto &[CB, Target] : ICPCandidates) {
183+
if (auto *DirectCB = promoteCallWithIfThenElse(*CB, *Target, CtxProf))
184+
Calls->push({DirectCB, -1});
167185
}
168186
if (Calls->empty())
169187
return PreservedAnalyses::all();

llvm/lib/Transforms/Utils/CallPromotionUtils.cpp

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -623,34 +623,37 @@ CallBase *llvm::promoteCallWithIfThenElse(CallBase &CB, Function &Callee,
623623
// All the ctx-es belonging to a function must have the same size counters.
624624
Ctx.resizeCounters(NewCountersSize);
625625

626-
// Maybe in this context, the indirect callsite wasn't observed at all
626+
// Maybe in this context, the indirect callsite wasn't observed at all. That
627+
// would make both direct and indirect BBs cold - which is what we already
628+
// have from resizing the counters.
627629
if (!Ctx.hasCallsite(CSIndex))
628630
return;
629631
auto &CSData = Ctx.callsite(CSIndex);
630-
auto It = CSData.find(CalleeGUID);
631632

632-
// Maybe we did notice the indirect callsite, but to other targets.
633-
if (It == CSData.end())
634-
return;
635-
636-
assert(CalleeGUID == It->second.guid());
637-
638-
uint32_t DirectCount = It->second.getEntrycount();
639-
uint32_t TotalCount = 0;
633+
uint64_t TotalCount = 0;
640634
for (const auto &[_, V] : CSData)
641635
TotalCount += V.getEntrycount();
636+
uint64_t DirectCount = 0;
637+
// If we called the direct target, update the DirectCount. If we didn't, we
638+
// still want to update the indirect BB (to which the TotalCount goes, in
639+
// that case).
640+
if (auto It = CSData.find(CalleeGUID); It != CSData.end()) {
641+
assert(CalleeGUID == It->second.guid());
642+
DirectCount = It->second.getEntrycount();
643+
// This direct target needs to be moved to this caller under the
644+
// newly-allocated callsite index.
645+
assert(Ctx.callsites().count(NewCSID) == 0);
646+
Ctx.ingestContext(NewCSID, std::move(It->second));
647+
CSData.erase(CalleeGUID);
648+
}
649+
642650
assert(TotalCount >= DirectCount);
643-
uint32_t IndirectCount = TotalCount - DirectCount;
651+
uint64_t IndirectCount = TotalCount - DirectCount;
644652
// The ICP's effect is as-if the direct BB would have been taken DirectCount
645653
// times, and the indirect BB, IndirectCount times
646654
Ctx.counters()[DirectID] = DirectCount;
647655
Ctx.counters()[IndirectID] = IndirectCount;
648656

649-
// This particular indirect target needs to be moved to this caller under
650-
// the newly-allocated callsite index.
651-
assert(Ctx.callsites().count(NewCSID) == 0);
652-
Ctx.ingestContext(NewCSID, std::move(It->second));
653-
CSData.erase(CalleeGUID);
654657
};
655658
CtxProf.update(ProfileUpdater, &Caller);
656659
return &DirectCall;
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
; RUN: split-file %s %t
2+
; RUN: llvm-ctxprof-util fromJSON --input %t/profile.json --output %t/profile.ctxprofdata
3+
;
4+
; In the given profile, in one of the contexts where the indirect call is taken, the
5+
; target we're trying to ICP - GUID:1000 - doesn't appear at all. That should
6+
; contribute to the count of the "indirect call BB".
7+
; RUN: opt %t/test.ll -S -passes='require<ctx-prof-analysis>,module-inline,ctx-prof-flatten' -use-ctx-profile=%t/profile.ctxprofdata -ctx-prof-promote-alwaysinline
8+
9+
; CHECK-LABEL: define i32 @caller(ptr %c)
10+
; CHECK-NEXT: [[CND:[0-9]+]] = icmp eq ptr %c, @one
11+
; CHECK-NEXT: br i1 [[CND]], label %{{.*}}, label %{{.*}}, !prof ![[BW:[0-9]+]]
12+
13+
; CHECK: ![[BW]] = !{!"branch_weights", i32 10, i32 10}
14+
15+
;--- test.ll
16+
declare i32 @external(i32 %x)
17+
define i32 @one() #0 !guid !0 {
18+
call void @llvm.instrprof.increment(ptr @one, i64 123, i32 1, i32 0)
19+
call void @llvm.instrprof.callsite(ptr @one, i64 123, i32 1, i32 0, ptr @external)
20+
%ret = call i32 @external(i32 1)
21+
ret i32 %ret
22+
}
23+
24+
define i32 @caller(ptr %c) #1 !guid !1 {
25+
call void @llvm.instrprof.increment(ptr @caller, i64 567, i32 1, i32 0)
26+
call void @llvm.instrprof.callsite(ptr @caller, i64 567, i32 1, i32 0, ptr %c)
27+
%ret = call i32 %c()
28+
ret i32 %ret
29+
}
30+
31+
define i32 @root(ptr %c) !guid !2 {
32+
call void @llvm.instrprof.increment(ptr @root, i64 432, i32 1, i32 0)
33+
call void @llvm.instrprof.callsite(ptr @root, i64 432, i32 2, i32 0, ptr @caller)
34+
%a = call i32 @caller(ptr %c)
35+
call void @llvm.instrprof.callsite(ptr @root, i64 432, i32 2, i32 1, ptr @caller)
36+
%b = call i32 @caller(ptr %c)
37+
%ret = add i32 %a, %b
38+
ret i32 %ret
39+
40+
}
41+
42+
attributes #0 = { alwaysinline }
43+
attributes #1 = { noinline }
44+
!0 = !{i64 1000}
45+
!1 = !{i64 3000}
46+
!2 = !{i64 4000}
47+
48+
;--- profile.json
49+
[ {
50+
"Guid": 4000, "Counters":[10], "Callsites": [
51+
[{"Guid":3000, "Counters":[10], "Callsites":[[{"Guid":1000, "Counters":[10]}]]}],
52+
[{"Guid":3000, "Counters":[10], "Callsites":[[{"Guid":9000, "Counters":[10]}]]}]
53+
]
54+
}
55+
]

0 commit comments

Comments
 (0)