Skip to content

Commit 442050c

Browse files
authored
[ctxprof] Flatten indirect call info in pre-thinlink compilation (#134766)
Same idea as in #134723 - flatten indirect call info in `"VP"` `MD_prof` metadata for the thinlinker, for cases that aren't covered by a contextual profile. If we don't ICP an indirect call target in the specialized module, the call will fall to the copy of that target outside the specialized module. If the graph under that target also has some indirect calls, in the absence of this pass, we'd have a steeper performance regression - because none of those would have a chance to be ICPed.
1 parent 4c90d97 commit 442050c

File tree

4 files changed

+125
-0
lines changed

4 files changed

+125
-0
lines changed

llvm/include/llvm/Analysis/CtxProfAnalysis.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ namespace llvm {
2121

2222
class CtxProfAnalysis;
2323

24+
/// The flattened targets of a single callsite: maps a callee GUID to the count
/// accumulated for that callee.
using FlatIndirectTargets = DenseMap<GlobalValue::GUID, uint64_t>;
25+
/// The flattened call-target profile of a module: the outer key is the GUID of
/// the calling function, the inner key is a callsite ID within that function.
using CtxProfFlatIndirectCallProfile =
26+
DenseMap<GlobalValue::GUID, DenseMap<uint32_t, FlatIndirectTargets>>;
27+
2428
/// The instrumented contextual profile, produced by the CtxProfAnalysis.
2529
class PGOContextualProfile {
2630
friend class CtxProfAnalysis;
@@ -101,6 +105,7 @@ class PGOContextualProfile {
101105
void visit(ConstVisitor, const Function *F = nullptr) const;
102106

103107
const CtxProfFlatProfile flatten() const;
108+
const CtxProfFlatIndirectCallProfile flattenVirtCalls() const;
104109

105110
bool invalidate(Module &, const PreservedAnalyses &PA,
106111
ModuleAnalysisManager::Invalidator &) {

llvm/lib/Analysis/CtxProfAnalysis.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,20 @@ const CtxProfFlatProfile PGOContextualProfile::flatten() const {
334334
return Flat;
335335
}
336336

337+
/// Collapse the contextual profile into a per-function, per-callsite map of
/// callee GUID to entry count.
///
/// Every context is visited in preorder. For each callsite of a context, the
/// entry count of each callee sub-context is added to the slot identified by
/// (context GUID, callsite ID, callee GUID), so counts for the same function
/// reached through different contexts are summed.
const CtxProfFlatIndirectCallProfile
PGOContextualProfile::flattenVirtCalls() const {
  CtxProfFlatIndirectCallProfile Flat;
  auto Accumulate = [&Flat](const PGOCtxProfContext &Ctx) {
    // Looked up before walking the callsites, so a context without callsites
    // still gets an (empty) entry.
    auto &PerCallsite = Flat[Ctx.guid()];
    for (const auto &[CallsiteID, Callees] : Ctx.callsites())
      for (const auto &[CalleeGuid, CalleeCtx] : Callees)
        PerCallsite[CallsiteID][CalleeGuid] += CalleeCtx.getEntrycount();
  };
  preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
                const PGOCtxProfContext>(Profiles.Contexts, Accumulate);
  return Flat;
}
350+
337351
void CtxProfAnalysis::collectIndirectCallPromotionList(
338352
CallBase &IC, Result &Profile,
339353
SetVector<std::pair<CallBase *, Function *>> &Candidates) {

llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,12 @@
3636
#include "llvm/Transforms/Scalar/DCE.h"
3737
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
3838
#include <deque>
39+
#include <functional>
3940

4041
using namespace llvm;
4142

43+
#define DEBUG_TYPE "ctx_prof_flatten"
44+
4245
namespace {
4346

4447
class ProfileAnnotator final {
@@ -414,6 +417,57 @@ void removeInstrumentation(Function &F) {
414417
I.eraseFromParent();
415418
}
416419

420+
/// Attach flattened value-profile metadata to the indirect call \p CB.
///
/// \param M        forwarded to annotateValueSite.
/// \param CB       the call to annotate.
/// \param FlatProf flattened targets of the enclosing function, keyed by
///                 callsite index.
/// \param Ins      the callsite instrumentation found for \p CB; its index
///                 selects the entry of \p FlatProf to use.
///
/// Does nothing when \p FlatProf has no entry for the callsite index.
void annotateIndirectCall(
    Module &M, CallBase &CB,
    const DenseMap<uint32_t, FlatIndirectTargets> &FlatProf,
    const InstrProfCallsite &Ins) {
  auto Idx = Ins.getIndex()->getZExtValue();
  auto FIt = FlatProf.find(Idx);
  if (FIt == FlatProf.end())
    return;
  const auto &Targets = FIt->second;
  SmallVector<InstrProfValueData, 2> Data;
  Data.reserve(Targets.size());
  uint64_t Sum = 0;
  for (auto &[Guid, Count] : Targets) {
    Data.push_back({/*.Value=*/Guid, /*.Count=*/Count});
    Sum += Count;
  }

  // Hottest target first. Ties are broken on the target GUID so that the
  // emitted metadata does not depend on how the sort happens to order equal
  // elements.
  llvm::sort(Data,
             [](const InstrProfValueData &A, const InstrProfValueData &B) {
               if (A.Count != B.Count)
                 return A.Count > B.Count;
               return A.Value < B.Value;
             });
  llvm::annotateValueSite(M, CB, Data, Sum,
                          InstrProfValueKind::IPVK_IndirectCallTarget,
                          Data.size());
  // Stream the metadata node itself, not the pointer to it: streaming the
  // pointer would only print an address.
  LLVM_DEBUG({
    dbgs() << "[ctxprof] flat indirect call prof: " << CB;
    if (const auto *Prof = CB.getMetadata(LLVMContext::MD_prof))
      dbgs() << *Prof;
    dbgs() << "\n";
  });
}
446+
447+
// We normally return a "Changed" bool, but the calling pass' run assumes
448+
// something will change - some profile will be added - so this won't add much
449+
// by returning false when applicable.
450+
void annotateIndirectCalls(Module &M, const CtxProfAnalysis::Result &CtxProf) {
451+
const auto FlatIndCalls = CtxProf.flattenVirtCalls();
452+
for (auto &F : M) {
453+
if (F.isDeclaration())
454+
continue;
455+
auto FlatProfIter = FlatIndCalls.find(AssignGUIDPass::getGUID(F));
456+
if (FlatProfIter == FlatIndCalls.end())
457+
continue;
458+
const auto &FlatProf = FlatProfIter->second;
459+
for (auto &BB : F) {
460+
for (auto &I : BB) {
461+
auto *CB = dyn_cast<CallBase>(&I);
462+
if (!CB || !CB->isIndirectCall())
463+
continue;
464+
if (auto *Ins = CtxProfAnalysis::getCallsiteInstrumentation(*CB))
465+
annotateIndirectCall(M, *CB, FlatProf, *Ins);
466+
}
467+
}
468+
}
469+
}
470+
417471
} // namespace
418472

419473
PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
@@ -437,6 +491,8 @@ PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
437491
if (!IsPreThinlink && !CtxProf.isInSpecializedModule())
438492
return PreservedAnalyses::none();
439493

494+
if (IsPreThinlink)
495+
annotateIndirectCalls(M, CtxProf);
440496
const auto FlattenedProfile = CtxProf.flatten();
441497

442498
for (auto &F : M) {
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
; REQUIRES:x86_64-linux
2+
3+
; Test flattening indirect calls into "VP" MD_prof metadata, in prelink.
4+
5+
; RUN: split-file %s %t
6+
; RUN: llvm-ctxprof-util fromYAML --input %t/profile.yaml --output %t/profile.ctxprofdata
7+
; RUN: opt -passes=ctx-prof-flatten-prethinlink %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata \
8+
; RUN: -S -o - | FileCheck %s --check-prefix=PRELINK
9+
10+
; PRELINK: call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 0, ptr %p)
11+
; PRELINK-NEXT: call void %p(), !prof ![[VPPROF:[0-9]+]]
12+
; PRELINK-NEXT: call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 1, ptr @bar)
13+
; PRELINK-NEXT: call void @bar(){{$}}
14+
; PRELINK: ![[VPPROF]] = !{!"VP", i32 0, i64 5, i64 5678, i64 4, i64 5555, i64 1}
15+
16+
; RUN: cp %t/example.ll %t/1234.ll
17+
; RUN: opt -passes=ctx-prof-flatten %t/1234.ll -use-ctx-profile=%t/profile.ctxprofdata \
18+
; RUN: -S -o - | FileCheck %s --check-prefix=POSTLINK
19+
; RUN: opt -passes=ctx-prof-flatten %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata \
20+
; RUN: -S -o - | FileCheck %s --check-prefix=POSTLINK
21+
22+
; POSTLINK-NOT: call void %p(), !prof
23+
;--- example.ll
24+
25+
declare !guid !0 void @bar()
26+
27+
; Function under test. It makes one indirect call (through %p) and one direct
; call (to @bar); each is immediately preceded by an llvm.instrprof.callsite
; intrinsic naming the callee. The prelink checks at the top of this file
; expect a !prof attachment on the indirect call only.
define void @foo(ptr %p) !guid !1 {
28+
call void @llvm.instrprof.increment(ptr @foo, i64 1234, i32 1, i32 0)
29+
call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 0, ptr %p)
30+
call void %p()
31+
call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 1, ptr @bar)
32+
call void @bar()
33+
ret void
34+
}
35+
36+
!0 = !{i64 8888}
37+
!1 = !{i64 1234}
38+
39+
;--- profile.yaml
40+
Contexts:
41+
- Guid: 1234
42+
TotalRootEntryCount: 5
43+
Counters: [5]
44+
Callsites:
45+
- - Guid: 5555
46+
Counters: [1]
47+
- Guid: 5678
48+
Counters: [4]
49+
- - Guid: 8888
50+
Counters: [5]

0 commit comments

Comments
 (0)