Skip to content

Commit 131854f

Browse files
committed
[ctxprof] Use the flattened contextual profile pre-thinlink
1 parent 3489b48 commit 131854f

File tree

11 files changed

+112
-101
lines changed

11 files changed

+112
-101
lines changed

llvm/include/llvm/Analysis/ProfileSummaryInfo.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ class ProfileSummaryInfo {
6464
ProfileSummaryInfo(ProfileSummaryInfo &&Arg) = default;
6565

6666
/// If \p Other is non-null, adopt it as the summary. Otherwise, if no summary is present, attempt to refresh.
67-
void refresh();
67+
void refresh(std::unique_ptr<ProfileSummary> &&Other = nullptr);
6868

6969
/// Returns true if profile summary is available.
7070
bool hasProfileSummary() const { return Summary != nullptr; }

llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,11 @@ namespace llvm {
1717

1818
class PGOCtxProfFlatteningPass
1919
: public PassInfoMixin<PGOCtxProfFlatteningPass> {
20+
const bool IsPreThinlink;
21+
2022
public:
21-
explicit PGOCtxProfFlatteningPass() = default;
23+
explicit PGOCtxProfFlatteningPass(bool IsPreThinlink)
24+
: IsPreThinlink(IsPreThinlink) {}
2225
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
2326
};
2427
} // namespace llvm

llvm/lib/Analysis/CtxProfAnalysis.cpp

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@ static cl::opt<CtxProfAnalysisPrinterPass::PrintMode> PrintLevel(
3939
"just the yaml representation of the profile")),
4040
cl::desc("Verbosity level of the contextual profile printer pass."));
4141

42+
static cl::opt<bool> ForceIsInSpecializedModule(
43+
"ctx-profile-force-is-specialized", cl::init(false),
44+
cl::desc("Treat the given module as-if it were containing the "
45+
"post-thinlink module containing the root"));
46+
4247
const char *AssignGUIDPass::GUIDMetadataName = "guid";
4348

4449
PreservedAnalyses AssignGUIDPass::run(Module &M, ModuleAnalysisManager &MAM) {
@@ -278,6 +283,12 @@ void PGOContextualProfile::initIndex() {
278283
});
279284
}
280285

286+
bool PGOContextualProfile::isInSpecializedModule() const {
287+
return ForceIsInSpecializedModule.getNumOccurrences() > 0
288+
? ForceIsInSpecializedModule
289+
: IsInSpecializedModule;
290+
}
291+
281292
void PGOContextualProfile::update(Visitor V, const Function &F) {
282293
assert(isFunctionKnown(F));
283294
GlobalValue::GUID G = getDefinedFunctionGUID(F);
@@ -299,20 +310,27 @@ void PGOContextualProfile::visit(ConstVisitor V, const Function *F) const {
299310

300311
const CtxProfFlatProfile PGOContextualProfile::flatten() const {
301312
CtxProfFlatProfile Flat;
313+
auto Accummulate = +[](SmallVectorImpl<uint64_t> &Into,
314+
const SmallVectorImpl<uint64_t> &From) {
315+
if (Into.empty())
316+
Into.resize(From.size());
317+
assert(Into.size() == From.size() &&
318+
"All contexts corresponding to a function should have the exact "
319+
"same number of counters.");
320+
for (size_t I = 0, E = Into.size(); I < E; ++I)
321+
Into[I] += From[I];
322+
};
323+
302324
preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
303325
const PGOCtxProfContext>(
304326
Profiles.Contexts, [&](const PGOCtxProfContext &Ctx) {
305-
auto [It, Ins] = Flat.insert({Ctx.guid(), {}});
306-
if (Ins) {
307-
llvm::append_range(It->second, Ctx.counters());
308-
return;
309-
}
310-
assert(It->second.size() == Ctx.counters().size() &&
311-
"All contexts corresponding to a function should have the exact "
312-
"same number of counters.");
313-
for (size_t I = 0, E = It->second.size(); I < E; ++I)
314-
It->second[I] += Ctx.counters()[I];
327+
Accummulate(Flat[Ctx.guid()], Ctx.counters());
315328
});
329+
for (const auto &[_, RC] : Profiles.Contexts)
330+
for (const auto &[G, Unh] : RC.getUnhandled())
331+
Accummulate(Flat[G], Unh);
332+
for (const auto &[G, FC] : Profiles.FlatProfiles)
333+
Accummulate(Flat[G], FC);
316334
return Flat;
317335
}
318336

llvm/lib/Analysis/ProfileSummaryInfo.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,11 @@ static cl::opt<double> PartialSampleProfileWorkingSetSizeScaleFactor(
4747
// any backend passes (IR level instrumentation, for example). This method
4848
// checks if the Summary is null and if so checks if the summary metadata is now
4949
// available in the module and parses it to get the Summary object.
50-
void ProfileSummaryInfo::refresh() {
50+
void ProfileSummaryInfo::refresh(std::unique_ptr<ProfileSummary> &&Other) {
51+
if (Other) {
52+
Summary.swap(Other);
53+
return;
54+
}
5155
if (hasProfileSummary())
5256
return;
5357
// First try to get context sensitive ProfileSummary.

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1048,7 +1048,7 @@ PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
10481048
if (!UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPostLink) {
10491049
MPM.addPass(GlobalOptPass());
10501050
MPM.addPass(GlobalDCEPass());
1051-
MPM.addPass(PGOCtxProfFlatteningPass());
1051+
MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false));
10521052
}
10531053

10541054
MPM.addPass(createModuleToFunctionPassAdaptor(
@@ -1242,8 +1242,10 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
12421242
// FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
12431243
// mechanism for GUIDs.
12441244
MPM.addPass(AssignGUIDPass());
1245-
if (IsCtxProfUse)
1245+
if (IsCtxProfUse) {
1246+
MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true));
12461247
return MPM;
1248+
}
12471249
// Block further inlining in the instrumented ctxprof case. This avoids
12481250
// confusingly collecting profiles for the same GUID corresponding to
12491251
// different variants of the function. We could do like PGO and identify

llvm/lib/Passes/PassRegistry.def

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,9 @@ MODULE_PASS("coro-early", CoroEarlyPass())
6161
MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass())
6262
MODULE_PASS("ctx-instr-gen",
6363
PGOInstrumentationGen(PGOInstrumentationType::CTXPROF))
64-
MODULE_PASS("ctx-prof-flatten", PGOCtxProfFlatteningPass())
64+
MODULE_PASS("ctx-prof-flatten", PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false))
65+
MODULE_PASS("ctx-prof-flatten-prethinlink",
66+
PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true))
6567
MODULE_PASS("noinline-nonprevailing", NoinlineNonPrevailing())
6668
MODULE_PASS("deadargelim", DeadArgumentEliminationPass())
6769
MODULE_PASS("debugify", NewPMDebugifyPass())

llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,6 @@ class ProfileAnnotator final {
185185
// To be accessed through getBBInfo() after construction.
186186
std::map<const BasicBlock *, BBInfo> BBInfos;
187187
std::vector<EdgeInfo> EdgeInfos;
188-
InstrProfSummaryBuilder &PB;
189188

190189
// This is an adaptation of PGOUseFunc::populateCounters.
191190
// FIXME(mtrofin): look into factoring the code to share one implementation.
@@ -284,9 +283,8 @@ class ProfileAnnotator final {
284283
}
285284

286285
public:
287-
ProfileAnnotator(Function &F, const SmallVectorImpl<uint64_t> &Counters,
288-
InstrProfSummaryBuilder &PB)
289-
: F(F), Counters(Counters), PB(PB) {
286+
ProfileAnnotator(Function &F, const SmallVectorImpl<uint64_t> &Counters)
287+
: F(F), Counters(Counters) {
290288
assert(!F.isDeclaration());
291289
assert(!Counters.empty());
292290
size_t NrEdges = 0;
@@ -351,8 +349,6 @@ class ProfileAnnotator final {
351349
(TotalCount > TrueCount ? TotalCount - TrueCount : 0U);
352350
setProfMetadata(F.getParent(), SI, {TrueCount, FalseCount},
353351
std::max(TrueCount, FalseCount));
354-
PB.addInternalCount(TrueCount);
355-
PB.addInternalCount(FalseCount);
356352
}
357353
}
358354
}
@@ -364,7 +360,6 @@ class ProfileAnnotator final {
364360
assert(!Counters.empty());
365361
propagateCounterValues(Counters);
366362
F.setEntryCount(Counters[0]);
367-
PB.addEntryCount(Counters[0]);
368363

369364
for (auto &BB : F) {
370365
const auto &BBInfo = getBBInfo(BB);
@@ -381,7 +376,6 @@ class ProfileAnnotator final {
381376
if (EdgeCount > MaxCount)
382377
MaxCount = EdgeCount;
383378
EdgeCounts[SuccIdx] = EdgeCount;
384-
PB.addInternalCount(EdgeCount);
385379
}
386380

387381
if (MaxCount != 0)
@@ -431,16 +425,20 @@ PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
431425
// e.g. synthetic weights, etc) because it wouldn't interfere with the
432426
// contextual - based one (which would be in other modules)
433427
auto OnExit = llvm::make_scope_exit([&]() {
428+
if (IsPreThinlink)
429+
return;
434430
for (auto &F : M)
435431
removeInstrumentation(F);
436432
});
437433
auto &CtxProf = MAM.getResult<CtxProfAnalysis>(M);
438-
if (CtxProf.contexts().empty())
434+
// Post-thinlink, we only reprocess the module(s) containing the
435+
// contextual tree. For everything else, OnExit will just clean the
436+
// instrumentation.
437+
if (!IsPreThinlink && !CtxProf.isInSpecializedModule())
439438
return PreservedAnalyses::none();
440439

441440
const auto FlattenedProfile = CtxProf.flatten();
442441

443-
InstrProfSummaryBuilder PB(ProfileSummaryBuilder::DefaultCutoffs);
444442
for (auto &F : M) {
445443
if (F.isDeclaration())
446444
continue;
@@ -456,15 +454,26 @@ PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
456454
if (It == FlattenedProfile.end())
457455
clearColdFunctionProfile(F);
458456
else {
459-
ProfileAnnotator S(F, It->second, PB);
457+
ProfileAnnotator S(F, It->second);
460458
S.assignProfileData();
461459
}
462460
}
463-
464-
auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);
461+
InstrProfSummaryBuilder PB(ProfileSummaryBuilder::DefaultCutoffs);
462+
// Use the flat profiles here just so the importer doesn't complain about
463+
// how different the PSIs are between the module with the roots and the
464+
// various modules it imports.
465+
for (auto &C : FlattenedProfile) {
466+
PB.addEntryCount(C.second[0]);
467+
for (auto V : llvm::drop_begin(C.second))
468+
PB.addInternalCount(V);
469+
}
465470

466471
M.setProfileSummary(PB.getSummary()->getMD(M.getContext()),
467472
ProfileSummary::Kind::PSK_Instr);
468-
PSI.refresh();
473+
PreservedAnalyses PA;
474+
PA.abandon<ProfileSummaryAnalysis>();
475+
MAM.invalidate(M, PA);
476+
auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);
477+
PSI.refresh(PB.getSummary());
469478
return PreservedAnalyses::none();
470479
}

llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll

Lines changed: 39 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44
; RUN: split-file %s %t
55
; RUN: llvm-ctxprof-util fromYAML --input=%t/profile.yaml --output=%t/profile.ctxprofdata
66
; RUN: opt -module-summary -passes='thinlto-pre-link<O2>' -use-ctx-profile=%t/profile.ctxprofdata \
7-
; RUN: %t/example.ll -S -o %t/prelink.ll
8-
; RUN: FileCheck --input-file %t/prelink.ll %s --check-prefix=PRELINK
9-
; RUN: opt -passes='ctx-prof-flatten' -use-ctx-profile=%t/profile.ctxprofdata %t/prelink.ll -S | FileCheck %s
7+
; RUN: %t/example.ll -S -o %t/4909520559318251808.ll
8+
; RUN: FileCheck --input-file %t/4909520559318251808.ll %s --check-prefix=PRELINK
9+
10+
; RUN: opt -passes='ctx-prof-flatten' -use-ctx-profile=%t/profile.ctxprofdata %t/4909520559318251808.ll -S | FileCheck %s --check-prefix=POSTLINK
1011
;
1112
;
1213
; Check that instrumentation occurs where expected: the "no" block for both foo and
@@ -18,57 +19,73 @@
1819
; PRELINK-NEXT: call void @llvm.instrprof.increment(ptr @foo, i64 [[#]], i32 2, i32 1)
1920

2021
; PRELINK-LABEL: @an_entrypoint
22+
; PRELINK: br i1 %t, label %yes, label %common.ret, !prof ![[PREPROF:[0-9]+]]
2123
; PRELINK-LABEL: yes:
2224
; PRELINK-NEXT: call void @llvm.instrprof.increment(ptr @an_entrypoint, i64 [[#]], i32 2, i32 1)
23-
; PRELINK-NOT: "ProfileSummary"
25+
; PRELINK: ![[#]] = !{i32 1, !"ProfileSummary", !1}
26+
; PRELINK: ![[#]] = !{!"TotalCount", i64 3595}
27+
; PRELINK: ![[#]] = !{!"MaxCount", i64 3000}
28+
; PRELINK: ![[#]] = !{!"MaxInternalCount", i64 3000}
29+
; PRELINK: ![[#]] = !{!"MaxFunctionCount", i64 300}
30+
; PRELINK: ![[#]] = !{!"NumCounts", i64 6}
31+
; PRELINK: ![[#]] = !{!"NumFunctions", i64 3}
32+
; PRELINK: ![[PREPROF]] = !{!"branch_weights", i32 40, i32 60}
2433

2534
; Check that the output has:
2635
; - no instrumentation
2736
; - the 2 functions have an entry count
2837
; - each conditional branch has profile annotation
2938
;
30-
; CHECK-NOT: call void @llvm.instrprof
39+
; POSTLINK-NOT: call void @llvm.instrprof
3140
;
3241
; make sure we have function entry counts, branch weights, and a profile summary.
33-
; CHECK-LABEL: @foo
34-
; CHECK-SAME: !prof ![[FOO_EP:[0-9]+]]
35-
; CHECK: br i1 %t, label %yes, label %no, !prof ![[FOO_BW:[0-9]+]]
36-
; CHECK-LABEL: @an_entrypoint
37-
; CHECK-SAME: !prof ![[AN_ENTRYPOINT_EP:[0-9]+]]
38-
; CHECK: br i1 %t, label %yes, label %common.ret, !prof ![[AN_ENTRYPOINT_BW:[0-9]+]]
42+
; POSTLINK-LABEL: @foo
43+
; POSTLINK-SAME: !prof ![[FOO_EP:[0-9]+]]
44+
; POSTLINK: br i1 %t, label %yes, label %no, !prof ![[FOO_BW:[0-9]+]]
45+
; POSTLINK-LABEL: @an_entrypoint
46+
; POSTLINK-SAME: !prof ![[AN_ENTRYPOINT_EP:[0-9]+]]
47+
; POSTLINK: br i1 %t, label %yes, label %common.ret, !prof ![[AN_ENTRYPOINT_BW:[0-9]+]]
3948

49+
; The postlink summary is restricted to the stuff under the root - including the
50+
; "unhandled" data.
51+
; POSTLINK: ![[#]] = !{i32 1, !"ProfileSummary", !1}
52+
; POSTLINK: ![[#]] = !{!"TotalCount", i64 1495}
53+
; POSTLINK: ![[#]] = !{!"MaxCount", i64 1000}
54+
; POSTLINK: ![[#]] = !{!"MaxInternalCount", i64 1000}
55+
; POSTLINK: ![[#]] = !{!"MaxFunctionCount", i64 200}
56+
; POSTLINK: ![[#]] = !{!"NumCounts", i64 6}
57+
; POSTLINK: ![[#]] = !{!"NumFunctions", i64 3}
4058

41-
; CHECK: ![[#]] = !{i32 1, !"ProfileSummary", !1}
42-
; CHECK: ![[#]] = !{!"TotalCount", i64 480}
43-
; CHECK: ![[#]] = !{!"MaxCount", i64 140}
44-
; CHECK: ![[#]] = !{!"MaxInternalCount", i64 125}
45-
; CHECK: ![[#]] = !{!"MaxFunctionCount", i64 140}
46-
; CHECK: ![[#]] = !{!"NumCounts", i64 6}
47-
; CHECK: ![[#]] = !{!"NumFunctions", i64 2}
4859
;
4960
; @foo will be called both unconditionally and conditionally, on the "yes" branch
5061
; which has a count of 40. So 140 times.
5162

52-
; CHECK: ![[FOO_EP]] = !{!"function_entry_count", i64 140}
63+
; POSTLINK: ![[FOO_EP]] = !{!"function_entry_count", i64 140}
5364

5465
; foo's "no" branch is taken 10+5 times (from the 2 contexts belonging to foo).
5566
; Which means its "yes" branch is taken 140 - 15 times.
5667

57-
; CHECK: ![[FOO_BW]] = !{!"branch_weights", i32 125, i32 15}
58-
; CHECK: ![[AN_ENTRYPOINT_EP]] = !{!"function_entry_count", i64 100}
59-
; CHECK: ![[AN_ENTRYPOINT_BW]] = !{!"branch_weights", i32 40, i32 60}
68+
; POSTLINK: ![[FOO_BW]] = !{!"branch_weights", i32 125, i32 15}
69+
; POSTLINK: ![[AN_ENTRYPOINT_EP]] = !{!"function_entry_count", i64 100}
70+
; POSTLINK: ![[AN_ENTRYPOINT_BW]] = !{!"branch_weights", i32 40, i32 60}
6071

6172
;--- profile.yaml
6273
Contexts:
6374
- Guid: 4909520559318251808
6475
TotalRootEntryCount: 100
76+
Unhandled:
77+
- Guid: 1234
78+
Counters: [200, 1000]
6579
Counters: [100, 40]
6680
Callsites: -
6781
- Guid: 11872291593386833696
6882
Counters: [ 100, 5 ]
6983
-
7084
- Guid: 11872291593386833696
7185
Counters: [ 40, 10 ]
86+
FlatProfiles:
87+
- Guid: 1234
88+
Counters: [ 100, 2000 ]
7289
;--- example.ll
7390
declare void @bar()
7491

llvm/test/Analysis/CtxProfAnalysis/flatten-check-path.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@
77
; RUN: llvm-ctxprof-util fromYAML --input=%t/profile_pump.yaml --output=%t/profile_pump.ctxprofdata
88
; RUN: llvm-ctxprof-util fromYAML --input=%t/profile_unreachable.yaml --output=%t/profile_unreachable.ctxprofdata
99
;
10-
; RUN: opt -passes=ctx-prof-flatten %t/example_ok.ll -use-ctx-profile=%t/profile_ok.ctxprofdata -S -o - | FileCheck %s
11-
; RUN: not --crash opt -passes=ctx-prof-flatten %t/message_pump.ll -use-ctx-profile=%t/profile_pump.ctxprofdata -S 2>&1 | FileCheck %s --check-prefix=ASSERTION
12-
; RUN: not --crash opt -passes=ctx-prof-flatten %t/unreachable.ll -use-ctx-profile=%t/profile_unreachable.ctxprofdata -S 2>&1 | FileCheck %s --check-prefix=ASSERTION
10+
; RUN: opt -passes=ctx-prof-flatten %t/example_ok.ll -ctx-profile-force-is-specialized -use-ctx-profile=%t/profile_ok.ctxprofdata -S -o - | FileCheck %s
11+
; RUN: not --crash opt -passes=ctx-prof-flatten %t/message_pump.ll -ctx-profile-force-is-specialized -use-ctx-profile=%t/profile_pump.ctxprofdata -S 2>&1 | FileCheck %s --check-prefix=ASSERTION
12+
; RUN: not --crash opt -passes=ctx-prof-flatten %t/unreachable.ll -ctx-profile-force-is-specialized -use-ctx-profile=%t/profile_unreachable.ctxprofdata -S 2>&1 | FileCheck %s --check-prefix=ASSERTION
1313

1414
; CHECK: br i1 %x, label %b1, label %exit, !prof ![[PROF1:[0-9]+]]
1515
; CHECK: br i1 %y, label %blk, label %exit, !prof ![[PROF2:[0-9]+]]

llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; Check that flattened profile lowering handles cold subgraphs that end in "unreachable"
22
; RUN: split-file %s %t
33
; RUN: llvm-ctxprof-util fromYAML --input=%t/profile.yaml --output=%t/profile.ctxprofdata
4-
; RUN: opt -passes=ctx-prof-flatten %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s
4+
; RUN: opt -passes=ctx-prof-flatten %t/1234.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s
55

66
; CHECK-LABEL: entry:
77
; CHECK: br i1 %t, label %yes, label %no, !prof ![[C1:[0-9]+]]
@@ -16,7 +16,7 @@
1616
; CHECK-NOT: !prof
1717
; CHECK: ![[C1]] = !{!"branch_weights", i32 6, i32 0}
1818

19-
;--- example.ll
19+
;--- 1234.ll
2020
define void @f1(i32 %cond) !guid !0 {
2121
entry:
2222
call void @llvm.instrprof.increment(ptr @f1, i64 42, i32 42, i32 0)

0 commit comments

Comments
 (0)