Skip to content

Commit ba4da5a

Browse files
authored
[ctx_prof] "Use" support for pre-thinlink. (#101338)
There is currently no plan to support contextual profiling use in a non-ThinLTO scenario. In the pre-link phase, we only instrument and then immediately bail out to let the linker group functions under an entrypoint in the same module as the entrypoint. We don't actually care what the profile contains - just that we want to use a contextual profile. After that, in post-thinlink, we require the profile be passed again so we can actually use it. The earlier instrumentation will be used to match counter values. While the feature is in development, we add a hidden flag for the use scenario, but we can eventually tie it to the `PGOOptions` mechanism. We will use the same flag in both pre- and post-thinlink because it simplifies things - usually the post-thinlink args are the same as the ones for pre-thinlink - even though the flag is basically treated as a boolean in pre-thinlink.
1 parent ac319a8 commit ba4da5a

File tree

2 files changed

+57
-2
lines changed

2 files changed

+57
-2
lines changed

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,10 @@ static cl::opt<bool> UseLoopVersioningLICM(
304304
"enable-loop-versioning-licm", cl::init(false), cl::Hidden,
305305
cl::desc("Enable the experimental Loop Versioning LICM pass"));
306306

307+
static cl::opt<std::string>
308+
UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden,
309+
cl::desc("Use the specified contextual profile file"));
310+
307311
namespace llvm {
308312
extern cl::opt<bool> EnableMemProfContextDisambiguation;
309313

@@ -1176,8 +1180,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
11761180
// Enable contextual profiling instrumentation.
11771181
const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink &&
11781182
PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
1183+
const bool IsCtxProfUse = !UseCtxProfile.empty() && !PGOOpt &&
1184+
Phase == ThinOrFullLTOPhase::ThinLTOPreLink;
11791185

1180-
if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen)
1186+
if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
1187+
IsCtxProfUse)
11811188
addPreInlinerPasses(MPM, Level, Phase);
11821189

11831190
// Add all the requested passes for instrumentation PGO, if requested.
@@ -1187,8 +1194,13 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
11871194
/*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
11881195
PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
11891196
PGOOpt->FS);
1190-
} else if (IsCtxProfGen) {
1197+
} else if (IsCtxProfGen || IsCtxProfUse) {
11911198
MPM.addPass(PGOInstrumentationGen(false));
1199+
// In pre-link, we just want the instrumented IR. We use the contextual
1200+
// profile in the post-thinlink phase.
1201+
// The instrumentation will be removed in post-thinlink after IPO.
1202+
if (IsCtxProfUse)
1203+
return MPM;
11921204
addPostPGOLoopRotation(MPM, Level);
11931205
MPM.addPass(PGOCtxProfLoweringPass());
11941206
}
@@ -1655,6 +1667,11 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
16551667
// can.
16561668
MPM.addPass(buildModuleSimplificationPipeline(
16571669
Level, ThinOrFullLTOPhase::ThinLTOPreLink));
1670+
// In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
1671+
// thinlto use the contextual info to perform imports; then use the contextual
1672+
// profile in the post-thinlink phase.
1673+
if (!UseCtxProfile.empty() && !PGOOpt)
1674+
return MPM;
16581675

16591676
// Run partial inlining pass to partially inline functions that have
16601677
// large bodies.
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; There is no profile, but that's OK because the prelink does not care about
3+
; the content of the profile, just that we intend to use one.
4+
; There is currently no scenario for using a ctx profile without thinlto.
5+
;
6+
; RUN: opt -passes='thinlto-pre-link<O2>' -use-ctx-profile=something_that_does_not_exist %s -S | FileCheck %s
7+
8+
declare void @bar()
9+
10+
define void @foo(i32 %a, ptr %fct) {
11+
; CHECK-LABEL: define void @foo(
12+
; CHECK-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) local_unnamed_addr {
13+
; CHECK-NEXT: [[T:%.*]] = icmp eq i32 [[A]], 0
14+
; CHECK-NEXT: br i1 [[T]], label %[[YES:.*]], label %[[NO:.*]]
15+
; CHECK: [[YES]]:
16+
; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1)
17+
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[FCT]] to i64
18+
; CHECK-NEXT: call void @llvm.instrprof.value.profile(ptr @__profn_foo, i64 728453322856651412, i64 [[TMP1]], i32 0, i32 0)
19+
; CHECK-NEXT: call void [[FCT]](i32 0)
20+
; CHECK-NEXT: br label %[[EXIT:.*]]
21+
; CHECK: [[NO]]:
22+
; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0)
23+
; CHECK-NEXT: call void @bar()
24+
; CHECK-NEXT: br label %[[EXIT]]
25+
; CHECK: [[EXIT]]:
26+
; CHECK-NEXT: ret void
27+
;
28+
%t = icmp eq i32 %a, 0
29+
br i1 %t, label %yes, label %no
30+
yes:
31+
call void %fct(i32 %a)
32+
br label %exit
33+
no:
34+
call void @bar()
35+
br label %exit
36+
exit:
37+
ret void
38+
}

0 commit comments

Comments
 (0)