[ctxprof] Use the flattened contextual profile pre-thinlink #134723
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-pgo

Author: Mircea Trofin (mtrofin)

Changes

Flatten the profile pre-thinlink so that ThinLTO has something to work with for the parts of the binary that aren't covered by contextual profiles. Post-thinlink, the flattener is re-run and will actually change profile info, but just for the modules containing contextual trees ("specialized modules"). For the rest, the flattener just yanks out the instrumentation.

Patch is 21.36 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/134723.diff

12 Files Affected:
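For orientation before reading the diff, here is a minimal sketch (not part of this patch) of how the two flattening modes could be exercised directly with opt. It reuses the pass names registered in PassRegistry.def below and the llvm-ctxprof-util / -use-ctx-profile plumbing already used by the tests in this PR; the file names and the exact flow are assumptions for illustration only.

; Convert a YAML contextual profile to the binary form the analysis consumes.
; RUN: llvm-ctxprof-util fromYAML --input=%t/profile.yaml --output=%t/profile.ctxprofdata
;
; Pre-thinlink: flatten the contextual profile so ThinLTO has per-function
; counters and a profile summary to work with, while keeping the
; instrumentation in place.
; RUN: opt -passes='ctx-prof-flatten-prethinlink' -use-ctx-profile=%t/profile.ctxprofdata \
; RUN:   %t/example.ll -S -o %t/prelink.ll
;
; Post-thinlink: re-flatten. Only modules containing a contextual tree
; ("specialized modules") get their profile info rewritten; everywhere else
; the pass just removes the instrumentation.
; RUN: opt -passes='ctx-prof-flatten' -use-ctx-profile=%t/profile.ctxprofdata \
; RUN:   %t/prelink.ll -S -o -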
diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index f7f65458c16a9..f5410238d9f42 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -59,7 +59,7 @@ class PGOContextualProfile {
// True if this module is a post-thinlto module containing just functions
// participating in one or more contextual profiles.
- bool isInSpecializedModule() const { return IsInSpecializedModule; }
+ bool isInSpecializedModule() const;
bool isFunctionKnown(const Function &F) const {
return getDefinedFunctionGUID(F) != 0;
diff --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
index ceae3e8a0ddb9..2e32878760b79 100644
--- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -64,7 +64,7 @@ class ProfileSummaryInfo {
ProfileSummaryInfo(ProfileSummaryInfo &&Arg) = default;
/// If no summary is present, attempt to refresh.
- void refresh();
+ void refresh(std::unique_ptr<ProfileSummary> &&Other = nullptr);
/// Returns true if profile summary is available.
bool hasProfileSummary() const { return Summary != nullptr; }
diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h
index 0eab3aaf6fcad..96ff265af4371 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h
@@ -17,8 +17,11 @@ namespace llvm {
class PGOCtxProfFlatteningPass
: public PassInfoMixin<PGOCtxProfFlatteningPass> {
+ const bool IsPreThinlink;
+
public:
- explicit PGOCtxProfFlatteningPass() = default;
+ explicit PGOCtxProfFlatteningPass(bool IsPreThinlink)
+ : IsPreThinlink(IsPreThinlink) {}
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
};
} // namespace llvm
diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index 3ae333b09d0ce..4042c87369462 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -39,6 +39,11 @@ static cl::opt<CtxProfAnalysisPrinterPass::PrintMode> PrintLevel(
"just the yaml representation of the profile")),
cl::desc("Verbosity level of the contextual profile printer pass."));
+static cl::opt<bool> ForceIsInSpecializedModule(
+ "ctx-profile-force-is-specialized", cl::init(false),
+ cl::desc("Treat the given module as-if it were containing the "
+ "post-thinlink module containing the root"));
+
const char *AssignGUIDPass::GUIDMetadataName = "guid";
PreservedAnalyses AssignGUIDPass::run(Module &M, ModuleAnalysisManager &MAM) {
@@ -278,6 +283,12 @@ void PGOContextualProfile::initIndex() {
});
}
+bool PGOContextualProfile::isInSpecializedModule() const {
+ return ForceIsInSpecializedModule.getNumOccurrences() > 0
+ ? ForceIsInSpecializedModule
+ : IsInSpecializedModule;
+}
+
void PGOContextualProfile::update(Visitor V, const Function &F) {
assert(isFunctionKnown(F));
GlobalValue::GUID G = getDefinedFunctionGUID(F);
@@ -299,20 +310,27 @@ void PGOContextualProfile::visit(ConstVisitor V, const Function *F) const {
const CtxProfFlatProfile PGOContextualProfile::flatten() const {
CtxProfFlatProfile Flat;
+ auto Accummulate = +[](SmallVectorImpl<uint64_t> &Into,
+ const SmallVectorImpl<uint64_t> &From) {
+ if (Into.empty())
+ Into.resize(From.size());
+ assert(Into.size() == From.size() &&
+ "All contexts corresponding to a function should have the exact "
+ "same number of counters.");
+ for (size_t I = 0, E = Into.size(); I < E; ++I)
+ Into[I] += From[I];
+ };
+
preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
const PGOCtxProfContext>(
Profiles.Contexts, [&](const PGOCtxProfContext &Ctx) {
- auto [It, Ins] = Flat.insert({Ctx.guid(), {}});
- if (Ins) {
- llvm::append_range(It->second, Ctx.counters());
- return;
- }
- assert(It->second.size() == Ctx.counters().size() &&
- "All contexts corresponding to a function should have the exact "
- "same number of counters.");
- for (size_t I = 0, E = It->second.size(); I < E; ++I)
- It->second[I] += Ctx.counters()[I];
+ Accummulate(Flat[Ctx.guid()], Ctx.counters());
});
+ for (const auto &[_, RC] : Profiles.Contexts)
+ for (const auto &[G, Unh] : RC.getUnhandled())
+ Accummulate(Flat[G], Unh);
+ for (const auto &[G, FC] : Profiles.FlatProfiles)
+ Accummulate(Flat[G], FC);
return Flat;
}
diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index 1a6d2006202bc..d3d7fb997b9d3 100644
--- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -47,7 +47,11 @@ static cl::opt<double> PartialSampleProfileWorkingSetSizeScaleFactor(
// any backend passes (IR level instrumentation, for example). This method
// checks if the Summary is null and if so checks if the summary metadata is now
// available in the module and parses it to get the Summary object.
-void ProfileSummaryInfo::refresh() {
+void ProfileSummaryInfo::refresh(std::unique_ptr<ProfileSummary> &&Other) {
+ if (Other) {
+ Summary.swap(Other);
+ return;
+ }
if (hasProfileSummary())
return;
// First try to get context sensitive ProfileSummary.
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index a18b36ba40754..322c1bc5e63d4 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1048,7 +1048,7 @@ PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
if (!UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPostLink) {
MPM.addPass(GlobalOptPass());
MPM.addPass(GlobalDCEPass());
- MPM.addPass(PGOCtxProfFlatteningPass());
+ MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false));
}
MPM.addPass(createModuleToFunctionPassAdaptor(
@@ -1242,8 +1242,10 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
// mechanism for GUIDs.
MPM.addPass(AssignGUIDPass());
- if (IsCtxProfUse)
+ if (IsCtxProfUse) {
+ MPM.addPass(PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true));
return MPM;
+ }
// Block further inlining in the instrumented ctxprof case. This avoids
// confusingly collecting profiles for the same GUID corresponding to
// different variants of the function. We could do like PGO and identify
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 510a505995304..2ee7145c00cec 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -61,7 +61,9 @@ MODULE_PASS("coro-early", CoroEarlyPass())
MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass())
MODULE_PASS("ctx-instr-gen",
PGOInstrumentationGen(PGOInstrumentationType::CTXPROF))
-MODULE_PASS("ctx-prof-flatten", PGOCtxProfFlatteningPass())
+MODULE_PASS("ctx-prof-flatten", PGOCtxProfFlatteningPass(/*IsPreThinlink=*/false))
+MODULE_PASS("ctx-prof-flatten-prethinlink",
+ PGOCtxProfFlatteningPass(/*IsPreThinlink=*/true))
MODULE_PASS("noinline-nonprevailing", NoinlineNonPrevailing())
MODULE_PASS("deadargelim", DeadArgumentEliminationPass())
MODULE_PASS("debugify", NewPMDebugifyPass())
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
index e6aa374a221da..ffe0f385047c3 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -185,7 +185,6 @@ class ProfileAnnotator final {
// To be accessed through getBBInfo() after construction.
std::map<const BasicBlock *, BBInfo> BBInfos;
std::vector<EdgeInfo> EdgeInfos;
- InstrProfSummaryBuilder &PB;
// This is an adaptation of PGOUseFunc::populateCounters.
// FIXME(mtrofin): look into factoring the code to share one implementation.
@@ -284,9 +283,8 @@ class ProfileAnnotator final {
}
public:
- ProfileAnnotator(Function &F, const SmallVectorImpl<uint64_t> &Counters,
- InstrProfSummaryBuilder &PB)
- : F(F), Counters(Counters), PB(PB) {
+ ProfileAnnotator(Function &F, const SmallVectorImpl<uint64_t> &Counters)
+ : F(F), Counters(Counters) {
assert(!F.isDeclaration());
assert(!Counters.empty());
size_t NrEdges = 0;
@@ -351,8 +349,6 @@ class ProfileAnnotator final {
(TotalCount > TrueCount ? TotalCount - TrueCount : 0U);
setProfMetadata(F.getParent(), SI, {TrueCount, FalseCount},
std::max(TrueCount, FalseCount));
- PB.addInternalCount(TrueCount);
- PB.addInternalCount(FalseCount);
}
}
}
@@ -364,7 +360,6 @@ class ProfileAnnotator final {
assert(!Counters.empty());
propagateCounterValues(Counters);
F.setEntryCount(Counters[0]);
- PB.addEntryCount(Counters[0]);
for (auto &BB : F) {
const auto &BBInfo = getBBInfo(BB);
@@ -381,7 +376,6 @@ class ProfileAnnotator final {
if (EdgeCount > MaxCount)
MaxCount = EdgeCount;
EdgeCounts[SuccIdx] = EdgeCount;
- PB.addInternalCount(EdgeCount);
}
if (MaxCount != 0)
@@ -431,16 +425,20 @@ PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
// e.g. synthetic weights, etc) because it wouldn't interfere with the
// contextual - based one (which would be in other modules)
auto OnExit = llvm::make_scope_exit([&]() {
+ if (IsPreThinlink)
+ return;
for (auto &F : M)
removeInstrumentation(F);
});
auto &CtxProf = MAM.getResult<CtxProfAnalysis>(M);
- if (CtxProf.contexts().empty())
+ // post-thinlink, we only reprocess for the module(s) containing the
+ // contextual tree. For everything else, OnExit will just clean the
+ // instrumentation.
+ if (!IsPreThinlink && !CtxProf.isInSpecializedModule())
return PreservedAnalyses::none();
const auto FlattenedProfile = CtxProf.flatten();
- InstrProfSummaryBuilder PB(ProfileSummaryBuilder::DefaultCutoffs);
for (auto &F : M) {
if (F.isDeclaration())
continue;
@@ -456,15 +454,26 @@ PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
if (It == FlattenedProfile.end())
clearColdFunctionProfile(F);
else {
- ProfileAnnotator S(F, It->second, PB);
+ ProfileAnnotator S(F, It->second);
S.assignProfileData();
}
}
-
- auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);
+ InstrProfSummaryBuilder PB(ProfileSummaryBuilder::DefaultCutoffs);
+ // use here the flat profiles just so the importer doesn't complain about
+ // how different the PSIs are between the module with the roots and the
+ // various modules it imports.
+ for (auto &C : FlattenedProfile) {
+ PB.addEntryCount(C.second[0]);
+ for (auto V : llvm::drop_begin(C.second))
+ PB.addInternalCount(V);
+ }
M.setProfileSummary(PB.getSummary()->getMD(M.getContext()),
ProfileSummary::Kind::PSK_Instr);
- PSI.refresh();
+ PreservedAnalyses PA;
+ PA.abandon<ProfileSummaryAnalysis>();
+ MAM.invalidate(M, PA);
+ auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);
+ PSI.refresh(PB.getSummary());
return PreservedAnalyses::none();
}
diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll
index c7b325bdbfff9..6daf4f5020043 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll
@@ -4,9 +4,10 @@
; RUN: split-file %s %t
; RUN: llvm-ctxprof-util fromYAML --input=%t/profile.yaml --output=%t/profile.ctxprofdata
; RUN: opt -module-summary -passes='thinlto-pre-link<O2>' -use-ctx-profile=%t/profile.ctxprofdata \
-; RUN: %t/example.ll -S -o %t/prelink.ll
-; RUN: FileCheck --input-file %t/prelink.ll %s --check-prefix=PRELINK
-; RUN: opt -passes='ctx-prof-flatten' -use-ctx-profile=%t/profile.ctxprofdata %t/prelink.ll -S | FileCheck %s
+; RUN: %t/example.ll -S -o %t/4909520559318251808.ll
+; RUN: FileCheck --input-file %t/4909520559318251808.ll %s --check-prefix=PRELINK
+
+; RUN: opt -passes='ctx-prof-flatten' -use-ctx-profile=%t/profile.ctxprofdata %t/4909520559318251808.ll -S | FileCheck %s --check-prefix=POSTLINK
;
;
; Check that instrumentation occurs where expected: the "no" block for both foo and
@@ -18,50 +19,63 @@
; PRELINK-NEXT: call void @llvm.instrprof.increment(ptr @foo, i64 [[#]], i32 2, i32 1)
; PRELINK-LABEL: @an_entrypoint
+; PRELINK: br i1 %t, label %yes, label %common.ret, !prof ![[PREPROF:[0-9]+]]
; PRELINK-LABEL: yes:
; PRELINK-NEXT: call void @llvm.instrprof.increment(ptr @an_entrypoint, i64 [[#]], i32 2, i32 1)
-; PRELINK-NOT: "ProfileSummary"
+; PRELINK: ![[#]] = !{i32 1, !"ProfileSummary", !1}
+; PRELINK: ![[#]] = !{!"TotalCount", i64 3595}
+; PRELINK: ![[#]] = !{!"MaxCount", i64 3000}
+; PRELINK: ![[#]] = !{!"MaxInternalCount", i64 3000}
+; PRELINK: ![[#]] = !{!"MaxFunctionCount", i64 300}
+; PRELINK: ![[#]] = !{!"NumCounts", i64 6}
+; PRELINK: ![[#]] = !{!"NumFunctions", i64 3}
+; PRELINK: ![[PREPROF]] = !{!"branch_weights", i32 40, i32 60}
; Check that the output has:
; - no instrumentation
; - the 2 functions have an entry count
; - each conditional branch has profile annotation
;
-; CHECK-NOT: call void @llvm.instrprof
+; POSTLINK-NOT: call void @llvm.instrprof
;
; make sure we have function entry counts, branch weights, and a profile summary.
-; CHECK-LABEL: @foo
-; CHECK-SAME: !prof ![[FOO_EP:[0-9]+]]
-; CHECK: br i1 %t, label %yes, label %no, !prof ![[FOO_BW:[0-9]+]]
-; CHECK-LABEL: @an_entrypoint
-; CHECK-SAME: !prof ![[AN_ENTRYPOINT_EP:[0-9]+]]
-; CHECK: br i1 %t, label %yes, label %common.ret, !prof ![[AN_ENTRYPOINT_BW:[0-9]+]]
+; POSTLINK-LABEL: @foo
+; POSTLINK-SAME: !prof ![[FOO_EP:[0-9]+]]
+; POSTLINK: br i1 %t, label %yes, label %no, !prof ![[FOO_BW:[0-9]+]]
+; POSTLINK-LABEL: @an_entrypoint
+; POSTLINK-SAME: !prof ![[AN_ENTRYPOINT_EP:[0-9]+]]
+; POSTLINK: br i1 %t, label %yes, label %common.ret, !prof ![[AN_ENTRYPOINT_BW:[0-9]+]]
+; The postlink summary is restricted to the stuff under the root - including the
+; "unhandled" data.
+; POSTLINK: ![[#]] = !{i32 1, !"ProfileSummary", !1}
+; POSTLINK: ![[#]] = !{!"TotalCount", i64 1495}
+; POSTLINK: ![[#]] = !{!"MaxCount", i64 1000}
+; POSTLINK: ![[#]] = !{!"MaxInternalCount", i64 1000}
+; POSTLINK: ![[#]] = !{!"MaxFunctionCount", i64 200}
+; POSTLINK: ![[#]] = !{!"NumCounts", i64 6}
+; POSTLINK: ![[#]] = !{!"NumFunctions", i64 3}
-; CHECK: ![[#]] = !{i32 1, !"ProfileSummary", !1}
-; CHECK: ![[#]] = !{!"TotalCount", i64 480}
-; CHECK: ![[#]] = !{!"MaxCount", i64 140}
-; CHECK: ![[#]] = !{!"MaxInternalCount", i64 125}
-; CHECK: ![[#]] = !{!"MaxFunctionCount", i64 140}
-; CHECK: ![[#]] = !{!"NumCounts", i64 6}
-; CHECK: ![[#]] = !{!"NumFunctions", i64 2}
;
; @foo will be called both unconditionally and conditionally, on the "yes" branch
; which has a count of 40. So 140 times.
-; CHECK: ![[FOO_EP]] = !{!"function_entry_count", i64 140}
+; POSTLINK: ![[FOO_EP]] = !{!"function_entry_count", i64 140}
; foo's "no" branch is taken 10+5 times (from the 2 contexts belonging to foo).
; Which means its "yes" branch is taken 140 - 15 times.
-; CHECK: ![[FOO_BW]] = !{!"branch_weights", i32 125, i32 15}
-; CHECK: ![[AN_ENTRYPOINT_EP]] = !{!"function_entry_count", i64 100}
-; CHECK: ![[AN_ENTRYPOINT_BW]] = !{!"branch_weights", i32 40, i32 60}
+; POSTLINK: ![[FOO_BW]] = !{!"branch_weights", i32 125, i32 15}
+; POSTLINK: ![[AN_ENTRYPOINT_EP]] = !{!"function_entry_count", i64 100}
+; POSTLINK: ![[AN_ENTRYPOINT_BW]] = !{!"branch_weights", i32 40, i32 60}
;--- profile.yaml
Contexts:
- Guid: 4909520559318251808
TotalRootEntryCount: 100
+ Unhandled:
+ - Guid: 1234
+ Counters: [200, 1000]
Counters: [100, 40]
Callsites: -
- Guid: 11872291593386833696
@@ -69,6 +83,9 @@ Contexts:
-
- Guid: 11872291593386833696
Counters: [ 40, 10 ]
+FlatProfiles:
+ - Guid: 1234
+ Counters: [ 100, 2000 ]
;--- example.ll
declare void @bar()
diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-check-path.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-check-path.ll
index b10eb6a6ec1b1..bf672998c1e39 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/flatten-check-path.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-check-path.ll
@@ -7,9 +7,9 @@
; RUN: llvm-ctxprof-util fromYAML --input=%t/profile_pump.yaml --output=%t/profile_pump.ctxprofdata
; RUN: llvm-ctxprof-util fromYAML --input=%t/profile_unreachable.yaml --output=%t/profile_unreachable.ctxprofdata
;
-; RUN: opt -passes=ctx-prof-flatten %t/example_ok.ll -use-ctx-profile=%t/profile_ok.ctxprofdata -S -o - | FileCheck %s
-; RUN: not --crash opt -passes=ctx-prof-flatten %t/message_pump.ll -use-ctx-profile=%t/profile_pump.ctxprofdata -S 2>&1 | FileCheck %s --check-prefix=ASSERTION
-; RUN: not --crash opt -passes=ctx-prof-flatten %t/unreachable.ll -use-ctx-profile=%t/profile_unreachable.ctxprofdata -S 2>&1 | FileCheck %s --check-prefix=ASSERTION
+; RUN: opt -passes=ctx-prof-flatten %t/example_ok.ll -ctx-profile-force-is-specialized -use-ctx-profile=%t/profile_ok.ctxprofdata -S -o - | FileCheck %s
+; RUN: not --crash opt -passes=ctx-prof-flatten %t/message_pump.ll -ctx-profile-force-is-specialized -use-ctx-profile=%t/profile_pump.ctxprofdata -S 2>&1 | FileCheck %s --check-prefix=ASSERTION
+; RUN: not --crash opt -passes=ctx-prof-flatten %t/unreachable.ll -ctx-profile-force-is-specialized -use-ctx-profile=%t/profile_unreachable.ctxprofdata -S 2>&1 | FileCheck %s --check-prefix=ASSERTION
; CHECK: br i1 %x, label %b1, label %exit, !prof ![[PROF1:[0-9]+]]
; CHECK: br i1 %y, label %blk, label %exit, !prof ![[PROF2:[0-9]+]]
diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll
index 4c638b6d4cc19..558f385b4bb9d 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll
@@ -1,7 +1,7 @@
; Check that flattened profile lowering handles cold subgraphs that end in "unreachable"
; RUN: split-file %s %t
; RUN: llvm-ctxprof-util fromYAML --input=%t/profile.yaml --output=%t/profile.ctxprofdata
-; RUN: opt -passes=ctx-prof-flatten %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s
+; RUN: opt -passes=ctx-prof-flatten %t/1234.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s
; CHECK-LABEL: entry:
; CHECK: br i1 %t, label %yes, label %no, !prof ![[C1:[0-9]+]]
@@ -16,7 +16,7 @@
; CHECK-NOT: !prof
; CHECK: ![[C1]] = !{!"branch_weights", i32 6, i32 0}
-;--- example.ll
+;--- 1234.ll
define void @f1(i32 %cond) !guid !0 {
entry:
call void @llvm.instrprof.increment(ptr @f1, i64 42, i32 42, i32 0)
diff --git a/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll b/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll
deleted file mode 100644
index 56ed92ea1b7ff..0000000000000
--- a/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll
+++ /dev/null
@@ -1,44 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
-; There is no profile, but that's OK because the prelink does not care about
-; the content of the profile, just that we intend to use one.
-; There is no scenario currently of doing ctx profile use without thinlto.
-;
-; RUN: opt -passes='thinlto-pre-link<O2>' -use-ctx-profile=something_that_does_not_exist %s -S | FileCheck %s
-; RUN: opt -debug-info-for-profi...
[truncated]
lgtm.
…4766) Same idea as in #134723 - flatten indirect call info in `"VP"` `MD_prof` metadata for the thinlinker, for cases that aren't covered by a contextual profile. If we don't ICP an indirect call target in the specialized module, the call will fall to the copy of that target outside the specialized module. If the graph under that target also has some indirect calls, in the absence of this pass, we'd have a steeper performance regression - because none of those would have a chance to be ICPed.
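For readers unfamiliar with value-profile metadata, below is a small hand-written LLVM IR illustration (not taken from either patch) of the `"VP"` `MD_prof` form that flattened indirect-call info takes: value kind 0 (indirect call targets), a total count, then (target GUID, count) pairs. The counts are made up; GUIDs 1234 and 5678 are placeholders echoing the test profiles above.

define void @caller(ptr %fp) {
  ; Indirect call annotated with value-profile data the thin link and ICP can use.
  call void %fp(), !prof !0
  ret void
}

!0 = !{!"VP", i32 0, i64 1600, i64 1234, i64 1000, i64 5678, i64 600}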