-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[ctxprof] Flatten indirect call info in pre-thinlink compilation #134766
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ctxprof] Flatten indirect call info in pre-thinlink compilation #134766
Conversation
a492868
to
31ec58f
Compare
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-pgo Author: Mircea Trofin (mtrofin) Changes: Same idea as in #134723 - flatten indirect call info in `"VP"` `MD_prof` metadata for the thinlinker, for cases that aren't covered by a contextual profile. Full diff: https://github.com/llvm/llvm-project/pull/134766.diff 4 Files Affected:
diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index 023b5a9bdb848..6f1c3696ca78c 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -21,6 +21,10 @@ namespace llvm {
class CtxProfAnalysis;
+/// Flattened targets of a single indirect callsite, as populated by
+/// PGOContextualProfile::flattenVirtCalls(): callee GUID -> accumulated entry
+/// count of that callee.
+using FlatIndirectTargets = DenseMap<GlobalValue::GUID, uint64_t>;
+/// Flattened indirect call profile, as populated by
+/// PGOContextualProfile::flattenVirtCalls(): GUID of the function containing
+/// the callsites -> callsite ID -> targets recorded for that callsite.
+using CtxProfFlatIndirectCallProfile =
+ DenseMap<GlobalValue::GUID, DenseMap<uint32_t, FlatIndirectTargets>>;
+
/// The instrumented contextual profile, produced by the CtxProfAnalysis.
class PGOContextualProfile {
friend class CtxProfAnalysis;
@@ -101,6 +105,7 @@ class PGOContextualProfile {
void visit(ConstVisitor, const Function *F = nullptr) const;
const CtxProfFlatProfile flatten() const;
+ const CtxProfFlatIndirectCallProfile flattenVirtCalls() const;
bool invalidate(Module &, const PreservedAnalyses &PA,
ModuleAnalysisManager::Invalidator &) {
diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index 4042c87369462..304a77014f407 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -334,6 +334,20 @@ const CtxProfFlatProfile PGOContextualProfile::flatten() const {
return Flat;
}
+/// Collapse the contextual profile into per-function call target counts.
+///
+/// The result is keyed by the GUID of each visited context, then by callsite
+/// ID, then by the GUID of the sub-context reached from that callsite. Each
+/// value is the sum of that sub-context's entry counts over every visited
+/// context sharing the same outer GUID.
+const CtxProfFlatIndirectCallProfile
+PGOContextualProfile::flattenVirtCalls() const {
+  CtxProfFlatIndirectCallProfile Flat;
+  const auto Accumulate = [&Flat](const PGOCtxProfContext &Ctx) {
+    // The per-function entry is created even when Ctx has no callsites.
+    auto &PerCallsite = Flat[Ctx.guid()];
+    for (const auto &[CallsiteID, Callees] : Ctx.callsites())
+      for (const auto &[CalleeGuid, CalleeCtx] : Callees)
+        PerCallsite[CallsiteID][CalleeGuid] += CalleeCtx.getEntrycount();
+  };
+  preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
+                const PGOCtxProfContext>(Profiles.Contexts, Accumulate);
+  return Flat;
+}
+
void CtxProfAnalysis::collectIndirectCallPromotionList(
CallBase &IC, Result &Profile,
SetVector<std::pair<CallBase *, Function *>> &Candidates) {
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
index ffe0f385047c3..9b44d61726fa1 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -36,9 +36,12 @@
#include "llvm/Transforms/Scalar/DCE.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <deque>
+#include <functional>
using namespace llvm;
+#define DEBUG_TYPE "ctx_prof_flatten"
+
namespace {
class ProfileAnnotator final {
@@ -414,6 +417,58 @@ void removeInstrumentation(Function &F) {
I.eraseFromParent();
}
+/// Attach indirect-call-target value profile metadata to \p CB.
+///
+/// \param M the module containing \p CB.
+/// \param CB the indirect call to annotate.
+/// \param FlatProf flattened targets of the enclosing function, keyed by
+///        callsite index.
+/// \param Ins the callsite instrumentation associated with \p CB; its index
+///        selects the entry of \p FlatProf to use.
+///
+/// Does nothing if \p FlatProf has no entry for the callsite index.
+void annotateIndirectCall(
+    Module &M, CallBase &CB,
+    const DenseMap<uint32_t, FlatIndirectTargets> &FlatProf,
+    const InstrProfCallsite &Ins) {
+  const auto Idx = Ins.getIndex()->getZExtValue();
+  const auto FIt = FlatProf.find(Idx);
+  if (FIt == FlatProf.end())
+    return;
+  const auto &Targets = FIt->second;
+  SmallVector<InstrProfValueData, 2> Data;
+  Data.reserve(Targets.size());
+  uint64_t Sum = 0;
+  for (const auto &[Guid, Count] : Targets) {
+    Data.push_back({/*.Value=*/Guid, /*.Count=*/Count});
+    Sum += Count;
+  }
+  // Hottest target first. Ties are broken on the target GUID: llvm::sort is
+  // not a stable sort, so ordering on Count alone would leave equal-count
+  // targets in an unspecified order in the emitted metadata.
+  llvm::sort(Data,
+             [](const InstrProfValueData &A, const InstrProfValueData &B) {
+               if (A.Count != B.Count)
+                 return A.Count > B.Count;
+               return A.Value < B.Value;
+             });
+  llvm::annotateValueSite(M, CB, Data, Sum,
+                          InstrProfValueKind::IPVK_IndirectCallTarget,
+                          Data.size());
+  // Print the metadata node itself; streaming the pointer returned by
+  // getMetadata would only print its address.
+  LLVM_DEBUG({
+    dbgs() << "[ctxprof] flat indirect call prof: " << CB;
+    if (const auto *ProfMD = CB.getMetadata(LLVMContext::MD_prof))
+      dbgs() << " " << *ProfMD;
+    dbgs() << "\n";
+  });
+}
+
+// Annotate every instrumented indirect call in M's function definitions with
+// its flattened target profile. No "Changed" flag is returned: the calling
+// pass' run already assumes the module gets modified (some profile is added),
+// so reporting "no change" from here would not buy anything.
+void annotateIndCalls(Module &M, const CtxProfAnalysis::Result &CtxProf) {
+  const auto PerFunctionProfiles = CtxProf.flattenVirtCalls();
+  for (auto &F : M) {
+    if (F.isDeclaration())
+      continue;
+    const auto ProfIt = PerFunctionProfiles.find(AssignGUIDPass::getGUID(F));
+    if (ProfIt == PerFunctionProfiles.end())
+      continue;
+    for (auto &BB : F)
+      for (auto &I : BB) {
+        auto *Call = dyn_cast<CallBase>(&I);
+        // Only indirect calls that still carry callsite instrumentation can
+        // be matched to a profile entry.
+        if (Call && Call->isIndirectCall())
+          if (auto *Instr = CtxProfAnalysis::getCallsiteInstrumentation(*Call))
+            annotateIndirectCall(M, *Call, ProfIt->second, *Instr);
+      }
+  }
+}
+
} // namespace
PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
@@ -437,6 +492,8 @@ PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
if (!IsPreThinlink && !CtxProf.isInSpecializedModule())
return PreservedAnalyses::none();
+ if (IsPreThinlink)
+ annotateIndCalls(M, CtxProf);
const auto FlattenedProfile = CtxProf.flatten();
for (auto &F : M) {
diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll
new file mode 100644
index 0000000000000..13beddc05c7a2
--- /dev/null
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll
@@ -0,0 +1,50 @@
+; REQUIRES:x86_64-linux
+
+; Test flattening indirect calls into "VP" MD_prof metadata, in prelink.
+
+; RUN: split-file %s %t
+; RUN: llvm-ctxprof-util fromYAML --input %t/profile.yaml --output %t/profile.ctxprofdata
+; RUN: opt -passes=ctx-prof-flatten-prethinlink %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata \
+; RUN: -S -o - | FileCheck %s --check-prefix=PRELINK
+
+; PRELINK: call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 0, ptr %p)
+; PRELINK-NEXT: call void %p(), !prof ![[VPPROF:[0-9]+]]
+; PRELINK-NEXT: call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 1, ptr @bar)
+; PRELINK-NEXT: call void @bar(){{$}}
+; PRELINK: ![[VPPROF]] = !{!"VP", i32 0, i64 5, i64 5678, i64 4, i64 5555, i64 1}
+
+; RUN: cp %t/example.ll %t/1234.ll
+; RUN: opt -passes=ctx-prof-flatten %t/1234.ll -use-ctx-profile=%t/profile.ctxprofdata \
+; RUN: -S -o - | FileCheck %s --check-prefix=POSTLINK
+; RUN: opt -passes=ctx-prof-flatten %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata \
+; RUN: -S -o - | FileCheck %s --check-prefix=POSTLINK
+
+; POSTLINK-NOT: call void %p(), !prof
+;--- example.ll
+
+declare !guid !0 void @bar()
+
+define void @foo(ptr %p) !guid !1 {
+ call void @llvm.instrprof.increment(ptr @foo, i64 1234, i32 1, i32 0)
+ call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 0, ptr %p)
+ call void %p()
+ call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 1, ptr @bar)
+ call void @bar()
+ ret void
+}
+
+!0 = !{i64 8888}
+!1 = !{i64 1234}
+
+;--- profile.yaml
+Contexts:
+ - Guid: 1234
+ TotalRootEntryCount: 5
+ Counters: [5]
+ Callsites:
+ - - Guid: 5555
+ Counters: [1]
+ - Guid: 5678
+ Counters: [4]
+ - - Guid: 8888
+ Counters: [5]
|
@llvm/pr-subscribers-llvm-transforms Author: Mircea Trofin (mtrofin) Changes: Same idea as in #134723 - flatten indirect call info in `"VP"` `MD_prof` metadata for the thinlinker, for cases that aren't covered by a contextual profile. Full diff: https://github.com/llvm/llvm-project/pull/134766.diff 4 Files Affected:
diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index 023b5a9bdb848..6f1c3696ca78c 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -21,6 +21,10 @@ namespace llvm {
class CtxProfAnalysis;
+/// Flattened targets of a single indirect callsite, as populated by
+/// PGOContextualProfile::flattenVirtCalls(): callee GUID -> accumulated entry
+/// count of that callee.
+using FlatIndirectTargets = DenseMap<GlobalValue::GUID, uint64_t>;
+/// Flattened indirect call profile, as populated by
+/// PGOContextualProfile::flattenVirtCalls(): GUID of the function containing
+/// the callsites -> callsite ID -> targets recorded for that callsite.
+using CtxProfFlatIndirectCallProfile =
+ DenseMap<GlobalValue::GUID, DenseMap<uint32_t, FlatIndirectTargets>>;
+
/// The instrumented contextual profile, produced by the CtxProfAnalysis.
class PGOContextualProfile {
friend class CtxProfAnalysis;
@@ -101,6 +105,7 @@ class PGOContextualProfile {
void visit(ConstVisitor, const Function *F = nullptr) const;
const CtxProfFlatProfile flatten() const;
+ const CtxProfFlatIndirectCallProfile flattenVirtCalls() const;
bool invalidate(Module &, const PreservedAnalyses &PA,
ModuleAnalysisManager::Invalidator &) {
diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index 4042c87369462..304a77014f407 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -334,6 +334,20 @@ const CtxProfFlatProfile PGOContextualProfile::flatten() const {
return Flat;
}
+/// Collapse the contextual profile into per-function call target counts.
+///
+/// The result is keyed by the GUID of each visited context, then by callsite
+/// ID, then by the GUID of the sub-context reached from that callsite. Each
+/// value is the sum of that sub-context's entry counts over every visited
+/// context sharing the same outer GUID.
+const CtxProfFlatIndirectCallProfile
+PGOContextualProfile::flattenVirtCalls() const {
+  CtxProfFlatIndirectCallProfile Flat;
+  const auto Accumulate = [&Flat](const PGOCtxProfContext &Ctx) {
+    // The per-function entry is created even when Ctx has no callsites.
+    auto &PerCallsite = Flat[Ctx.guid()];
+    for (const auto &[CallsiteID, Callees] : Ctx.callsites())
+      for (const auto &[CalleeGuid, CalleeCtx] : Callees)
+        PerCallsite[CallsiteID][CalleeGuid] += CalleeCtx.getEntrycount();
+  };
+  preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
+                const PGOCtxProfContext>(Profiles.Contexts, Accumulate);
+  return Flat;
+}
+
void CtxProfAnalysis::collectIndirectCallPromotionList(
CallBase &IC, Result &Profile,
SetVector<std::pair<CallBase *, Function *>> &Candidates) {
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
index ffe0f385047c3..9b44d61726fa1 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -36,9 +36,12 @@
#include "llvm/Transforms/Scalar/DCE.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <deque>
+#include <functional>
using namespace llvm;
+#define DEBUG_TYPE "ctx_prof_flatten"
+
namespace {
class ProfileAnnotator final {
@@ -414,6 +417,58 @@ void removeInstrumentation(Function &F) {
I.eraseFromParent();
}
+/// Attach indirect-call-target value profile metadata to \p CB.
+///
+/// \param M the module containing \p CB.
+/// \param CB the indirect call to annotate.
+/// \param FlatProf flattened targets of the enclosing function, keyed by
+///        callsite index.
+/// \param Ins the callsite instrumentation associated with \p CB; its index
+///        selects the entry of \p FlatProf to use.
+///
+/// Does nothing if \p FlatProf has no entry for the callsite index.
+void annotateIndirectCall(
+    Module &M, CallBase &CB,
+    const DenseMap<uint32_t, FlatIndirectTargets> &FlatProf,
+    const InstrProfCallsite &Ins) {
+  const auto Idx = Ins.getIndex()->getZExtValue();
+  const auto FIt = FlatProf.find(Idx);
+  if (FIt == FlatProf.end())
+    return;
+  const auto &Targets = FIt->second;
+  SmallVector<InstrProfValueData, 2> Data;
+  Data.reserve(Targets.size());
+  uint64_t Sum = 0;
+  for (const auto &[Guid, Count] : Targets) {
+    Data.push_back({/*.Value=*/Guid, /*.Count=*/Count});
+    Sum += Count;
+  }
+  // Hottest target first. Ties are broken on the target GUID: llvm::sort is
+  // not a stable sort, so ordering on Count alone would leave equal-count
+  // targets in an unspecified order in the emitted metadata.
+  llvm::sort(Data,
+             [](const InstrProfValueData &A, const InstrProfValueData &B) {
+               if (A.Count != B.Count)
+                 return A.Count > B.Count;
+               return A.Value < B.Value;
+             });
+  llvm::annotateValueSite(M, CB, Data, Sum,
+                          InstrProfValueKind::IPVK_IndirectCallTarget,
+                          Data.size());
+  // Print the metadata node itself; streaming the pointer returned by
+  // getMetadata would only print its address.
+  LLVM_DEBUG({
+    dbgs() << "[ctxprof] flat indirect call prof: " << CB;
+    if (const auto *ProfMD = CB.getMetadata(LLVMContext::MD_prof))
+      dbgs() << " " << *ProfMD;
+    dbgs() << "\n";
+  });
+}
+
+// Annotate every instrumented indirect call in M's function definitions with
+// its flattened target profile. No "Changed" flag is returned: the calling
+// pass' run already assumes the module gets modified (some profile is added),
+// so reporting "no change" from here would not buy anything.
+void annotateIndCalls(Module &M, const CtxProfAnalysis::Result &CtxProf) {
+  const auto PerFunctionProfiles = CtxProf.flattenVirtCalls();
+  for (auto &F : M) {
+    if (F.isDeclaration())
+      continue;
+    const auto ProfIt = PerFunctionProfiles.find(AssignGUIDPass::getGUID(F));
+    if (ProfIt == PerFunctionProfiles.end())
+      continue;
+    for (auto &BB : F)
+      for (auto &I : BB) {
+        auto *Call = dyn_cast<CallBase>(&I);
+        // Only indirect calls that still carry callsite instrumentation can
+        // be matched to a profile entry.
+        if (Call && Call->isIndirectCall())
+          if (auto *Instr = CtxProfAnalysis::getCallsiteInstrumentation(*Call))
+            annotateIndirectCall(M, *Call, ProfIt->second, *Instr);
+      }
+  }
+}
+
} // namespace
PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
@@ -437,6 +492,8 @@ PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
if (!IsPreThinlink && !CtxProf.isInSpecializedModule())
return PreservedAnalyses::none();
+ if (IsPreThinlink)
+ annotateIndCalls(M, CtxProf);
const auto FlattenedProfile = CtxProf.flatten();
for (auto &F : M) {
diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll
new file mode 100644
index 0000000000000..13beddc05c7a2
--- /dev/null
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-insert-icp-mdprof.ll
@@ -0,0 +1,50 @@
+; REQUIRES:x86_64-linux
+
+; Test flattening indirect calls into "VP" MD_prof metadata, in prelink.
+
+; RUN: split-file %s %t
+; RUN: llvm-ctxprof-util fromYAML --input %t/profile.yaml --output %t/profile.ctxprofdata
+; RUN: opt -passes=ctx-prof-flatten-prethinlink %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata \
+; RUN: -S -o - | FileCheck %s --check-prefix=PRELINK
+
+; PRELINK: call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 0, ptr %p)
+; PRELINK-NEXT: call void %p(), !prof ![[VPPROF:[0-9]+]]
+; PRELINK-NEXT: call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 1, ptr @bar)
+; PRELINK-NEXT: call void @bar(){{$}}
+; PRELINK: ![[VPPROF]] = !{!"VP", i32 0, i64 5, i64 5678, i64 4, i64 5555, i64 1}
+
+; RUN: cp %t/example.ll %t/1234.ll
+; RUN: opt -passes=ctx-prof-flatten %t/1234.ll -use-ctx-profile=%t/profile.ctxprofdata \
+; RUN: -S -o - | FileCheck %s --check-prefix=POSTLINK
+; RUN: opt -passes=ctx-prof-flatten %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata \
+; RUN: -S -o - | FileCheck %s --check-prefix=POSTLINK
+
+; POSTLINK-NOT: call void %p(), !prof
+;--- example.ll
+
+declare !guid !0 void @bar()
+
+define void @foo(ptr %p) !guid !1 {
+ call void @llvm.instrprof.increment(ptr @foo, i64 1234, i32 1, i32 0)
+ call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 0, ptr %p)
+ call void %p()
+ call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 1, ptr @bar)
+ call void @bar()
+ ret void
+}
+
+!0 = !{i64 8888}
+!1 = !{i64 1234}
+
+;--- profile.yaml
+Contexts:
+ - Guid: 1234
+ TotalRootEntryCount: 5
+ Counters: [5]
+ Callsites:
+ - - Guid: 5555
+ Counters: [1]
+ - Guid: 5678
+ Counters: [4]
+ - - Guid: 8888
+ Counters: [5]
|
d1edfa2
to
a31066a
Compare
a9b122b
to
666db91
Compare
a31066a
to
e1eeb79
Compare
666db91
to
7a97443
Compare
e1eeb79
to
f78fe14
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm
f78fe14
to
e465744
Compare
7a97443
to
d0be075
Compare
e465744
to
d443d0c
Compare
d0be075
to
01dbaf7
Compare
…m#134766) Same idea as in llvm#134723 - flatten indirect call info in `"VP"` `MD_prof` metadata for the thinlinker, for cases that aren't covered by a contextual profile. If we don't ICP an indirect call target in the specialized module, the call will fall to the copy of that target outside the specialized module. If the graph under that target also has some indirect calls, in the absence of this pass, we'd have a steeper performance regression - because none of those would have a chance to be ICPed.
Same idea as in #134723 - flatten indirect call info in `"VP"` `MD_prof` metadata for the thinlinker, for cases that aren't covered by a contextual profile. If we don't ICP an indirect call target in the specialized module, the call will fall to the copy of that target outside the specialized module. If the graph under that target also has some indirect calls, in the absence of this pass, we'd have a steeper performance regression - because none of those would have a chance to be ICPed.