Skip to content

[ctxprof] Scale up everything under a root by its TotalRootEntryCount #136015

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 36 additions & 23 deletions llvm/lib/Analysis/CtxProfAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -621,17 +621,23 @@ CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) {
return nullptr;
}

template <class ProfilesTy, class ProfTy>
static void preorderVisit(ProfilesTy &Profiles,
function_ref<void(ProfTy &)> Visitor) {
template <class ProfTy>
static void preorderVisitOneRoot(ProfTy &Profile,
function_ref<void(ProfTy &)> Visitor) {
std::function<void(ProfTy &)> Traverser = [&](auto &Ctx) {
Visitor(Ctx);
for (auto &[_, SubCtxSet] : Ctx.callsites())
for (auto &[__, Subctx] : SubCtxSet)
Traverser(Subctx);
};
Traverser(Profile);
}

template <class ProfilesTy, class ProfTy>
static void preorderVisit(ProfilesTy &Profiles,
function_ref<void(ProfTy &)> Visitor) {
for (auto &[_, P] : Profiles)
Traverser(P);
preorderVisitOneRoot<ProfTy>(P, Visitor);
}

void PGOContextualProfile::initIndex() {
Expand Down Expand Up @@ -683,40 +689,47 @@ void PGOContextualProfile::visit(ConstVisitor V, const Function *F) const {
const CtxProfFlatProfile PGOContextualProfile::flatten() const {
CtxProfFlatProfile Flat;
auto Accummulate = [](SmallVectorImpl<uint64_t> &Into,
const SmallVectorImpl<uint64_t> &From) {
const SmallVectorImpl<uint64_t> &From,
uint64_t SamplingRate) {
if (Into.empty())
Into.resize(From.size());
assert(Into.size() == From.size() &&
"All contexts corresponding to a function should have the exact "
"same number of counters.");
for (size_t I = 0, E = Into.size(); I < E; ++I)
Into[I] += From[I];
Into[I] += From[I] * SamplingRate;
};

preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
const PGOCtxProfContext>(
Profiles.Contexts, [&](const PGOCtxProfContext &Ctx) {
Accummulate(Flat[Ctx.guid()], Ctx.counters());
});
for (const auto &[_, RC] : Profiles.Contexts)
for (const auto &[G, Unh] : RC.getUnhandled())
Accummulate(Flat[G], Unh);
for (const auto &[_, CtxRoot] : Profiles.Contexts) {
const uint64_t SamplingFactor = CtxRoot.getTotalRootEntryCount();
preorderVisitOneRoot<const PGOCtxProfContext>(
CtxRoot, [&](const PGOCtxProfContext &Ctx) {
Accummulate(Flat[Ctx.guid()], Ctx.counters(), SamplingFactor);
});

for (const auto &[G, Unh] : CtxRoot.getUnhandled())
Accummulate(Flat[G], Unh, SamplingFactor);
}
// We don't sample "Flat" currently, so sampling rate is 1.
for (const auto &[G, FC] : Profiles.FlatProfiles)
Accummulate(Flat[G], FC);
Accummulate(Flat[G], FC, /*SamplingRate=*/1);
return Flat;
}

const CtxProfFlatIndirectCallProfile
PGOContextualProfile::flattenVirtCalls() const {
CtxProfFlatIndirectCallProfile Ret;
preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
const PGOCtxProfContext>(
Profiles.Contexts, [&](const PGOCtxProfContext &Ctx) {
auto &Targets = Ret[Ctx.guid()];
for (const auto &[ID, SubctxSet] : Ctx.callsites())
for (const auto &Subctx : SubctxSet)
Targets[ID][Subctx.first] += Subctx.second.getEntrycount();
});
for (const auto &[_, CtxRoot] : Profiles.Contexts) {
const uint64_t TotalRootEntryCount = CtxRoot.getTotalRootEntryCount();
preorderVisitOneRoot<const PGOCtxProfContext>(
CtxRoot, [&](const PGOCtxProfContext &Ctx) {
auto &Targets = Ret[Ctx.guid()];
for (const auto &[ID, SubctxSet] : Ctx.callsites())
for (const auto &Subctx : SubctxSet)
Targets[ID][Subctx.first] +=
Subctx.second.getEntrycount() * TotalRootEntryCount;
});
}
return Ret;
}

Expand Down
26 changes: 13 additions & 13 deletions llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@
; PRELINK-LABEL: yes:
; PRELINK-NEXT: call void @llvm.instrprof.increment(ptr @an_entrypoint, i64 [[#]], i32 2, i32 1)
; PRELINK: ![[#]] = !{i32 1, !"ProfileSummary", !1}
; PRELINK: ![[#]] = !{!"TotalCount", i64 3595}
; PRELINK: ![[#]] = !{!"MaxCount", i64 3000}
; PRELINK: ![[#]] = !{!"MaxInternalCount", i64 3000}
; PRELINK: ![[#]] = !{!"MaxFunctionCount", i64 300}
; PRELINK: ![[#]] = !{!"TotalCount", i64 151600}
; PRELINK: ![[#]] = !{!"MaxCount", i64 102000}
; PRELINK: ![[#]] = !{!"MaxInternalCount", i64 102000}
; PRELINK: ![[#]] = !{!"MaxFunctionCount", i64 20100}
; PRELINK: ![[#]] = !{!"NumCounts", i64 6}
; PRELINK: ![[#]] = !{!"NumFunctions", i64 3}
; PRELINK: ![[PREPROF]] = !{!"branch_weights", i32 40, i32 60}
; PRELINK: ![[PREPROF]] = !{!"branch_weights", i32 4000, i32 6000}

; Check that the output has:
; - no instrumentation
Expand All @@ -49,25 +49,25 @@
; The postlink summary is restricted to the stuff under the root - including the
; "unhandled" data.
; POSTLINK: ![[#]] = !{i32 1, !"ProfileSummary", !1}
; POSTLINK: ![[#]] = !{!"TotalCount", i64 1495}
; POSTLINK: ![[#]] = !{!"MaxCount", i64 1000}
; POSTLINK: ![[#]] = !{!"MaxInternalCount", i64 1000}
; POSTLINK: ![[#]] = !{!"MaxFunctionCount", i64 200}
; POSTLINK: ![[#]] = !{!"TotalCount", i64 149500}
; POSTLINK: ![[#]] = !{!"MaxCount", i64 100000}
; POSTLINK: ![[#]] = !{!"MaxInternalCount", i64 100000}
; POSTLINK: ![[#]] = !{!"MaxFunctionCount", i64 20000}
; POSTLINK: ![[#]] = !{!"NumCounts", i64 6}
; POSTLINK: ![[#]] = !{!"NumFunctions", i64 3}

;
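; @foo will be called both unconditionally and conditionally, on the "yes" branch
; which has a count of 40. So 140 times in the raw profile; flattening scales
; everything under the root by its TotalRootEntryCount (a factor of 100 in this
; test), hence the 14000 below.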

; POSTLINK: ![[FOO_EP]] = !{!"function_entry_count", i64 140}
; POSTLINK: ![[FOO_EP]] = !{!"function_entry_count", i64 14000}

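; foo's "no" branch is taken 10+5 times (from the 2 contexts belonging to foo).
; Which means its "yes" branch is taken 140 - 15 times. Both weights are then
; scaled by the root's TotalRootEntryCount (a factor of 100 in this test),
; giving 12500 and 1500.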

; POSTLINK: ![[FOO_BW]] = !{!"branch_weights", i32 125, i32 15}
; POSTLINK: ![[AN_ENTRYPOINT_EP]] = !{!"function_entry_count", i64 100}
; POSTLINK: ![[AN_ENTRYPOINT_BW]] = !{!"branch_weights", i32 40, i32 60}
; POSTLINK: ![[FOO_BW]] = !{!"branch_weights", i32 12500, i32 1500}
; POSTLINK: ![[AN_ENTRYPOINT_EP]] = !{!"function_entry_count", i64 10000}
; POSTLINK: ![[AN_ENTRYPOINT_BW]] = !{!"branch_weights", i32 4000, i32 6000}

;--- profile.yaml
Contexts:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Analysis/CtxProfAnalysis/flatten-check-path.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@

; CHECK: br i1 %x, label %b1, label %exit, !prof ![[PROF1:[0-9]+]]
; CHECK: br i1 %y, label %blk, label %exit, !prof ![[PROF2:[0-9]+]]
; CHECK: ![[PROF1]] = !{!"branch_weights", i32 1, i32 1}
; CHECK: ![[PROF2]] = !{!"branch_weights", i32 0, i32 1}
; CHECK: ![[PROF1]] = !{!"branch_weights", i32 2, i32 2}
; CHECK: ![[PROF2]] = !{!"branch_weights", i32 0, i32 2}
; ASSERTION: Assertion `allTakenPathsExit()

; b1->exit is the only way out from b1, but the exit block would have been
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
; PRELINK-NEXT: call void %p(), !prof ![[VPPROF:[0-9]+]]
; PRELINK-NEXT: call void @llvm.instrprof.callsite(ptr @foo, i64 1234, i32 2, i32 1, ptr @bar)
; PRELINK-NEXT: call void @bar(){{$}}
; PRELINK: ![[VPPROF]] = !{!"VP", i32 0, i64 5, i64 5678, i64 4, i64 5555, i64 1}
; PRELINK: ![[VPPROF]] = !{!"VP", i32 0, i64 25, i64 5678, i64 20, i64 5555, i64 5}

; RUN: cp %t/example.ll %t/1234.ll
; RUN: opt -passes=ctx-prof-flatten %t/1234.ll -use-ctx-profile=%t/profile.ctxprofdata \
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
; CHECK-LABEL: yes:
; CHECK: br i1 %t3, label %yes1, label %yes2, !prof ![[C1]]
; CHECK-NOT: !prof
; CHECK: ![[C1]] = !{!"branch_weights", i32 6, i32 0}
; CHECK: ![[C1]] = !{!"branch_weights", i32 72, i32 0}

;--- 1234.ll
define void @f1(i32 %cond) !guid !0 {
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ Contexts:
Counters: [ 1, 2 ]

Flat Profile:
2072045998141807037 : 7
3087265239403591524 : 11 9
4197650231481825559 : 2
10507721908651011566 : 1
2072045998141807037 : 70
3087265239403591524 : 110 90
4197650231481825559 : 20
10507721908651011566 : 10
8 changes: 5 additions & 3 deletions llvm/test/Analysis/CtxProfAnalysis/inline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,13 @@
; PIPELINE-LABEL: loop:
; PIPELINE: br i1 %cond, label %loop, label %exit, !prof ![[LOOP_BW_ORIG:[0-9]+]]

; PIPELINE: ![[ENTRYPOINT_COUNT]] = !{!"function_entry_count", i64 10}
; *Note* that all values are multiplied by the TotalRootEntryCount, which is 24
;
; PIPELINE: ![[ENTRYPOINT_COUNT]] = !{!"function_entry_count", i64 240}
; These are the weights of the inlined @a, where the counters were 2, 100 (2 for entry, 100 for loop)
; PIPELINE: ![[LOOP_BW_INL]] = !{!"branch_weights", i32 98, i32 2}
; PIPELINE: ![[LOOP_BW_INL]] = !{!"branch_weights", i32 2352, i32 48}
; These are the weights of the un-inlined @a, where the counters were 8, 500 (8 for entry, 500 for loop)
; PIPELINE: ![[LOOP_BW_ORIG]] = !{!"branch_weights", i32 492, i32 8}
; PIPELINE: ![[LOOP_BW_ORIG]] = !{!"branch_weights", i32 11808, i32 192}

;--- 1000.ll
define i32 @entrypoint(i32 %x) !guid !0 {
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Analysis/CtxProfAnalysis/load.ll
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,10 @@ Contexts:
Counters: [ 5 ]

Flat Profile:
12341 : 9
728453322856651412 : 6 7
11872291593386833696 : 1
12074870348631550642 : 5
12341 : 810
728453322856651412 : 24 28
11872291593386833696 : 4
12074870348631550642 : 120
;--- example.ll
declare void @bar()

Expand Down
Loading