Skip to content

[ctx_prof] Handle select and its step instrumentation #109185

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/include/llvm/Analysis/CtxProfAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ class CtxProfAnalysis : public AnalysisInfoMixin<CtxProfAnalysis> {

/// Get the instruction instrumenting a BB, or nullptr if not present.
static InstrProfIncrementInst *getBBInstrumentation(BasicBlock &BB);

/// Get the step instrumentation associated with a `select`
static InstrProfIncrementInstStep *getSelectInstrumentation(SelectInst &SI);
};

class CtxProfAnalysisPrinterPass
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Analysis/CtxProfAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,15 @@ InstrProfIncrementInst *CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) {
return nullptr;
}

InstrProfIncrementInstStep *
CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) {
Instruction *Prev = &SI;
while ((Prev = Prev->getPrevNode()))
if (auto *Step = dyn_cast<InstrProfIncrementInstStep>(Prev))
return Step;
return nullptr;
}

template <class ProfilesTy, class ProfTy>
static void preorderVisit(ProfilesTy &Profiles,
function_ref<void(ProfTy &)> Visitor,
Expand Down
52 changes: 50 additions & 2 deletions llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ class ProfileAnnotator final {

bool hasCount() const { return Count.has_value(); }

uint64_t getCount() const { return *Count; }

bool trySetSingleUnknownInEdgeCount() {
if (UnknownCountInEdges == 1) {
setSingleUnknownEdgeCount(InEdges);
Expand Down Expand Up @@ -266,6 +268,21 @@ class ProfileAnnotator final {
return HitExit;
}

bool allNonColdSelectsHaveProfile() const {
for (const auto &BB : F) {
if (getBBInfo(BB).getCount() > 0) {
for (const auto &I : BB) {
if (const auto *SI = dyn_cast<SelectInst>(&I)) {
if (!SI->getMetadata(LLVMContext::MD_prof)) {
return false;
}
}
}
}
}
return true;
}

public:
ProfileAnnotator(Function &F, const SmallVectorImpl<uint64_t> &Counters,
InstrProfSummaryBuilder &PB)
Expand Down Expand Up @@ -315,6 +332,32 @@ class ProfileAnnotator final {
"populated, because we need pointers to its contents to be stable");
}

void setProfileForSelectInstructions(BasicBlock &BB, const BBInfo &BBInfo) {
if (BBInfo.getCount() == 0)
return;

for (auto &I : BB) {
if (auto *SI = dyn_cast<SelectInst>(&I)) {
if (auto *Step = CtxProfAnalysis::getSelectInstrumentation(*SI)) {
auto Index = Step->getIndex()->getZExtValue();
assert(Index < Counters.size() &&
"The index of the step instruction must be inside the "
"counters vector by "
"construction - tripping this assertion indicates a bug in "
"how the contextual profile is managed by IPO transforms");
auto TotalCount = BBInfo.getCount();
auto TrueCount = Counters[Index];
auto FalseCount =
(TotalCount > TrueCount ? TotalCount - TrueCount : 0U);
setProfMetadata(F.getParent(), SI, {TrueCount, FalseCount},
std::max(TrueCount, FalseCount));
PB.addInternalCount(TrueCount);
PB.addInternalCount(FalseCount);
}
}
}
}

/// Assign branch weights and function entry count. Also update the PSI
/// builder.
void assignProfileData() {
Expand All @@ -324,12 +367,14 @@ class ProfileAnnotator final {
PB.addEntryCount(Counters[0]);

for (auto &BB : F) {
const auto &BBInfo = getBBInfo(BB);
setProfileForSelectInstructions(BB, BBInfo);
if (succ_size(&BB) < 2)
continue;
auto *Term = BB.getTerminator();
SmallVector<uint64_t, 2> EdgeCounts(Term->getNumSuccessors(), 0);
uint64_t MaxCount = 0;
const auto &BBInfo = getBBInfo(BB);

for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < Size;
++SuccIdx) {
uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx);
Expand All @@ -343,12 +388,15 @@ class ProfileAnnotator final {
setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount);
}
assert(allCountersAreAssigned() &&
"Expected all counters have been assigned.");
"[ctx-prof] Expected all counters have been assigned.");
assert(allTakenPathsExit() &&
"[ctx-prof] Encountered a BB with more than one successor, where "
"all outgoing edges have a 0 count. This occurs in non-exiting "
"functions (message pumps, usually) which are not supported in the "
"contextual profiling case");
assert(allNonColdSelectsHaveProfile() &&
"[ctx-prof] All non-cold select instructions were expected to have "
"a profile.");
}
};

Expand Down
16 changes: 15 additions & 1 deletion llvm/lib/Transforms/Utils/InlineFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2223,7 +2223,21 @@ remapIndices(Function &Caller, BasicBlock *StartBB,
}
for (auto &I : llvm::make_early_inc_range(*BB)) {
if (auto *Inc = dyn_cast<InstrProfIncrementInst>(&I)) {
if (Inc != BBID) {
if (isa<InstrProfIncrementInstStep>(Inc)) {
// Step instrumentation is used for select instructions. Inlining may
// have propagated a constant resulting in the condition of the select
// being resolved, case in which function cloning resolves the value
// of the select, and elides the select instruction. If that is the
// case, the step parameter of the instrumentation will reflect that.
// We can delete the instrumentation in that case.
if (isa<Constant>(Inc->getStep())) {
assert(!Inc->getNextNode() || !isa<SelectInst>(Inc->getNextNode()));
Inc->eraseFromParent();
} else {
assert(isa_and_nonnull<SelectInst>(Inc->getNextNode()));
RewriteInstrIfNeeded(*Inc);
}
} else if (Inc != BBID) {
// If we're here it means that the BB had more than 1 IDs, presumably
// some coming from the callee. We "made up our mind" to keep the
// first one (which may or may not have been originally the caller's).
Expand Down
76 changes: 76 additions & 0 deletions llvm/test/Analysis/CtxProfAnalysis/handle-select.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
; Check that we handle `step` instrumentations. These addorn `select`s.
; We don't want to confuse the `step` with normal increments, the latter of which
; we use for BB ID-ing: we want to keep the `step`s after inlining, except if
; the `select` is elided.
;
; RUN: split-file %s %t
; RUN: llvm-ctxprof-util fromJSON --input=%t/profile.json --output=%t/profile.ctxprofdata
;
; RUN: opt -passes=ctx-instr-gen %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s --check-prefix=INSTR
; RUN: opt -passes=ctx-instr-gen,module-inline %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s --check-prefix=POST-INL
; RUN: opt -passes=ctx-instr-gen,module-inline,ctx-prof-flatten %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s --check-prefix=FLATTEN

; INSTR-LABEL: yes:
; INSTR-NEXT: call void @llvm.instrprof.increment(ptr @foo, i64 [[#]], i32 2, i32 1)
; INSTR-NEXT: call void @llvm.instrprof.callsite(ptr @foo, i64 [[#]], i32 2, i32 0, ptr @bar)

; INSTR-LABEL: no:
; INSTR-NEXT: call void @llvm.instrprof.callsite(ptr @foo, i64 [[#]], i32 2, i32 1, ptr @bar)

; INSTR-LABEL: define i32 @bar
; INSTR-NEXT: call void @llvm.instrprof.increment(ptr @bar, i64 [[#]], i32 2, i32 0)
; INSTR-NEXT: %inc =
; INSTR: %test = icmp eq i32 %t, 0
; INSTR-NEXT: %1 = zext i1 %test to i64
; INSTR-NEXT: call void @llvm.instrprof.increment.step(ptr @bar, i64 [[#]], i32 2, i32 1, i64 %1)
; INSTR-NEXT: %res = select

; POST-INL-LABEL: yes:
; POST-INL-NEXT: call void @llvm.instrprof.increment
; POST-INL: call void @llvm.instrprof.increment.step
; POST-INL-NEXT: %res.i = select

; POST-INL-LABEL: no:
; POST-INL-NEXT: call void @llvm.instrprof.increment
; POST-INL-NEXT: br label

; POST-INL-LABEL: exit:
; POST-INL-NEXT: %res = phi i32 [ %res.i, %yes ], [ 1, %no ]

; FLATTEN-LABEL: yes:
; FLATTEN: %res.i = select i1 %test.i, i32 %inc.i, i32 %dec.i, !prof ![[SELPROF:[0-9]+]]
; FLATTEN-LABEL: no:
;
; See the profile, in the "yes" case we set the step counter's value, in @bar, to 3. The total
; entry count of that BB is 4.
; ![[SELPROF]] = !{!"branch_weights", i32 3, i32 1}

;--- example.ll
define i32 @foo(i32 %t) !guid !0 {
%test = icmp slt i32 %t, 0
br i1 %test, label %yes, label %no
yes:
%res1 = call i32 @bar(i32 %t) alwaysinline
br label %exit
no:
; this will result in eliding the select in @bar, when inlined.
%res2 = call i32 @bar(i32 0) alwaysinline
br label %exit
exit:
%res = phi i32 [%res1, %yes], [%res2, %no]
ret i32 %res
}

define i32 @bar(i32 %t) !guid !1 {
%inc = add i32 %t, 1
%dec = sub i32 %t, 1
%test = icmp eq i32 %t, 0
%res = select i1 %test, i32 %inc, i32 %dec
ret i32 %res
}

!0 = !{i64 1234}
!1 = !{i64 5678}

;--- profile.json
[{"Guid":1234, "Counters":[10, 4], "Callsites":[[{"Guid": 5678, "Counters":[4,3]}],[{"Guid": 5678, "Counters":[6,6]}]]}]
Loading