Skip to content

[ctx_prof] Simple ICP criteria during module inliner #109881

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions llvm/include/llvm/Analysis/CtxProfAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#ifndef LLVM_ANALYSIS_CTXPROFANALYSIS_H
#define LLVM_ANALYSIS_CTXPROFANALYSIS_H

#include "llvm/ADT/SetVector.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/IntrinsicInst.h"
Expand Down Expand Up @@ -63,6 +64,13 @@ class PGOContextualProfile {
return getDefinedFunctionGUID(F) != 0;
}

/// Return the name stored in FuncInfo for \p GUID, or an empty string when
/// FuncInfo has no entry for that GUID.
StringRef getFunctionName(GlobalValue::GUID GUID) const {
  if (auto Entry = FuncInfo.find(GUID); Entry != FuncInfo.end())
    return Entry->second.Name;
  // Unknown GUID: hand back an empty name rather than asserting.
  return "";
}

uint32_t getNumCounters(const Function &F) const {
assert(isFunctionKnown(F));
return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCounterIndex;
Expand Down Expand Up @@ -120,6 +128,11 @@ class CtxProfAnalysis : public AnalysisInfoMixin<CtxProfAnalysis> {

/// Get the step instrumentation associated with a `select`
static InstrProfIncrementInstStep *getSelectInstrumentation(SelectInst &SI);

/// Collect indirect call promotion candidates for the call site \p IC.
///
/// If \p IC has callsite instrumentation, the contexts of \p Profile that
/// belong to the caller of \p IC are visited. For every target GUID recorded
/// at that callsite whose name resolves to a function in the module carrying
/// the `alwaysinline` attribute, the pair {&IC, target} is inserted into
/// \p Candidates. Nothing is added when \p IC is not instrumented.
// FIXME: refactor to an advisor model, and separate
static void collectIndirectCallPromotionList(
CallBase &IC, Result &Profile,
SetVector<std::pair<CallBase *, Function *>> &Candidates);
};

class CtxProfAnalysisPrinterPass
Expand Down
23 changes: 23 additions & 0 deletions llvm/lib/Analysis/CtxProfAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"

#define DEBUG_TYPE "ctx_prof"

Expand Down Expand Up @@ -309,3 +310,25 @@ const CtxProfFlatProfile PGOContextualProfile::flatten() const {
});
return Flat;
}

/// Gather the (call site, target) pairs worth promoting for the indirect call
/// \p IC, appending them to \p Candidates.
///
/// Only targets observed in the contextual profile of IC's caller, resolvable
/// by name in IC's module, and marked `alwaysinline` are recorded. Call sites
/// without callsite instrumentation contribute nothing.
void CtxProfAnalysis::collectIndirectCallPromotionList(
    CallBase &IC, Result &Profile,
    SetVector<std::pair<CallBase *, Function *>> &Candidates) {
  const auto *CallsiteInstr = CtxProfAnalysis::getCallsiteInstrumentation(IC);
  if (CallsiteInstr == nullptr)
    return;

  const uint32_t CallsiteIdx = CallsiteInstr->getIndex()->getZExtValue();
  Module &Mod = *IC.getParent()->getModule();

  // Invoked once per profile context of the caller; inspects the targets the
  // profile recorded under this call site's index.
  auto CollectFromContext = [&](const PGOCtxProfContext &Ctx) {
    const auto &Callsites = Ctx.callsites();
    const auto TargetsIt = Callsites.find(CallsiteIdx);
    if (TargetsIt == Callsites.end())
      return;

    for (const auto &[Guid, _] : TargetsIt->second) {
      const auto Name = Profile.getFunctionName(Guid);
      if (Name.empty())
        continue; // GUID is not known to the profile's function table.
      auto *Target = Mod.getFunction(Name);
      if (!Target || !Target->hasFnAttribute(Attribute::AlwaysInline))
        continue; // Not in this module, or not marked alwaysinline.
      Candidates.insert({&IC, Target});
    }
  };

  Profile.visit(CollectFromContext, IC.getCaller());
}
22 changes: 20 additions & 2 deletions llvm/lib/Transforms/IPO/ModuleInliner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ using namespace llvm;
STATISTIC(NumInlined, "Number of functions inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");

/// Hidden command-line flag, off by default, that enables indirect call
/// promotion of `alwaysinline` targets found in the contextual profile while
/// the module inliner populates its initial call list.
cl::opt<bool> CtxProfPromoteAlwaysInline(
"ctx-prof-promote-alwaysinline", cl::init(false), cl::Hidden,
cl::desc("If using a contextual profile in this module, and an indirect "
"call target is marked as alwaysinline, perform indirect call "
"promotion for that target. If multiple targets for an indirect "
"call site fit this description, they are all promoted."));

/// Return true if the specified inline history ID
/// indicates an inline history that includes the specified function.
static bool inlineHistoryIncludes(
Expand Down Expand Up @@ -145,10 +152,11 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
assert(Calls != nullptr && "Expected an initialized InlineOrder");

// Populate the initial list of calls in this module.
SetVector<std::pair<CallBase *, Function *>> ICPCandidates;
for (Function &F : M) {
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
for (Instruction &I : instructions(F))
if (auto *CB = dyn_cast<CallBase>(&I))
for (Instruction &I : instructions(F)) {
if (auto *CB = dyn_cast<CallBase>(&I)) {
if (Function *Callee = CB->getCalledFunction()) {
if (!Callee->isDeclaration())
Calls->push({CB, -1});
Expand All @@ -163,7 +171,17 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
<< setIsVerbose();
});
}
} else if (CtxProfPromoteAlwaysInline && CtxProf &&
CB->isIndirectCall()) {
CtxProfAnalysis::collectIndirectCallPromotionList(*CB, CtxProf,
ICPCandidates);
}
}
}
}
for (auto &[CB, Target] : ICPCandidates) {
if (auto *DirectCB = promoteCallWithIfThenElse(*CB, *Target, CtxProf))
Calls->push({DirectCB, -1});
}
if (Calls->empty())
return PreservedAnalyses::all();
Expand Down
35 changes: 19 additions & 16 deletions llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -623,34 +623,37 @@ CallBase *llvm::promoteCallWithIfThenElse(CallBase &CB, Function &Callee,
// All the ctx-es belonging to a function must have the same size counters.
Ctx.resizeCounters(NewCountersSize);

// Maybe in this context, the indirect callsite wasn't observed at all
// Maybe in this context, the indirect callsite wasn't observed at all. That
// would make both direct and indirect BBs cold - which is what we already
// have from resizing the counters.
if (!Ctx.hasCallsite(CSIndex))
return;
auto &CSData = Ctx.callsite(CSIndex);
auto It = CSData.find(CalleeGUID);

// Maybe we did notice the indirect callsite, but to other targets.
if (It == CSData.end())
return;

assert(CalleeGUID == It->second.guid());

uint32_t DirectCount = It->second.getEntrycount();
uint32_t TotalCount = 0;
uint64_t TotalCount = 0;
for (const auto &[_, V] : CSData)
TotalCount += V.getEntrycount();
uint64_t DirectCount = 0;
// If we called the direct target, update the DirectCount. If we didn't, we
// still want to update the indirect BB (to which the TotalCount goes, in
// that case).
if (auto It = CSData.find(CalleeGUID); It != CSData.end()) {
assert(CalleeGUID == It->second.guid());
DirectCount = It->second.getEntrycount();
// This direct target needs to be moved to this caller under the
// newly-allocated callsite index.
assert(Ctx.callsites().count(NewCSID) == 0);
Ctx.ingestContext(NewCSID, std::move(It->second));
CSData.erase(CalleeGUID);
}

assert(TotalCount >= DirectCount);
uint32_t IndirectCount = TotalCount - DirectCount;
uint64_t IndirectCount = TotalCount - DirectCount;
// The ICP's effect is as-if the direct BB would have been taken DirectCount
// times, and the indirect BB, IndirectCount times
Ctx.counters()[DirectID] = DirectCount;
Ctx.counters()[IndirectID] = IndirectCount;

// This particular indirect target needs to be moved to this caller under
// the newly-allocated callsite index.
assert(Ctx.callsites().count(NewCSID) == 0);
Ctx.ingestContext(NewCSID, std::move(It->second));
CSData.erase(CalleeGUID);
};
CtxProf.update(ProfileUpdater, &Caller);
return &DirectCall;
Expand Down
55 changes: 55 additions & 0 deletions llvm/test/Analysis/CtxProfAnalysis/flatten-icp.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
; RUN: split-file %s %t
; RUN: llvm-ctxprof-util fromJSON --input %t/profile.json --output %t/profile.ctxprofdata
;
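; In the given profile, the indirect call is taken in two contexts. In one of
; them, the target we're trying to ICP - GUID:1000 - doesn't appear at all.
; That context should contribute to the count of the "indirect call BB".
; NOTE(review): the opt invocation below is not piped into FileCheck, so the
; CHECK lines in this file are not verified - confirm this is deliberate.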
; RUN: opt %t/test.ll -S -passes='require<ctx-prof-analysis>,module-inline,ctx-prof-flatten' -use-ctx-profile=%t/profile.ctxprofdata -ctx-prof-promote-alwaysinline

; CHECK-LABEL: define i32 @caller(ptr %c)
; CHECK-NEXT: [[CND:[0-9]+]] = icmp eq ptr %c, @one
; CHECK-NEXT: br i1 [[CND]], label %{{.*}}, label %{{.*}}, !prof ![[BW:[0-9]+]]

; CHECK: ![[BW]] = !{!"branch_weights", i32 10, i32 10}

;--- test.ll
declare i32 @external(i32 %x)
; Promotion target: uses attribute group #0 (alwaysinline) and GUID 1000 (!0).
; Its only call is a direct call to the external declaration @external.
define i32 @one() #0 !guid !0 {
call void @llvm.instrprof.increment(ptr @one, i64 123, i32 1, i32 0)
call void @llvm.instrprof.callsite(ptr @one, i64 123, i32 1, i32 0, ptr @external)
%ret = call i32 @external(i32 1)
ret i32 %ret
}

; Holds the instrumented indirect call through %c that the test checks for
; promotion against @one. Uses attribute group #1 (noinline) and GUID 3000 (!1).
define i32 @caller(ptr %c) #1 !guid !1 {
call void @llvm.instrprof.increment(ptr @caller, i64 567, i32 1, i32 0)
call void @llvm.instrprof.callsite(ptr @caller, i64 567, i32 1, i32 0, ptr %c)
%ret = call i32 %c()
ret i32 %ret
}

; Context root with GUID 4000 (!2). It calls @caller from two separately
; instrumented callsites (last-but-one intrinsic operand 0 and 1), which
; correspond to the two "Callsites" entries under Guid 4000 in profile.json.
define i32 @root(ptr %c) !guid !2 {
call void @llvm.instrprof.increment(ptr @root, i64 432, i32 1, i32 0)
call void @llvm.instrprof.callsite(ptr @root, i64 432, i32 2, i32 0, ptr @caller)
%a = call i32 @caller(ptr %c)
call void @llvm.instrprof.callsite(ptr @root, i64 432, i32 2, i32 1, ptr @caller)
%b = call i32 @caller(ptr %c)
%ret = add i32 %a, %b
ret i32 %ret

}

attributes #0 = { alwaysinline }
attributes #1 = { noinline }
!0 = !{i64 1000}
!1 = !{i64 3000}
!2 = !{i64 4000}

;--- profile.json
[ {
"Guid": 4000, "Counters":[10], "Callsites": [
[{"Guid":3000, "Counters":[10], "Callsites":[[{"Guid":1000, "Counters":[10]}]]}],
[{"Guid":3000, "Counters":[10], "Callsites":[[{"Guid":9000, "Counters":[10]}]]}]
]
}
]
Loading