Skip to content

[nfc][ctx_prof] Efficient profile traversal and update #110052

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions llvm/include/llvm/Analysis/CtxProfAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class PGOContextualProfile {
uint32_t NextCounterIndex = 0;
uint32_t NextCallsiteIndex = 0;
const std::string Name;

PGOCtxProfContext Index;
FunctionInfo(StringRef Name) : Name(Name) {}
};
std::optional<PGOCtxProfContext::CallTargetMapTy> Profiles;
Expand All @@ -50,6 +50,8 @@ class PGOContextualProfile {
// its state piecemeal.
PGOContextualProfile() = default;

void initIndex();

public:
PGOContextualProfile(const PGOContextualProfile &) = delete;
PGOContextualProfile(PGOContextualProfile &&) = default;
Expand Down Expand Up @@ -94,7 +96,7 @@ class PGOContextualProfile {
using ConstVisitor = function_ref<void(const PGOCtxProfContext &)>;
using Visitor = function_ref<void(PGOCtxProfContext &)>;

void update(Visitor, const Function *F = nullptr);
void update(Visitor, const Function &F);
void visit(ConstVisitor, const Function *F = nullptr) const;

const CtxProfFlatProfile flatten() const;
Expand Down
62 changes: 60 additions & 2 deletions llvm/include/llvm/ProfileData/PGOCtxProfReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,74 @@
#include <map>

namespace llvm {
class PGOContextualProfile;
class PGOCtxProfContext;

namespace internal {
// When we traverse the contextual profile, we typically want to visit only the
// contexts pertaining to a specific function. To avoid traversing the whole
// tree, we keep a per-function list - in preorder - of that function's
// contexts. This happens in PGOContextualProfile. For memory use efficiency,
// PGOCtxProfContext is made an intrusive doubly-linked list node. We need to
// handle the cases where PGOCtxProfContext nodes are moved and deleted: in
// both cases, the index (== the list) must be updated. That can be done
// directly from the node, by patching its neighbors, without knowing who the
// "parent" of the list is - which makes the ADT ilist overkill here. Finally,
// IndexNode is meant to be an implementation detail of PGOCtxProfContext; the
// only reason it is factored out is to avoid hand-writing move semantics for
// all of PGOCtxProfContext's members.
class IndexNode {
  // This class's members are intentionally private - it's a convenience
  // implementation detail, reachable only through the friends below.
  friend class ::llvm::PGOCtxProfContext;
  friend class ::llvm::PGOContextualProfile;

  IndexNode *Previous = nullptr;
  IndexNode *Next = nullptr;

  // Unlink this node from whatever list it is on by stitching its neighbors
  // (if any) to each other. A node that is not linked is left alone.
  ~IndexNode() {
    if (Next)
      Next->Previous = Previous;
    if (Previous)
      Previous->Next = Next;
  }

  // A list position can only be held by one node, so copying is not
  // meaningful. Assignment is deleted explicitly for the same reason (it was
  // already unavailable implicitly, because a move constructor is declared).
  IndexNode(const IndexNode &Other) = delete;
  IndexNode &operator=(const IndexNode &) = delete;
  IndexNode &operator=(IndexNode &&) = delete;

  // Take over Other's position in its list. Only pointers are reassigned, so
  // this cannot throw.
  IndexNode(IndexNode &&Other) noexcept
      : Previous(Other.Previous), Next(Other.Next) {
    // Update the neighbors to point to this object instead of Other.
    if (Next)
      Next->Previous = this;
    if (Previous)
      Previous->Next = this;

    // Detach Other, so that its destructor is a noop and does not unlink the
    // neighbors we just adopted.
    Other.Next = nullptr;
    Other.Previous = nullptr;
  }

  // A default-constructed node is not linked to anything.
  IndexNode() = default;
};
} // namespace internal

/// A node (context) in the loaded contextual profile, suitable for mutation
/// during IPO passes. We generally expect a fraction of counters and
/// callsites to be populated. We continue to model counters as vectors, but
/// callsites are modeled as a map of a map. The expectation is that, typically,
/// there is a small number of indirect targets (usually, 1 for direct calls);
/// but potentially a large number of callsites, and, as inlining progresses,
/// the callsite count of a caller will grow.
class PGOCtxProfContext final {
class PGOCtxProfContext final : public internal::IndexNode {
public:
using CallTargetMapTy = std::map<GlobalValue::GUID, PGOCtxProfContext>;
using CallsiteMapTy = std::map<uint32_t, CallTargetMapTy>;

private:
friend class PGOCtxProfileReader;
friend class PGOContextualProfile;

GlobalValue::GUID GUID = 0;
SmallVector<uint64_t, 16> Counters;
CallsiteMapTy Callsites;
Expand All @@ -47,11 +101,15 @@ class PGOCtxProfContext final {
getOrEmplace(uint32_t Index, GlobalValue::GUID G,
SmallVectorImpl<uint64_t> &&Counters);

// Create a placeholder context object, used only to anchor (i.e. serve as the
// head of) the index doubly-linked list - see IndexNode.
PGOCtxProfContext() = default;

public:
PGOCtxProfContext(const PGOCtxProfContext &) = delete;
PGOCtxProfContext &operator=(const PGOCtxProfContext &) = delete;
PGOCtxProfContext(PGOCtxProfContext &&) = default;
PGOCtxProfContext &operator=(PGOCtxProfContext &&) = default;
PGOCtxProfContext &operator=(PGOCtxProfContext &&) = delete;

GlobalValue::GUID guid() const { return GUID; }
const SmallVectorImpl<uint64_t> &counters() const { return Counters; }
Expand Down
47 changes: 37 additions & 10 deletions llvm/lib/Analysis/CtxProfAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ PGOContextualProfile CtxProfAnalysis::run(Module &M,
// If we made it this far, the Result is valid - which we mark by setting
// .Profiles.
Result.Profiles = std::move(*MaybeCtx);
Result.initIndex();
return Result;
}

Expand Down Expand Up @@ -266,11 +267,9 @@ CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) {

template <class ProfilesTy, class ProfTy>
static void preorderVisit(ProfilesTy &Profiles,
function_ref<void(ProfTy &)> Visitor,
GlobalValue::GUID Match = 0) {
function_ref<void(ProfTy &)> Visitor) {
std::function<void(ProfTy &)> Traverser = [&](auto &Ctx) {
if (!Match || Ctx.guid() == Match)
Visitor(Ctx);
Visitor(Ctx);
for (auto &[_, SubCtxSet] : Ctx.callsites())
for (auto &[__, Subctx] : SubCtxSet)
Traverser(Subctx);
Expand All @@ -279,16 +278,44 @@ static void preorderVisit(ProfilesTy &Profiles,
Traverser(P);
}

void PGOContextualProfile::update(Visitor V, const Function *F) {
GlobalValue::GUID G = F ? getDefinedFunctionGUID(*F) : 0U;
void PGOContextualProfile::initIndex() {
// Start each function's insertion point at the head of its index list. The
// insertion-point map is only needed while the index is being built.
DenseMap<GlobalValue::GUID, PGOCtxProfContext *> InsertionPoints;
for (auto &[Guid, FI] : FuncInfo)
InsertionPoints[Guid] = &FI.Index;
preorderVisit<PGOCtxProfContext::CallTargetMapTy, PGOCtxProfContext>(
*Profiles, V, G);
*Profiles, [&](PGOCtxProfContext &Ctx) {
auto InsertIt = InsertionPoints.find(Ctx.guid());
if (InsertIt == InsertionPoints.end())
return;
// Insert at the end of the list. Since we traverse in preorder, it
// means that when we iterate the list from the beginning, we'd
// encounter the contexts in the order we would have, should we have
// performed a full preorder traversal.
InsertIt->second->Next = &Ctx;
Ctx.Previous = InsertIt->second;
InsertIt->second = &Ctx;
});
}

// Invoke V on every context in the profile that belongs to F, walking F's
// per-function index list rather than the whole context tree. F must be a
// function known to this profile (asserted). V receives a mutable reference
// and may modify the context it is given.
void PGOContextualProfile::update(Visitor V, const Function &F) {
  assert(isFunctionKnown(F));
  GlobalValue::GUID G = getDefinedFunctionGUID(F);
  // FunctionInfo::Index is only the anchor of the list, so iteration starts at
  // its successor and stops at the null terminator.
  for (auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
       Node = Node->Next)
    // Nodes linked into the index are the IndexNode base subobjects of
    // PGOCtxProfContext objects, so a static downcast recovers the context
    // without relying on object layout.
    V(*static_cast<PGOCtxProfContext *>(Node));
}

void PGOContextualProfile::visit(ConstVisitor V, const Function *F) const {
GlobalValue::GUID G = F ? getDefinedFunctionGUID(*F) : 0U;
preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
const PGOCtxProfContext>(*Profiles, V, G);
if (!F)
return preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
const PGOCtxProfContext>(*Profiles, V);
assert(isFunctionKnown(*F));
GlobalValue::GUID G = getDefinedFunctionGUID(*F);
for (const auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
Node = Node->Next)
V(*reinterpret_cast<const PGOCtxProfContext *>(Node));
}

const CtxProfFlatProfile PGOContextualProfile::flatten() const {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -655,7 +655,7 @@ CallBase *llvm::promoteCallWithIfThenElse(CallBase &CB, Function &Callee,
Ctx.counters()[IndirectID] = IndirectCount;

};
CtxProf.update(ProfileUpdater, &Caller);
CtxProf.update(ProfileUpdater, Caller);
return &DirectCall;
}

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Utils/InlineFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2375,7 +2375,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
assert(Deleted);
(void)Deleted;
};
CtxProf.update(Updater, &Caller);
CtxProf.update(Updater, Caller);
return Ret;
}

Expand Down
Loading