Skip to content

Commit a4778ec

Browse files
mtrofin authored and Sterling-Augustine committed
[nfc][ctx_prof] Efficient profile traversal and update (llvm#110052)
This optimizes profile updates and visits, where we want to access contexts for a specific function. These are all the current update cases. We do so by maintaining a list of contexts for each function, preserving preorder traversal. The list is updated whenever contexts are `std::move`-d or deleted.
1 parent 8b85630 commit a4778ec

File tree

5 files changed

+103
-16
lines changed

5 files changed

+103
-16
lines changed

llvm/include/llvm/Analysis/CtxProfAnalysis.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ class PGOContextualProfile {
3535
uint32_t NextCounterIndex = 0;
3636
uint32_t NextCallsiteIndex = 0;
3737
const std::string Name;
38-
38+
PGOCtxProfContext Index;
3939
FunctionInfo(StringRef Name) : Name(Name) {}
4040
};
4141
std::optional<PGOCtxProfContext::CallTargetMapTy> Profiles;
@@ -50,6 +50,8 @@ class PGOContextualProfile {
5050
// its state piecemeal.
5151
PGOContextualProfile() = default;
5252

53+
void initIndex();
54+
5355
public:
5456
PGOContextualProfile(const PGOContextualProfile &) = delete;
5557
PGOContextualProfile(PGOContextualProfile &&) = default;
@@ -94,7 +96,7 @@ class PGOContextualProfile {
9496
using ConstVisitor = function_ref<void(const PGOCtxProfContext &)>;
9597
using Visitor = function_ref<void(PGOCtxProfContext &)>;
9698

97-
void update(Visitor, const Function *F = nullptr);
99+
void update(Visitor, const Function &F);
98100
void visit(ConstVisitor, const Function *F = nullptr) const;
99101

100102
const CtxProfFlatProfile flatten() const;

llvm/include/llvm/ProfileData/PGOCtxProfReader.h

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,20 +22,74 @@
2222
#include <map>
2323

2424
namespace llvm {
25+
class PGOContextualProfile;
26+
class PGOCtxProfContext;
27+
28+
namespace internal {
29+
// When we traverse the contextual profile, we typically want to visit contexts
30+
// pertaining to a specific function. To avoid traversing the whole tree, we
31+
// want to keep a per-function list - which will be in preorder - of that
32+
// function's contexts. This happens in PGOContextualProfile. For memory use
33+
// efficiency, we want to make PGOCtxProfContext an intrusive double-linked list
34+
// node. We need to handle the cases where PGOCtxProfContext nodes are moved and
35+
// deleted: in both cases, we need to update the index (==list). We can do that
36+
// directly from the node in the list, without knowing who the "parent" of the
37+
// list is. That makes the ADT ilist overkill here. Finally, IndexNode is meant
38+
// to be an implementation detail of PGOCtxProfContext, and the only reason it's
39+
// factored out is to avoid implementing move semantics for all its members.
40+
class IndexNode {
41+
// This class' members are intentionally private - it's a convenience
42+
// implementation detail.
43+
friend class ::llvm::PGOCtxProfContext;
44+
friend class ::llvm::PGOContextualProfile;
45+
46+
IndexNode *Previous = nullptr;
47+
IndexNode *Next = nullptr;
48+
49+
~IndexNode() {
50+
if (Next)
51+
Next->Previous = Previous;
52+
if (Previous)
53+
Previous->Next = Next;
54+
}
55+
56+
IndexNode(const IndexNode &Other) = delete;
57+
58+
IndexNode(IndexNode &&Other) {
59+
// Copy the neighbor info
60+
Next = Other.Next;
61+
Previous = Other.Previous;
62+
63+
// Update the neighbors to point to this object
64+
if (Other.Next)
65+
Other.Next->Previous = this;
66+
if (Other.Previous)
67+
Other.Previous->Next = this;
68+
69+
// Make sure the dtor is a noop
70+
Other.Next = nullptr;
71+
Other.Previous = nullptr;
72+
}
73+
IndexNode() = default;
74+
};
75+
} // namespace internal
76+
2577
/// A node (context) in the loaded contextual profile, suitable for mutation
2678
/// during IPO passes. We generally expect a fraction of counters and
2779
/// callsites to be populated. We continue to model counters as vectors, but
2880
/// callsites are modeled as a map of a map. The expectation is that, typically,
2981
/// there is a small number of indirect targets (usually, 1 for direct calls);
3082
/// but potentially a large number of callsites, and, as inlining progresses,
3183
/// the callsite count of a caller will grow.
32-
class PGOCtxProfContext final {
84+
class PGOCtxProfContext final : public internal::IndexNode {
3385
public:
3486
using CallTargetMapTy = std::map<GlobalValue::GUID, PGOCtxProfContext>;
3587
using CallsiteMapTy = std::map<uint32_t, CallTargetMapTy>;
3688

3789
private:
3890
friend class PGOCtxProfileReader;
91+
friend class PGOContextualProfile;
92+
3993
GlobalValue::GUID GUID = 0;
4094
SmallVector<uint64_t, 16> Counters;
4195
CallsiteMapTy Callsites;
@@ -47,11 +101,15 @@ class PGOCtxProfContext final {
47101
getOrEmplace(uint32_t Index, GlobalValue::GUID G,
48102
SmallVectorImpl<uint64_t> &&Counters);
49103

104+
// Create a bogus context object, used for anchoring the index double linked
105+
// list - see IndexNode
106+
PGOCtxProfContext() = default;
107+
50108
public:
51109
PGOCtxProfContext(const PGOCtxProfContext &) = delete;
52110
PGOCtxProfContext &operator=(const PGOCtxProfContext &) = delete;
53111
PGOCtxProfContext(PGOCtxProfContext &&) = default;
54-
PGOCtxProfContext &operator=(PGOCtxProfContext &&) = default;
112+
PGOCtxProfContext &operator=(PGOCtxProfContext &&) = delete;
55113

56114
GlobalValue::GUID guid() const { return GUID; }
57115
const SmallVectorImpl<uint64_t> &counters() const { return Counters; }

llvm/lib/Analysis/CtxProfAnalysis.cpp

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ PGOContextualProfile CtxProfAnalysis::run(Module &M,
184184
// If we made it this far, the Result is valid - which we mark by setting
185185
// .Profiles.
186186
Result.Profiles = std::move(*MaybeCtx);
187+
Result.initIndex();
187188
return Result;
188189
}
189190

@@ -265,11 +266,9 @@ CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) {
265266

266267
template <class ProfilesTy, class ProfTy>
267268
static void preorderVisit(ProfilesTy &Profiles,
268-
function_ref<void(ProfTy &)> Visitor,
269-
GlobalValue::GUID Match = 0) {
269+
function_ref<void(ProfTy &)> Visitor) {
270270
std::function<void(ProfTy &)> Traverser = [&](auto &Ctx) {
271-
if (!Match || Ctx.guid() == Match)
272-
Visitor(Ctx);
271+
Visitor(Ctx);
273272
for (auto &[_, SubCtxSet] : Ctx.callsites())
274273
for (auto &[__, Subctx] : SubCtxSet)
275274
Traverser(Subctx);
@@ -278,16 +277,44 @@ static void preorderVisit(ProfilesTy &Profiles,
278277
Traverser(P);
279278
}
280279

281-
void PGOContextualProfile::update(Visitor V, const Function *F) {
282-
GlobalValue::GUID G = F ? getDefinedFunctionGUID(*F) : 0U;
280+
void PGOContextualProfile::initIndex() {
281+
// Initialize the head of the index list for each function. We don't need it
282+
// after this point.
283+
DenseMap<GlobalValue::GUID, PGOCtxProfContext *> InsertionPoints;
284+
for (auto &[Guid, FI] : FuncInfo)
285+
InsertionPoints[Guid] = &FI.Index;
283286
preorderVisit<PGOCtxProfContext::CallTargetMapTy, PGOCtxProfContext>(
284-
*Profiles, V, G);
287+
*Profiles, [&](PGOCtxProfContext &Ctx) {
288+
auto InsertIt = InsertionPoints.find(Ctx.guid());
289+
if (InsertIt == InsertionPoints.end())
290+
return;
291+
// Insert at the end of the list. Since we traverse in preorder, it
292+
// means that when we iterate the list from the beginning, we'd
293+
// encounter the contexts in the order we would have, should we have
294+
// performed a full preorder traversal.
295+
InsertIt->second->Next = &Ctx;
296+
Ctx.Previous = InsertIt->second;
297+
InsertIt->second = &Ctx;
298+
});
299+
}
300+
301+
// Invoke V on every context of function F, in index-list order. F must be a
// function known to this profile (asserted).
void PGOContextualProfile::update(Visitor V, const Function &F) {
  assert(isFunctionKnown(F));
  GlobalValue::GUID G = getDefinedFunctionGUID(F);
  // Start after the list head stored in FuncInfo. Node->Next is read only
  // after V returns, so any relinking done while V runs is observed.
  for (auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
       Node = Node->Next)
    // IndexNode is a public base of PGOCtxProfContext, so static_cast is the
    // well-defined downcast and does not rely on the base being at offset 0.
    V(*static_cast<PGOCtxProfContext *>(Node));
}
286308

287309
// Invoke V on contexts without mutating them. With no function given, every
// context in the profile is visited in preorder; otherwise only the contexts
// of *F are visited, by walking that function's index list. *F must be a
// function known to this profile (asserted).
void PGOContextualProfile::visit(ConstVisitor V, const Function *F) const {
  if (!F)
    return preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
                         const PGOCtxProfContext>(*Profiles, V);
  assert(isFunctionKnown(*F));
  GlobalValue::GUID G = getDefinedFunctionGUID(*F);
  // Start after the list head stored in FuncInfo.
  for (const auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
       Node = Node->Next)
    // IndexNode is a public base of PGOCtxProfContext, so static_cast is the
    // well-defined downcast and does not rely on the base being at offset 0.
    V(*static_cast<const PGOCtxProfContext *>(Node));
}
292319

293320
const CtxProfFlatProfile PGOContextualProfile::flatten() const {

llvm/lib/Transforms/Utils/CallPromotionUtils.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -655,7 +655,7 @@ CallBase *llvm::promoteCallWithIfThenElse(CallBase &CB, Function &Callee,
655655
Ctx.counters()[IndirectID] = IndirectCount;
656656

657657
};
658-
CtxProf.update(ProfileUpdater, &Caller);
658+
CtxProf.update(ProfileUpdater, Caller);
659659
return &DirectCall;
660660
}
661661

llvm/lib/Transforms/Utils/InlineFunction.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2375,7 +2375,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
23752375
assert(Deleted);
23762376
(void)Deleted;
23772377
};
2378-
CtxProf.update(Updater, &Caller);
2378+
CtxProf.update(Updater, Caller);
23792379
return Ret;
23802380
}
23812381

0 commit comments

Comments
 (0)