Skip to content

Commit 7e86b13

Browse files
committed
[CSSPGO][llvm-profgen] Reimplement SampleContextTracker using context trie
This is the follow-up patch to https://reviews.llvm.org/D125246 for the `SampleContextTracker` part. Previously, the promotion and merging of contexts was based on the SampleContext (the array of frames), which cost a lot of memory. This patch detaches the tracker from the array ref and uses the context trie itself instead. This can save a lot of memory and benefits both the compiler's CS inliner and llvm-profgen's pre-inliner. One structure that needs special treatment is `FuncToCtxtProfiles`, which is used to get all the FunctionSamples for one function in order to do the merging and promoting. Previously, it searched each function's context and traversed the trie to get the node for that context. Now that the context is no longer stored inside the profile, we instead use an auxiliary profile-to-node map, `ProfileToNodeMap`: it is initialized with the FunctionSamples-to-TrieNode relations and is kept up to date while nodes are promoted and merged. Moreover, I was expecting the results before and after to remain the same, but I found that the order of FuncToCtxtProfiles matters and affects the results. This can happen in the recursive-context case, but the difference should be small. Since we no longer have the context, I just used a vector for the order; the result is still deterministic. Measured on one huge (12GB) profile from one of our internal services: the profile similarity between before and after is 99.999%, the running time is improved by 3X (debug mode), and the memory is reduced from 170GB to 90GB. Reviewed By: hoy, wenlei Differential Revision: https://reviews.llvm.org/D127031
1 parent aa58b7b commit 7e86b13

File tree

9 files changed

+204
-165
lines changed

9 files changed

+204
-165
lines changed

llvm/include/llvm/ProfileData/SampleProf.h

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -553,16 +553,6 @@ class SampleContext {
553553
}
554554
}
555555

556-
// Promote context by removing top frames with the length of
557-
// `ContextFramesToRemove`. Note that with array representation of context,
558-
// the promotion is effectively a slice operation with first
559-
// `ContextFramesToRemove` elements removed from left.
560-
void promoteOnPath(uint32_t ContextFramesToRemove) {
561-
assert(ContextFramesToRemove <= FullContext.size() &&
562-
"Cannot remove more than the whole context");
563-
FullContext = FullContext.drop_front(ContextFramesToRemove);
564-
}
565-
566556
// Decode context string for a frame to get function name and location.
567557
// `ContextStr` is in the form of `FuncName:StartLine.Discriminator`.
568558
static void decodeContextString(StringRef ContextStr, StringRef &FName,

llvm/include/llvm/Transforms/IPO/SampleContextTracker.h

Lines changed: 72 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/ADT/StringRef.h"
2020
#include "llvm/ProfileData/SampleProf.h"
2121
#include <cstddef>
#include <iterator>
#include <map>
22+
#include <queue>
2223
#include <vector>
2324

2425
namespace llvm {
@@ -44,11 +45,6 @@ class ContextTrieNode {
4445
ContextTrieNode *getOrCreateChildContext(const LineLocation &CallSite,
4546
StringRef ChildName,
4647
bool AllowCreate = true);
47-
48-
ContextTrieNode &moveToChildContext(const LineLocation &CallSite,
49-
ContextTrieNode &&NodeToMove,
50-
uint32_t ContextFramesToRemove,
51-
bool DeleteNode = true);
5248
void removeChildContext(const LineLocation &CallSite, StringRef ChildName);
5349
std::map<uint64_t, ContextTrieNode> &getAllChildContext();
5450
StringRef getFuncName() const;
@@ -59,6 +55,7 @@ class ContextTrieNode {
5955
LineLocation getCallSiteLoc() const;
6056
ContextTrieNode *getParentContext() const;
6157
void setParentContext(ContextTrieNode *Parent);
58+
void setCallSiteLoc(const LineLocation &Loc);
6259
void dumpNode();
6360
void dumpTree();
6461

@@ -91,23 +88,13 @@ class ContextTrieNode {
9188
// calling context and the context is identified by path from root to the node.
9289
class SampleContextTracker {
9390
public:
94-
struct ProfileComparer {
95-
bool operator()(FunctionSamples *A, FunctionSamples *B) const {
96-
// Sort function profiles by the number of total samples and their
97-
// contexts.
98-
if (A->getTotalSamples() == B->getTotalSamples())
99-
return A->getContext() < B->getContext();
100-
return A->getTotalSamples() > B->getTotalSamples();
101-
}
102-
};
103-
104-
// Keep profiles of a function sorted so that they will be processed/promoted
105-
// deterministically.
106-
using ContextSamplesTy = std::set<FunctionSamples *, ProfileComparer>;
91+
using ContextSamplesTy = std::vector<FunctionSamples *>;
10792

10893
SampleContextTracker() = default;
10994
SampleContextTracker(SampleProfileMap &Profiles,
11095
const DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap);
96+
// Populate the FuncToCtxtProfiles map after the trie is built.
97+
void populateFuncToCtxtMap();
11198
// Query context profile for a specific callee with given name at a given
11299
// call-site. The full context is identified by location of call instruction.
113100
FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst,
@@ -145,6 +132,61 @@ class SampleContextTracker {
145132

146133
// Create a merged context-less profile map.
147134
void createContextLessProfileMap(SampleProfileMap &ContextLessProfiles);
135+
// Look up the context trie node recorded for `FSamples` in ProfileToNodeMap.
// Returns nullptr when the map holds no entry for the given profile.
ContextTrieNode *
getContextNodeForProfile(const FunctionSamples *FSamples) const {
  const auto NodeIt = ProfileToNodeMap.find(FSamples);
  return NodeIt != ProfileToNodeMap.end() ? NodeIt->second : nullptr;
}
142+
// Give callers mutable access to the function-name -> context-profiles map.
StringMap<ContextSamplesTy> &getFuncToCtxtProfiles() {
  return this->FuncToCtxtProfiles;
}
145+
146+
class Iterator : public std::iterator<std::forward_iterator_tag,
147+
const ContextTrieNode *> {
148+
std::queue<ContextTrieNode *> NodeQueue;
149+
150+
public:
151+
explicit Iterator() = default;
152+
explicit Iterator(ContextTrieNode *Node) { NodeQueue.push(Node); }
153+
Iterator &operator++() {
154+
assert(!NodeQueue.empty() && "Iterator already at the end");
155+
ContextTrieNode *Node = NodeQueue.front();
156+
NodeQueue.pop();
157+
for (auto &It : Node->getAllChildContext())
158+
NodeQueue.push(&It.second);
159+
return *this;
160+
}
161+
162+
Iterator operator++(int) {
163+
assert(!NodeQueue.empty() && "Iterator already at the end");
164+
Iterator Ret = *this;
165+
++(*this);
166+
return Ret;
167+
}
168+
bool operator==(const Iterator &Other) const {
169+
if (NodeQueue.empty() && Other.NodeQueue.empty())
170+
return true;
171+
if (NodeQueue.empty() || Other.NodeQueue.empty())
172+
return false;
173+
return NodeQueue.front() == Other.NodeQueue.front();
174+
}
175+
bool operator!=(const Iterator &Other) const { return !(*this == Other); }
176+
ContextTrieNode *operator*() const {
177+
assert(!NodeQueue.empty() && "Invalid access to end iterator");
178+
return NodeQueue.front();
179+
}
180+
};
181+
182+
// Iterator positioned at the root node of the context trie.
Iterator begin() {
  return Iterator(&RootContext);
}
183+
// Past-the-end iterator: a default-constructed Iterator.
Iterator end() {
  return Iterator();
}
184+
185+
#ifndef NDEBUG
186+
// Get a context string from root to current node.
187+
std::string getContextString(const FunctionSamples &FSamples) const;
188+
std::string getContextString(ContextTrieNode *Node) const;
189+
#endif
148190
// Dump the internal context profile trie.
149191
void dump();
150192

@@ -155,15 +197,23 @@ class SampleContextTracker {
155197
ContextTrieNode *getTopLevelContextNode(StringRef FName);
156198
ContextTrieNode &addTopLevelContextNode(StringRef FName);
157199
ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &NodeToPromo);
158-
void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode,
159-
uint32_t ContextFramesToRemove);
200+
void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode);
160201
ContextTrieNode &
161202
promoteMergeContextSamplesTree(ContextTrieNode &FromNode,
162-
ContextTrieNode &ToNodeParent,
163-
uint32_t ContextFramesToRemove);
203+
ContextTrieNode &ToNodeParent);
204+
ContextTrieNode &moveContextSamples(ContextTrieNode &ToNodeParent,
205+
const LineLocation &CallSite,
206+
ContextTrieNode &&NodeToMove);
207+
// Record `Node` as the trie node for `FSample` in ProfileToNodeMap, replacing
// any node previously recorded for the same profile.
void setContextNode(const FunctionSamples *FSample, ContextTrieNode *Node) {
  auto &Slot = ProfileToNodeMap[FSample];
  Slot = Node;
}
164210
// Map from function name to context profiles (excluding base profile)
165211
StringMap<ContextSamplesTy> FuncToCtxtProfiles;
166212

213+
// Map from each FunctionSamples to the context trie node it belongs to.
214+
std::unordered_map<const FunctionSamples *, ContextTrieNode *>
215+
ProfileToNodeMap;
216+
167217
// Map from function guid to real function names. Only used in md5 mode.
168218
const DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap;
169219

0 commit comments

Comments
 (0)