Skip to content

Commit 04ebd19

Browse files
authored
[MC][NFC] Statically allocate storage for decoded pseudo probes and function records
Use #102774 to allocate storage for decoded probes (`PseudoProbeVec`) and function records (`InlineTreeVec`).

Leverage that to also shrink sizes of `MCDecodedPseudoProbe`:
- Drop Guid since it's accessible via `InlineTree`.

`MCDecodedPseudoProbeInlineTree`:
- Keep track of probes and inlinees using `ArrayRef`s now that probes and function records belonging to the same function are allocated contiguously.

This reduces peak RSS from 13.7 GiB to 9.7 GiB and pseudo probe parsing time (as part of perf2bolt) from 15.3s to 9.6s for a large binary with 400MiB .pseudo_probe section containing 43M probes and 25M function records.

Depends on: #102774 #102787 #102788

Reviewers: maksfb, rafaelauler, dcci, ayermolo, wlei-llvm

Reviewed By: wlei-llvm

Pull Request: #102789
1 parent 121ed07 commit 04ebd19

File tree

5 files changed

+164
-76
lines changed

5 files changed

+164
-76
lines changed

bolt/lib/Rewrite/PseudoProbeRewriter.cpp

Lines changed: 21 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -200,7 +200,9 @@ void PseudoProbeRewriter::updatePseudoProbes() {
200200
}
201201

202202
unsigned ProbeTrack = AP.second.size();
203-
std::list<MCDecodedPseudoProbe>::iterator Probe = AP.second.begin();
203+
auto Probe = llvm::map_iterator(
204+
AP.second.begin(),
205+
[](auto RW) -> MCDecodedPseudoProbe & { return RW.get(); });
204206
while (ProbeTrack != 0) {
205207
if (Probe->isBlock()) {
206208
Probe->setAddress(BlkOutputAddress);
@@ -218,9 +220,7 @@ void PseudoProbeRewriter::updatePseudoProbes() {
218220
}
219221

220222
while (CallOutputAddress != CallOutputAddresses.second) {
221-
AP.second.push_back(*Probe);
222-
AP.second.back().setAddress(CallOutputAddress->second);
223-
Probe->getInlineTreeNode()->addProbes(&(AP.second.back()));
223+
ProbeDecoder.addInjectedProbe(*Probe, CallOutputAddress->second);
224224
CallOutputAddress = std::next(CallOutputAddress);
225225
}
226226
}
@@ -332,7 +332,7 @@ void PseudoProbeRewriter::encodePseudoProbes() {
332332
ProbeDecoder.getDummyInlineRoot();
333333
for (auto Child = Root.getChildren().begin();
334334
Child != Root.getChildren().end(); ++Child)
335-
Inlinees[Child->first] = Child->second.get();
335+
Inlinees[Child->getInlineSite()] = &*Child;
336336

337337
for (auto Inlinee : Inlinees)
338338
// INT64_MAX is "placeholder" of unused callsite index field in the pair
@@ -358,25 +358,37 @@ void PseudoProbeRewriter::encodePseudoProbes() {
358358
EmitInt(Cur->Guid, 8);
359359
// Emit number of probes in this node
360360
uint64_t Deleted = 0;
361-
for (MCDecodedPseudoProbe *&Probe : Cur->getProbes())
361+
for (MCDecodedPseudoProbe *&Probe :
362+
llvm::make_pointer_range(Cur->getProbes()))
362363
if (Probe->getAddress() == INT64_MAX)
363364
Deleted++;
364365
LLVM_DEBUG(dbgs() << "Deleted Probes:" << Deleted << "\n");
365-
uint64_t ProbesSize = Cur->getProbes().size() - Deleted;
366+
size_t InjectedProbes = ProbeDecoder.getNumInjectedProbes(Cur);
367+
uint64_t ProbesSize = Cur->getProbes().size() - Deleted + InjectedProbes;
366368
EmitULEB128IntValue(ProbesSize);
367369
// Emit number of direct inlinees
368370
EmitULEB128IntValue(Cur->getChildren().size());
369371
// Emit probes in this group
370-
for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) {
372+
for (MCDecodedPseudoProbe *&Probe :
373+
llvm::make_pointer_range(Cur->getProbes())) {
371374
if (Probe->getAddress() == INT64_MAX)
372375
continue;
373376
EmitDecodedPseudoProbe(Probe);
374377
LastProbe = Probe;
375378
}
379+
if (InjectedProbes) {
380+
for (MCDecodedPseudoProbe *&Probe :
381+
llvm::make_pointer_range(ProbeDecoder.getInjectedProbes(Cur))) {
382+
if (Probe->getAddress() == INT64_MAX)
383+
continue;
384+
EmitDecodedPseudoProbe(Probe);
385+
LastProbe = Probe;
386+
}
387+
}
376388

377389
for (auto Child = Cur->getChildren().begin();
378390
Child != Cur->getChildren().end(); ++Child)
379-
Inlinees[Child->first] = Child->second.get();
391+
Inlinees[Child->getInlineSite()] = &*Child;
380392
for (const auto &Inlinee : Inlinees) {
381393
assert(Cur->Guid != 0 && "non root tree node must have nonzero Guid");
382394
NextNodes.push_back({std::get<1>(Inlinee.first), Inlinee.second});

llvm/include/llvm/MC/MCPseudoProbe.h

Lines changed: 95 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -54,20 +54,21 @@
5454
#ifndef LLVM_MC_MCPSEUDOPROBE_H
5555
#define LLVM_MC_MCPSEUDOPROBE_H
5656

57+
#include "llvm/ADT/ArrayRef.h"
5758
#include "llvm/ADT/DenseMap.h"
5859
#include "llvm/ADT/DenseSet.h"
5960
#include "llvm/ADT/SmallVector.h"
6061
#include "llvm/ADT/StringRef.h"
62+
#include "llvm/ADT/iterator.h"
6163
#include "llvm/IR/PseudoProbe.h"
6264
#include "llvm/Support/ErrorOr.h"
63-
#include <list>
65+
#include <functional>
6466
#include <map>
6567
#include <memory>
6668
#include <string>
6769
#include <tuple>
6870
#include <type_traits>
6971
#include <unordered_map>
70-
#include <unordered_set>
7172
#include <vector>
7273

7374
namespace llvm {
@@ -103,14 +104,15 @@ using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>;
103104
using GUIDProbeFunctionMap =
104105
std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>;
105106
// Address to pseudo probes map.
106-
using AddressProbesMap = std::map<uint64_t, std::list<MCDecodedPseudoProbe>>;
107+
using AddressProbesMap =
108+
std::map<uint64_t,
109+
std::vector<std::reference_wrapper<MCDecodedPseudoProbe>>>;
107110

108111
class MCDecodedPseudoProbeInlineTree;
109112

110113
class MCPseudoProbeBase {
111114
protected:
112-
uint64_t Guid;
113-
uint64_t Index;
115+
uint32_t Index;
114116
uint32_t Discriminator;
115117
uint8_t Attributes;
116118
uint8_t Type;
@@ -120,14 +122,12 @@ class MCPseudoProbeBase {
120122
const static uint32_t PseudoProbeFirstId = 1;
121123

122124
public:
123-
MCPseudoProbeBase(uint64_t G, uint64_t I, uint64_t At, uint8_t T, uint32_t D)
124-
: Guid(G), Index(I), Discriminator(D), Attributes(At), Type(T) {}
125+
MCPseudoProbeBase(uint64_t I, uint64_t At, uint8_t T, uint32_t D)
126+
: Index(I), Discriminator(D), Attributes(At), Type(T) {}
125127

126128
bool isEntry() const { return Index == PseudoProbeFirstId; }
127129

128-
uint64_t getGuid() const { return Guid; }
129-
130-
uint64_t getIndex() const { return Index; }
130+
uint32_t getIndex() const { return Index; }
131131

132132
uint32_t getDiscriminator() const { return Discriminator; }
133133

@@ -157,18 +157,20 @@ class MCPseudoProbeBase {
157157
/// uses an address from a temporary label created at the current address in the
158158
/// current section.
159159
class MCPseudoProbe : public MCPseudoProbeBase {
160+
uint64_t Guid;
160161
MCSymbol *Label;
161162

162163
public:
163164
MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type,
164165
uint64_t Attributes, uint32_t Discriminator)
165-
: MCPseudoProbeBase(Guid, Index, Attributes, Type, Discriminator),
166+
: MCPseudoProbeBase(Index, Attributes, Type, Discriminator), Guid(Guid),
166167
Label(Label) {
167168
assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8");
168169
assert(Attributes <= 0xFF &&
169170
"Probe attributes too big to encode, exceeding 2^16");
170171
}
171172

173+
uint64_t getGuid() const { return Guid; };
172174
MCSymbol *getLabel() const { return Label; }
173175
void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const;
174176
};
@@ -181,11 +183,11 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase {
181183
MCDecodedPseudoProbeInlineTree *InlineTree;
182184

183185
public:
184-
MCDecodedPseudoProbe(uint64_t Ad, uint64_t G, uint32_t I, PseudoProbeType K,
185-
uint8_t At, uint32_t D,
186-
MCDecodedPseudoProbeInlineTree *Tree)
187-
: MCPseudoProbeBase(G, I, At, static_cast<uint8_t>(K), D), Address(Ad),
186+
MCDecodedPseudoProbe(uint64_t Ad, uint32_t I, PseudoProbeType K, uint8_t At,
187+
uint32_t D, MCDecodedPseudoProbeInlineTree *Tree)
188+
: MCPseudoProbeBase(I, At, static_cast<uint8_t>(K), D), Address(Ad),
188189
InlineTree(Tree){};
190+
uint64_t getGuid() const;
189191

190192
uint64_t getAddress() const { return Address; }
191193

@@ -211,21 +213,14 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase {
211213
bool ShowName) const;
212214
};
213215

214-
template <typename ProbeType, typename DerivedProbeInlineTreeType>
216+
template <typename ProbesType, typename DerivedProbeInlineTreeType,
217+
typename InlinedProbeTreeMap>
215218
class MCPseudoProbeInlineTreeBase {
216-
struct InlineSiteHash {
217-
uint64_t operator()(const InlineSite &Site) const {
218-
return std::get<0>(Site) ^ std::get<1>(Site);
219-
}
220-
};
221-
222219
protected:
223220
// Track children (e.g. inlinees) of current context
224-
using InlinedProbeTreeMap = std::unordered_map<
225-
InlineSite, std::unique_ptr<DerivedProbeInlineTreeType>, InlineSiteHash>;
226221
InlinedProbeTreeMap Children;
227222
// Set of probes that come with the function.
228-
std::vector<ProbeType> Probes;
223+
ProbesType Probes;
229224
MCPseudoProbeInlineTreeBase() {
230225
static_assert(std::is_base_of<MCPseudoProbeInlineTreeBase,
231226
DerivedProbeInlineTreeType>::value,
@@ -240,12 +235,10 @@ class MCPseudoProbeInlineTreeBase {
240235
bool isRoot() const { return Guid == 0; }
241236
InlinedProbeTreeMap &getChildren() { return Children; }
242237
const InlinedProbeTreeMap &getChildren() const { return Children; }
243-
std::vector<ProbeType> &getProbes() { return Probes; }
244-
const std::vector<ProbeType> &getProbes() const { return Probes; }
245-
void addProbes(ProbeType Probe) { Probes.push_back(Probe); }
238+
const ProbesType &getProbes() const { return Probes; }
246239
// Caller node of the inline site
247-
MCPseudoProbeInlineTreeBase<ProbeType, DerivedProbeInlineTreeType> *Parent =
248-
nullptr;
240+
MCPseudoProbeInlineTreeBase<ProbesType, DerivedProbeInlineTreeType,
241+
InlinedProbeTreeMap> *Parent = nullptr;
249242
DerivedProbeInlineTreeType *getOrAddNode(const InlineSite &Site) {
250243
auto Ret = Children.emplace(
251244
Site, std::make_unique<DerivedProbeInlineTreeType>(Site));
@@ -259,9 +252,17 @@ class MCPseudoProbeInlineTreeBase {
259252
// instance is created as the root of a tree.
260253
// A real instance of this class is created for each function, either a
261254
// not inlined function that has code in .text section or an inlined function.
255+
struct InlineSiteHash {
256+
uint64_t operator()(const InlineSite &Site) const {
257+
return std::get<0>(Site) ^ std::get<1>(Site);
258+
}
259+
};
262260
class MCPseudoProbeInlineTree
263-
: public MCPseudoProbeInlineTreeBase<MCPseudoProbe,
264-
MCPseudoProbeInlineTree> {
261+
: public MCPseudoProbeInlineTreeBase<
262+
std::vector<MCPseudoProbe>, MCPseudoProbeInlineTree,
263+
std::unordered_map<InlineSite,
264+
std::unique_ptr<MCPseudoProbeInlineTree>,
265+
InlineSiteHash>> {
265266
public:
266267
MCPseudoProbeInlineTree() = default;
267268
MCPseudoProbeInlineTree(uint64_t Guid) { this->Guid = Guid; }
@@ -277,16 +278,31 @@ class MCPseudoProbeInlineTree
277278

278279
// inline tree node for the decoded pseudo probe
279280
class MCDecodedPseudoProbeInlineTree
280-
: public MCPseudoProbeInlineTreeBase<MCDecodedPseudoProbe *,
281-
MCDecodedPseudoProbeInlineTree> {
282-
public:
283-
InlineSite ISite;
281+
: public MCPseudoProbeInlineTreeBase<
282+
MCDecodedPseudoProbe *, MCDecodedPseudoProbeInlineTree,
283+
MutableArrayRef<MCDecodedPseudoProbeInlineTree>> {
284+
uint32_t NumProbes = 0;
285+
uint32_t ProbeId = 0;
284286

287+
public:
285288
MCDecodedPseudoProbeInlineTree() = default;
286-
MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){};
289+
MCDecodedPseudoProbeInlineTree(const InlineSite &Site,
290+
MCDecodedPseudoProbeInlineTree *Parent)
291+
: ProbeId(std::get<1>(Site)) {
292+
this->Guid = std::get<0>(Site);
293+
this->Parent = Parent;
294+
}
287295

288296
// Return false if it's a dummy inline site
289297
bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); }
298+
InlineSite getInlineSite() const { return InlineSite(Guid, ProbeId); }
299+
void setProbes(MutableArrayRef<MCDecodedPseudoProbe> ProbesRef) {
300+
Probes = ProbesRef.data();
301+
NumProbes = ProbesRef.size();
302+
}
303+
auto getProbes() const {
304+
return MutableArrayRef<MCDecodedPseudoProbe>(Probes, NumProbes);
305+
}
290306
};
291307

292308
/// Instances of this class represent the pseudo probes inserted into a compile
@@ -336,6 +352,20 @@ class MCPseudoProbeTable {
336352
};
337353

338354
class MCPseudoProbeDecoder {
355+
// Decoded pseudo probes vector.
356+
std::vector<MCDecodedPseudoProbe> PseudoProbeVec;
357+
// Injected pseudo probes, identified by the containing inline tree node.
358+
// Need to keep injected probes separately for two reasons:
359+
// 1) Probes cannot be added to the PseudoProbeVec: appending may cause
360+
// reallocation so that pointers to its elements will become invalid.
361+
// 2) Probes belonging to a function record must be contiguous in PseudoProbeVec
362+
// as owning InlineTree references them with an ArrayRef to save space.
363+
std::unordered_map<const MCDecodedPseudoProbeInlineTree *,
364+
std::vector<MCDecodedPseudoProbe>>
365+
InjectedProbeMap;
366+
// Decoded inline records vector.
367+
std::vector<MCDecodedPseudoProbeInlineTree> InlineTreeVec;
368+
339369
// GUID to PseudoProbeFuncDesc map.
340370
GUIDProbeFunctionMap GUID2FuncDescMap;
341371

@@ -382,10 +412,6 @@ class MCPseudoProbeDecoder {
382412
const Uint64Set &GuildFilter,
383413
const Uint64Map &FuncStartAddrs);
384414

385-
bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur,
386-
uint64_t &LastAddr, const Uint64Set &GuildFilter,
387-
const Uint64Map &FuncStartAddrs);
388-
389415
// Print pseudo_probe_desc section info
390416
void printGUID2FuncDescMap(raw_ostream &OS);
391417

@@ -428,6 +454,34 @@ class MCPseudoProbeDecoder {
428454
const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const {
429455
return DummyInlineRoot;
430456
}
457+
458+
void addInjectedProbe(const MCDecodedPseudoProbe &Probe, uint64_t Address) {
459+
const MCDecodedPseudoProbeInlineTree *Parent = Probe.getInlineTreeNode();
460+
InjectedProbeMap[Parent].emplace_back(Probe).setAddress(Address);
461+
}
462+
463+
size_t
464+
getNumInjectedProbes(const MCDecodedPseudoProbeInlineTree *Parent) const {
465+
auto It = InjectedProbeMap.find(Parent);
466+
if (It == InjectedProbeMap.end())
467+
return 0;
468+
return It->second.size();
469+
}
470+
471+
auto getInjectedProbes(MCDecodedPseudoProbeInlineTree *Parent) {
472+
auto It = InjectedProbeMap.find(Parent);
473+
assert(It != InjectedProbeMap.end());
474+
return iterator_range(It->second);
475+
}
476+
477+
private:
478+
// Recursively parse an inlining tree encoded in pseudo_probe section. Returns
479+
// whether the top-level node should be skipped.
480+
template <bool IsTopLevelFunc>
481+
bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur,
482+
uint64_t &LastAddr, const Uint64Set &GuildFilter,
483+
const Uint64Map &FuncStartAddrs,
484+
const uint32_t CurChildIndex);
431485
};
432486

433487
} // end namespace llvm

0 commit comments

Comments
 (0)