-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[MC][NFC] Statically allocate storage for decoded pseudo probes and function records #102789
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[MC][NFC] Statically allocate storage for decoded pseudo probes and function records #102789
Conversation
Created using spr 1.3.4 [skip ci]
Created using spr 1.3.4
@llvm/pr-subscribers-pgo @llvm/pr-subscribers-bolt Author: Amir Ayupov (aaupov) ChangesUse #102774 to allocate storage for decoded probes ( Full diff: https://github.com/llvm/llvm-project/pull/102789.diff 4 Files Affected:
diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
index 37a5b937ebcaa..e5ced0f75d187 100644
--- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
+++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp
@@ -200,7 +200,9 @@ void PseudoProbeRewriter::updatePseudoProbes() {
}
unsigned ProbeTrack = AP.second.size();
- std::list<MCDecodedPseudoProbe>::iterator Probe = AP.second.begin();
+ auto Probe = llvm::map_iterator(
+ AP.second.begin(),
+ [](auto RW) -> MCDecodedPseudoProbe & { return RW.get(); });
while (ProbeTrack != 0) {
if (Probe->isBlock()) {
Probe->setAddress(BlkOutputAddress);
@@ -218,9 +220,7 @@ void PseudoProbeRewriter::updatePseudoProbes() {
}
while (CallOutputAddress != CallOutputAddresses.second) {
- AP.second.push_back(*Probe);
- AP.second.back().setAddress(CallOutputAddress->second);
- Probe->getInlineTreeNode()->addProbes(&(AP.second.back()));
+ ProbeDecoder.addInjectedProbe(*Probe, CallOutputAddress->second);
CallOutputAddress = std::next(CallOutputAddress);
}
}
@@ -332,7 +332,7 @@ void PseudoProbeRewriter::encodePseudoProbes() {
ProbeDecoder.getDummyInlineRoot();
for (auto Child = Root.getChildren().begin();
Child != Root.getChildren().end(); ++Child)
- Inlinees[Child->first] = Child->second.get();
+ Inlinees[Child->getInlineSite()] = &*Child;
for (auto Inlinee : Inlinees)
// INT64_MAX is "placeholder" of unused callsite index field in the pair
@@ -362,7 +362,8 @@ void PseudoProbeRewriter::encodePseudoProbes() {
if (Probe->getAddress() == INT64_MAX)
Deleted++;
LLVM_DEBUG(dbgs() << "Deleted Probes:" << Deleted << "\n");
- uint64_t ProbesSize = Cur->getProbes().size() - Deleted;
+ size_t InjectedProbes = ProbeDecoder.getNumInjectedProbes(Cur);
+ uint64_t ProbesSize = Cur->Probes.size() + InjectedProbes - Deleted;
EmitULEB128IntValue(ProbesSize);
// Emit number of direct inlinees
EmitULEB128IntValue(Cur->getChildren().size());
@@ -373,10 +374,18 @@ void PseudoProbeRewriter::encodePseudoProbes() {
EmitDecodedPseudoProbe(Probe);
LastProbe = Probe;
}
+ if (InjectedProbes) {
+ for (MCDecodedPseudoProbe *&Probe : ProbeDecoder.getInjectedProbes(Cur)) {
+ if (Probe->getAddress() == INT64_MAX)
+ continue;
+ EmitDecodedPseudoProbe(Probe);
+ LastProbe = Probe;
+ }
+ }
for (auto Child = Cur->getChildren().begin();
Child != Cur->getChildren().end(); ++Child)
- Inlinees[Child->first] = Child->second.get();
+ Inlinees[Child->getInlineSite()] = &*Child;
for (const auto &Inlinee : Inlinees) {
assert(Cur->Guid != 0 && "non root tree node must have nonzero Guid");
NextNodes.push_back({std::get<1>(Inlinee.first), Inlinee.second});
diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h
index 5c536a8ec0b55..5dba797be5c30 100644
--- a/llvm/include/llvm/MC/MCPseudoProbe.h
+++ b/llvm/include/llvm/MC/MCPseudoProbe.h
@@ -54,20 +54,21 @@
#ifndef LLVM_MC_MCPSEUDOPROBE_H
#define LLVM_MC_MCPSEUDOPROBE_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/Support/ErrorOr.h"
-#include <list>
+#include <functional>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <type_traits>
#include <unordered_map>
-#include <unordered_set>
#include <vector>
namespace llvm {
@@ -103,7 +104,9 @@ using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>;
using GUIDProbeFunctionMap =
std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>;
// Address to pseudo probes map.
-using AddressProbesMap = std::map<uint64_t, std::list<MCDecodedPseudoProbe>>;
+using AddressProbesMap =
+ std::map<uint64_t,
+ std::vector<std::reference_wrapper<MCDecodedPseudoProbe>>>;
class MCDecodedPseudoProbeInlineTree;
@@ -276,17 +279,31 @@ class MCPseudoProbeInlineTree
};
// inline tree node for the decoded pseudo probe
-class MCDecodedPseudoProbeInlineTree
- : public MCPseudoProbeInlineTreeBase<MCDecodedPseudoProbe *,
- MCDecodedPseudoProbeInlineTree> {
+class MCDecodedPseudoProbeInlineTree {
public:
- InlineSite ISite;
+ uint64_t Guid = 0;
+ uint32_t ProbeId = 0;
+
+ // Caller node of the inline site
+ MCDecodedPseudoProbeInlineTree *Parent = nullptr;
MCDecodedPseudoProbeInlineTree() = default;
- MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){};
+ MCDecodedPseudoProbeInlineTree(uint64_t Guid, uint32_t ProbeId,
+ MCDecodedPseudoProbeInlineTree *Parent)
+ : Guid(Guid), ProbeId(ProbeId), Parent(Parent) {}
+
+ // Track children (e.g. inlinees) of current context
+ MutableArrayRef<MCDecodedPseudoProbeInlineTree> Children;
+ // Set of probes that come with the function.
+ MutableArrayRef<MCDecodedPseudoProbe> Probes;
+ // Root node has a GUID 0.
+ bool isRoot() const { return Guid == 0; }
// Return false if it's a dummy inline site
bool hasInlineSite() const { return !isRoot() && !Parent->isRoot(); }
+ InlineSite getInlineSite() const { return InlineSite(Guid, ProbeId); }
+ auto getChildren() const { return Children; }
+ auto getProbes() const { return llvm::make_pointer_range(Probes); }
};
/// Instances of this class represent the pseudo probes inserted into a compile
@@ -336,6 +353,15 @@ class MCPseudoProbeTable {
};
class MCPseudoProbeDecoder {
+ // Decoded pseudo probes vector.
+ std::vector<MCDecodedPseudoProbe> PseudoProbeVec;
+ // Injected pseudo probes, identified by the containing inline tree node.
+ std::unordered_map<const MCDecodedPseudoProbeInlineTree *,
+ std::vector<MCDecodedPseudoProbe>>
+ InjectedProbeMap;
+ // Decoded inline records vector.
+ std::vector<MCDecodedPseudoProbeInlineTree> InlineTreeVec;
+
// GUID to PseudoProbeFuncDesc map.
GUIDProbeFunctionMap GUID2FuncDescMap;
@@ -381,10 +407,6 @@ class MCPseudoProbeDecoder {
const Uint64Set &GuildFilter,
const Uint64Map &FuncStartAddrs);
- bool buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur,
- uint64_t &LastAddr, const Uint64Set &GuildFilter,
- const Uint64Map &FuncStartAddrs);
-
// Print pseudo_probe_desc section info
void printGUID2FuncDescMap(raw_ostream &OS);
@@ -427,6 +449,31 @@ class MCPseudoProbeDecoder {
const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const {
return DummyInlineRoot;
}
+
+ void addInjectedProbe(const MCDecodedPseudoProbe &Probe, uint64_t Address) {
+ const MCDecodedPseudoProbeInlineTree *Parent = Probe.getInlineTreeNode();
+ InjectedProbeMap[Parent].emplace_back(Probe).setAddress(Address);
+ }
+
+ size_t
+ getNumInjectedProbes(const MCDecodedPseudoProbeInlineTree *Parent) const {
+ auto It = InjectedProbeMap.find(Parent);
+ if (It == InjectedProbeMap.end())
+ return 0;
+ return It->second.size();
+ }
+
+ auto getInjectedProbes(MCDecodedPseudoProbeInlineTree *Parent) {
+ auto It = InjectedProbeMap.find(Parent);
+ assert(It != InjectedProbeMap.end());
+ return llvm::make_pointer_range(iterator_range(It->second));
+ }
+
+private:
+ void buildAddress2ProbeMap(MCDecodedPseudoProbeInlineTree *Cur,
+ uint64_t &LastAddr, const Uint64Set &GuildFilter,
+ const Uint64Map &FuncStartAddrs,
+ uint32_t &CurChild);
};
} // end namespace llvm
diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp
index e9e391c8eadd7..9be93445e14cf 100644
--- a/llvm/lib/MC/MCPseudoProbe.cpp
+++ b/llvm/lib/MC/MCPseudoProbe.cpp
@@ -290,7 +290,7 @@ void MCDecodedPseudoProbe::getInlineContext(
while (Cur->hasInlineSite()) {
StringRef FuncName = getProbeFNameForGUID(GUID2FuncMAP, Cur->Parent->Guid);
ContextStack.emplace_back(
- MCPseudoProbeFrameLocation(FuncName, std::get<1>(Cur->ISite)));
+ MCPseudoProbeFrameLocation(FuncName, Cur->ProbeId));
Cur = static_cast<MCDecodedPseudoProbeInlineTree *>(Cur->Parent);
}
// Make the ContextStack in caller-callee order
@@ -417,9 +417,10 @@ bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start,
return true;
}
-bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
+void MCPseudoProbeDecoder::buildAddress2ProbeMap(
MCDecodedPseudoProbeInlineTree *Cur, uint64_t &LastAddr,
- const Uint64Set &GuidFilter, const Uint64Map &FuncStartAddrs) {
+ const Uint64Set &GuidFilter, const Uint64Map &FuncStartAddrs,
+ uint32_t &CurChild) {
// The pseudo_probe section encodes an inline forest and each tree has a
// format defined in MCPseudoProbe.h
@@ -427,7 +428,7 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
bool IsTopLevelFunc = Cur == &DummyInlineRoot;
if (IsTopLevelFunc) {
// Use a sequential id for top level inliner.
- Index = Cur->getChildren().size();
+ Index = CurChild;
} else {
// Read inline site for inlinees
Index = cantFail(errorOrToExpected(readUnsignedNumber<uint32_t>()));
@@ -443,17 +444,21 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
// If the incoming node is null, all its children nodes should be disgarded.
if (Cur) {
// Switch/add to a new tree node(inlinee)
- Cur = Cur->getOrAddNode(std::make_tuple(Guid, Index));
- Cur->Guid = Guid;
+ Cur->Children[CurChild] = MCDecodedPseudoProbeInlineTree(Guid, Index, Cur);
+ Cur = &Cur->Children[CurChild];
if (IsTopLevelFunc && !EncodingIsAddrBased) {
if (auto V = FuncStartAddrs.lookup(Guid))
LastAddr = V;
}
}
+ // Advance CurChild for non-skipped top-level functions and unconditionally
+ // for inlined functions.
+ CurChild += Cur || !IsTopLevelFunc;
// Read number of probes in the current node.
uint32_t NodeCount =
cantFail(errorOrToExpected(readUnsignedNumber<uint32_t>()));
+ uint32_t CurrentProbeCount = 0;
// Read number of direct inlinees
uint32_t ChildrenToProcess =
cantFail(errorOrToExpected(readUnsignedNumber<uint32_t>()));
@@ -494,19 +499,22 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
}
if (Cur && !isSentinelProbe(Attr)) {
- // Populate Address2ProbesMap
- auto &Probes = Address2ProbesMap[Addr];
- Probes.emplace_back(Addr, Cur->Guid, Index, PseudoProbeType(Kind), Attr,
- Discriminator, Cur);
- Cur->addProbes(&Probes.back());
+ PseudoProbeVec.emplace_back(Addr, Cur->Guid, Index, PseudoProbeType(Kind),
+ Attr, Discriminator, Cur);
+ Address2ProbesMap[Addr].emplace_back(PseudoProbeVec.back());
+ ++CurrentProbeCount;
}
LastAddr = Addr;
}
- for (uint32_t I = 0; I < ChildrenToProcess; I++) {
- buildAddress2ProbeMap(Cur, LastAddr, GuidFilter, FuncStartAddrs);
+ if (Cur) {
+ Cur->Probes = MutableArrayRef(PseudoProbeVec).take_back(CurrentProbeCount);
+ InlineTreeVec.resize(InlineTreeVec.size() + ChildrenToProcess);
+ Cur->Children = MutableArrayRef(InlineTreeVec).take_back(ChildrenToProcess);
+ }
+ for (uint32_t I = 0; I < ChildrenToProcess;) {
+ buildAddress2ProbeMap(Cur, LastAddr, GuidFilter, FuncStartAddrs, I);
}
- return true;
}
bool MCPseudoProbeDecoder::countRecords(bool IsTopLevelFunc, bool &Discard,
@@ -605,13 +613,20 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
TopLevelFuncs += !Discard;
}
assert(Data == End && "Have unprocessed data in pseudo_probe section");
+ PseudoProbeVec.reserve(ProbeCount);
+ InlineTreeVec.reserve(InlinedCount);
+
+ // Allocate top-level function records as children of DummyInlineRoot.
+ InlineTreeVec.resize(TopLevelFuncs);
+ DummyInlineRoot.Children = MutableArrayRef(InlineTreeVec);
Data = Start;
End = Data + Size;
uint64_t LastAddr = 0;
+ uint32_t Child = 0;
while (Data < End)
buildAddress2ProbeMap(&DummyInlineRoot, LastAddr, GuidFilter,
- FuncStartAddrs);
+ FuncStartAddrs, Child);
assert(Data == End && "Have unprocessed data in pseudo_probe section");
return true;
}
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 574a9c9f52bf1..fe7d3ffa476eb 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -132,7 +132,7 @@ void BinarySizeContextTracker::trackInlineesOptimizedAway(
MCPseudoProbeDecoder &ProbeDecoder) {
ProbeFrameStack ProbeContext;
for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren())
- trackInlineesOptimizedAway(ProbeDecoder, *Child.second, ProbeContext);
+ trackInlineesOptimizedAway(ProbeDecoder, Child, ProbeContext);
}
void BinarySizeContextTracker::trackInlineesOptimizedAway(
@@ -160,9 +160,9 @@ void BinarySizeContextTracker::trackInlineesOptimizedAway(
// DFS down the probe inline tree
for (const auto &ChildNode : ProbeNode.getChildren()) {
- InlineSite Location = ChildNode.first;
+ InlineSite Location = ChildNode.getInlineSite();
ProbeContext.back().second = std::get<1>(Location);
- trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second, ProbeContext);
+ trackInlineesOptimizedAway(ProbeDecoder, ChildNode, ProbeContext);
}
ProbeContext.pop_back();
@@ -454,8 +454,8 @@ void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
// Build TopLevelProbeFrameMap to track size for optimized inlinees when probe
// is available
if (TrackFuncContextSize) {
- for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) {
- auto *Frame = Child.second.get();
+ for (auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) {
+ auto *Frame = &Child;
StringRef FuncName =
ProbeDecoder.getFuncDescForGUID(Frame->Guid)->FuncName;
TopLevelProbeFrameMap[FuncName] = Frame;
|
cc @wlei-llvm we should measure memory on llvm-profgen for this too. |
Created using spr 1.3.5 [skip ci]
Created using spr 1.3.5
llvm/lib/MC/MCPseudoProbe.cpp
Outdated
MCDecodedPseudoProbeInlineTree *Cur, uint64_t &LastAddr, | ||
const Uint64Set &GuidFilter, const Uint64Map &FuncStartAddrs) { | ||
const Uint64Set &GuidFilter, const Uint64Map &FuncStartAddrs, | ||
uint32_t &CurChild) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about renaming CurChild
to CurChildIndex
? Here the type of Cur
is MCDecodedPseudoProbeInlineTree
but the type of CurChild
is uint32_t
llvm/lib/MC/MCPseudoProbe.cpp
Outdated
// Advance CurChild for non-skipped top-level functions and unconditionally | ||
// for inlined functions. | ||
if (IsTopLevelFunc) | ||
CurChild += !!Cur; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it possible to move the index advancing things outside of the function(to the caller)?
For non-top-level funcs, just like the typical iteration:
for (uint32_t I = 0; ...; I++)
....
which would be better for the readability? also then we don't need to pass it by reference.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It was initially this way, but I had to change to advancing CurChildIndex from within buildAddress2ProbeMap. The problem is if GuidFilter is in place, we will only allocate enough top-level entries for filtered functions. Therefore we can't advance CurChildIndex from top-level buildAddress2ProbeMap invocation (GUID is not yet parsed).
We can advance non-top level child index though.
Let me refactor this a bit so it's easier to follow.
Created using spr 1.3.4
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the improvement, I will try it on llvm-profgen to get some numbers.
AP.second.push_back(*Probe); | ||
AP.second.back().setAddress(CallOutputAddress->second); | ||
Probe->getInlineTreeNode()->addProbes(&(AP.second.back())); | ||
ProbeDecoder.addInjectedProbe(*Probe, CallOutputAddress->second); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why we need this change? seems it's not related to decoding pseudo probe, or is it because we allocate a fixed size of MCDecodedPseudoProbe
, so later there is no way to add additional probe to the vector. To address this, we have to use a new container InjectedProbeMap
to save new probes. If so, could you add comments to explain this(maybe in the definition of InjectedProbeMap
)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's required because appending to ProbeVec may cause its reallocation so that pointers to its elements will become invalid. I'll leave a comment.
llvm/include/llvm/MC/MCPseudoProbe.h
Outdated
class MCDecodedPseudoProbeInlineTree | ||
: public MCPseudoProbeInlineTreeBase<MCDecodedPseudoProbe *, | ||
MCDecodedPseudoProbeInlineTree> { | ||
class MCDecodedPseudoProbeInlineTree { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Now that this class is no longer derived from MCPseudoProbeInlineTreeBase
, MCPseudoProbeInlineTree
will be the only derived class ofMCPseudoProbeInlineTreeBase
, there is no need for the base class? we may need to refactor it.
Created using spr 1.3.4 [skip ci]
Created using spr 1.3.4
Created using spr 1.3.4
Measured
After those changes:
Awesome! |
@@ -605,13 +616,20 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap( | |||
TopLevelFuncs += !Discard; | |||
} | |||
assert(Data == End && "Have unprocessed data in pseudo_probe section"); | |||
PseudoProbeVec.reserve(ProbeCount); | |||
InlineTreeVec.reserve(InlinedCount); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since this assumes the vector's reserved size will be equal to the amount it will be used. How about adding assertions at the end of function. like assert(InlineTreeVec.size() == InlinedCount && ...)
this is for checking any accidental re-allocation/extension of the vector during the build the map.
llvm/include/llvm/MC/MCPseudoProbe.h
Outdated
std::vector<ProbeType> &getProbes() { return Probes; } | ||
const std::vector<ProbeType> &getProbes() const { return Probes; } | ||
void addProbes(ProbeType Probe) { Probes.push_back(Probe); } | ||
ProbesType &getProbes() { return Probes; } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you know where we require this function(anywhere can't replace with const
), I thought we won't change the probe after it's decoded. is it possible to only keep only one getProbes
and getChildren
(with the const
)?
for (auto *Probe : I.first->getProbes()) { | ||
FunctionProfile->addBodySamples(Probe->getIndex(), | ||
Probe->getDiscriminator(), 0); | ||
for (auto &Probe : I.first->getProbes()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems this can be const auto
Created using spr 1.3.4
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks.
Created using spr 1.3.4 [skip ci]
An unordered_set include was removed from a header in 04ebd19 "[MC][NFC] Statically allocate storage for decoded pseudo probes and function records (llvm#102789)", but SPIRVEmitIntrinsics was getting the definition from that transitive include. Fix the build by including the header explicitly.
…unction records Use #102774 to allocate storage for decoded probes (`PseudoProbeVec`) and function records (`InlineTreeVec`). Leverage that to also shrink sizes of `MCDecodedPseudoProbe`: - Drop Guid since it's accessible via `InlineTree`. `MCDecodedPseudoProbeInlineTree`: - Keep track of probes and inlinees using `ArrayRef`s now that probes and function records belonging to the same function are allocated contiguously. This reduces peak RSS from 13.7 GiB to 9.7 GiB and pseudo probe parsing time (as part of perf2bolt) from 15.3s to 9.6s for a large binary with 400MiB .pseudo_probe section containing 43M probes and 25M function records. Depends on: #102774 #102787 #102788 Reviewers: maksfb, rafaelauler, dcci, ayermolo, wlei-llvm Reviewed By: wlei-llvm Pull Request: #102789
Use #102774 to allocate storage for decoded probes (
PseudoProbeVec
)and function records (
InlineTreeVec
).Leverage that to also shrink sizes of
MCDecodedPseudoProbe
:InlineTree
.MCDecodedPseudoProbeInlineTree
:ArrayRef
s now that probesand function records belonging to the same function are allocated
contiguously.
This reduces peak RSS from 13.7 GiB to 9.7 GiB and pseudo probe parsing
time (as part of perf2bolt) from 15.3s to 9.6s for a large binary with
400MiB .pseudo_probe section containing 43M probes and 25M function
records.
Depends on:
#102774
#102787
#102788