Skip to content

Commit 4c5156c

Browse files
committed
Use new profile probe encoding
Created using spr 1.3.4
2 parents 8fafc04 + b1be6e6 commit 4c5156c

File tree

502 files changed

+6687
-14466
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

502 files changed

+6687
-14466
lines changed

bolt/include/bolt/Profile/ProfileYAMLMapping.h

Lines changed: 44 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -95,28 +95,30 @@ template <> struct MappingTraits<bolt::SuccessorInfo> {
9595

9696
namespace bolt {
9797
struct PseudoProbeInfo {
98-
uint64_t Index;
99-
uint32_t InlineTreeIndex;
100-
llvm::yaml::Hex32 Offset{0};
101-
uint8_t Type;
98+
uint32_t InlineTreeIndex = 0;
99+
uint64_t BlockMask = 0; // bitset with probe indices
100+
// Assume BlockMask == 1 if no other probes are set
101+
std::vector<uint64_t> BlockProbes;
102+
std::vector<uint64_t> CallProbes;
103+
std::vector<uint64_t> IndCallProbes;
104+
std::vector<uint32_t> InlineTreeNodes;
102105

103106
bool operator==(const PseudoProbeInfo &Other) const {
104-
return InlineTreeIndex == Other.InlineTreeIndex && Index == Other.Index;
105-
}
106-
bool operator<(const PseudoProbeInfo &Other) const {
107-
if (InlineTreeIndex == Other.InlineTreeIndex)
108-
return Index < Other.Index;
109-
return InlineTreeIndex < Other.InlineTreeIndex;
107+
return InlineTreeIndex == Other.InlineTreeIndex &&
108+
BlockProbes == Other.BlockProbes && CallProbes == Other.CallProbes &&
109+
IndCallProbes == Other.IndCallProbes;
110110
}
111111
};
112112
} // end namespace bolt
113113

114114
template <> struct MappingTraits<bolt::PseudoProbeInfo> {
115115
static void mapping(IO &YamlIO, bolt::PseudoProbeInfo &PI) {
116-
YamlIO.mapRequired("id", PI.Index);
117-
YamlIO.mapRequired("type", PI.Type);
118-
YamlIO.mapOptional("inline_tree_id", PI.InlineTreeIndex, (uint32_t)0);
119-
YamlIO.mapOptional("offset", PI.Offset, (uint32_t)0);
116+
YamlIO.mapOptional("blk", PI.BlockMask, 0);
117+
YamlIO.mapOptional("blks", PI.BlockProbes, std::vector<uint64_t>());
118+
YamlIO.mapOptional("calls", PI.CallProbes, std::vector<uint64_t>());
119+
YamlIO.mapOptional("indcalls", PI.IndCallProbes, std::vector<uint64_t>());
120+
YamlIO.mapOptional("id", PI.InlineTreeIndex, 0);
121+
YamlIO.mapOptional("ids", PI.InlineTreeNodes, std::vector<uint32_t>());
120122
}
121123

122124
static const bool flow = true;
@@ -126,7 +128,7 @@ template <> struct MappingTraits<bolt::PseudoProbeInfo> {
126128

127129
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::CallSiteInfo)
128130
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::SuccessorInfo)
129-
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::PseudoProbeInfo)
131+
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::PseudoProbeInfo)
130132

131133
namespace llvm {
132134
namespace yaml {
@@ -162,31 +164,26 @@ template <> struct MappingTraits<bolt::BinaryBasicBlockProfile> {
162164
std::vector<bolt::CallSiteInfo>());
163165
YamlIO.mapOptional("succ", BBP.Successors,
164166
std::vector<bolt::SuccessorInfo>());
165-
YamlIO.mapOptional("pseudo_probes", BBP.PseudoProbes,
167+
YamlIO.mapOptional("probes", BBP.PseudoProbes,
166168
std::vector<bolt::PseudoProbeInfo>());
167169
}
168170
};
169171

170172
namespace bolt {
171173
struct InlineTreeInfo {
172-
uint32_t Index;
173-
uint32_t ParentIndex;
174+
uint32_t ParentIndexDelta;
174175
uint32_t CallSiteProbe;
175-
llvm::yaml::Hex64 GUID;
176-
llvm::yaml::Hex64 Hash;
177-
bool operator==(const InlineTreeInfo &Other) const {
178-
return Index == Other.Index;
179-
}
176+
// Index into PseudoProbeDesc.GUID plus one; 0 means same as previous
177+
uint32_t GUIDIndex;
178+
bool operator==(const InlineTreeInfo &) const { return false; }
180179
};
181180
} // end namespace bolt
182181

183182
template <> struct MappingTraits<bolt::InlineTreeInfo> {
184183
static void mapping(IO &YamlIO, bolt::InlineTreeInfo &ITI) {
185-
YamlIO.mapRequired("guid", ITI.GUID);
186-
YamlIO.mapRequired("hash", ITI.Hash);
187-
YamlIO.mapRequired("id", ITI.Index);
188-
YamlIO.mapOptional("parent", ITI.ParentIndex, (uint32_t)0);
189-
YamlIO.mapOptional("callsite", ITI.CallSiteProbe, 0);
184+
YamlIO.mapOptional("g", ITI.GUIDIndex, 0);
185+
YamlIO.mapOptional("p", ITI.ParentIndexDelta, 0);
186+
YamlIO.mapOptional("cs", ITI.CallSiteProbe, 0);
190187
}
191188

192189
static const bool flow = true;
@@ -195,7 +192,7 @@ template <> struct MappingTraits<bolt::InlineTreeInfo> {
195192
} // end namespace llvm
196193

197194
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::BinaryBasicBlockProfile)
198-
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::InlineTreeInfo)
195+
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::InlineTreeInfo)
199196

200197
namespace llvm {
201198
namespace yaml {
@@ -273,10 +270,26 @@ template <> struct MappingTraits<bolt::BinaryProfileHeader> {
273270
}
274271
};
275272

273+
namespace bolt {
274+
struct PseudoProbeDesc {
275+
std::vector<Hex64> GUID;
276+
std::vector<Hex64> Hash;
277+
std::vector<uint32_t> GUIDHash; // Index of hash for that GUID in Hash
278+
};
279+
} // end namespace bolt
280+
281+
template <> struct MappingTraits<bolt::PseudoProbeDesc> {
282+
static void mapping(IO &YamlIO, bolt::PseudoProbeDesc &PD) {
283+
YamlIO.mapRequired("gs", PD.GUID);
284+
YamlIO.mapRequired("gh", PD.GUIDHash);
285+
YamlIO.mapRequired("hs", PD.Hash);
286+
}
287+
};
276288
} // end namespace yaml
277289
} // end namespace llvm
278290

279291
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::BinaryFunctionProfile)
292+
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::PseudoProbeDesc)
280293

281294
namespace llvm {
282295
namespace yaml {
@@ -285,13 +298,15 @@ namespace bolt {
285298
struct BinaryProfile {
286299
BinaryProfileHeader Header;
287300
std::vector<BinaryFunctionProfile> Functions;
301+
PseudoProbeDesc PseudoProbeDesc;
288302
};
289303
} // namespace bolt
290304

291305
template <> struct MappingTraits<bolt::BinaryProfile> {
292306
static void mapping(IO &YamlIO, bolt::BinaryProfile &BP) {
293307
YamlIO.mapRequired("header", BP.Header);
294308
YamlIO.mapRequired("functions", BP.Functions);
309+
YamlIO.mapOptional("pseudo_probe_desc", BP.PseudoProbeDesc);
295310
}
296311
};
297312

bolt/include/bolt/Profile/YAMLProfileWriter.h

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,68 @@ class YAMLProfileWriter {
3232
/// Save execution profile for that instance.
3333
std::error_code writeProfile(const RewriteInstance &RI);
3434

35+
using InlineTreeMapTy =
36+
DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>;
37+
struct InlineTreeDesc {
38+
template <typename T> using GUIDMapTy = std::unordered_map<uint64_t, T>;
39+
using GUIDNodeMap = GUIDMapTy<const MCDecodedPseudoProbeInlineTree *>;
40+
using GUIDNumMap = GUIDMapTy<uint32_t>;
41+
GUIDNodeMap TopLevelGUIDToInlineTree;
42+
GUIDNumMap GUIDIdxMap;
43+
GUIDNumMap HashIdxMap;
44+
};
45+
46+
static std::tuple<std::vector<yaml::bolt::InlineTreeInfo>, InlineTreeMapTy>
47+
convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
48+
const InlineTreeDesc &InlineTree, uint64_t GUID);
49+
3550
static yaml::bolt::BinaryFunctionProfile
3651
convert(const BinaryFunction &BF, bool UseDFS,
52+
const InlineTreeDesc &InlineTree,
3753
const BoltAddressTranslation *BAT = nullptr);
3854

55+
static std::tuple<yaml::bolt::PseudoProbeDesc, InlineTreeDesc>
56+
convertPseudoProbeDesc(const MCPseudoProbeDecoder &PseudoProbeDecoder);
57+
3958
/// Set CallSiteInfo destination fields from \p Symbol and return a target
4059
/// BinaryFunction for that symbol.
4160
static const BinaryFunction *
4261
setCSIDestination(const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI,
4362
const MCSymbol *Symbol, const BoltAddressTranslation *BAT,
4463
uint32_t Offset = 0);
45-
};
4664

65+
private:
66+
struct InlineTreeNode {
67+
const MCDecodedPseudoProbeInlineTree *InlineTree;
68+
uint64_t GUID;
69+
uint64_t Hash;
70+
uint32_t ParentId;
71+
uint32_t InlineSite;
72+
};
73+
static std::vector<InlineTreeNode>
74+
getInlineTree(const MCPseudoProbeDecoder &Decoder,
75+
const MCDecodedPseudoProbeInlineTree *Root);
76+
77+
// 0 - block probe, 1 - indirect call, 2 - direct call
78+
using ProbeList = std::array<SmallVector<uint64_t, 0>, 3>;
79+
using NodeIdToProbes = DenseMap<uint32_t, ProbeList>;
80+
static std::vector<yaml::bolt::PseudoProbeInfo>
81+
convertNodeProbes(NodeIdToProbes &NodeProbes);
82+
83+
public:
84+
template <typename T>
85+
static std::vector<yaml::bolt::PseudoProbeInfo>
86+
writeBlockProbes(T Probes, const InlineTreeMapTy &InlineTreeNodeId) {
87+
NodeIdToProbes NodeProbes;
88+
for (const MCDecodedPseudoProbe &Probe : Probes) {
89+
auto It = InlineTreeNodeId.find(Probe.getInlineTreeNode());
90+
if (It == InlineTreeNodeId.end())
91+
continue;
92+
NodeProbes[It->second][Probe.getType()].emplace_back(Probe.getIndex());
93+
}
94+
return convertNodeProbes(NodeProbes);
95+
}
96+
};
4797
} // namespace bolt
4898
} // namespace llvm
4999

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 34 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
#include "llvm/Support/raw_ostream.h"
3535
#include <map>
3636
#include <optional>
37-
#include <queue>
3837
#include <unordered_map>
3938
#include <utility>
4039

@@ -2322,6 +2321,12 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
23222321
BP.Header.Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
23232322
: BinaryFunction::PF_LBR;
23242323

2324+
// Add probe inline tree nodes.
2325+
YAMLProfileWriter::InlineTreeDesc InlineTree;
2326+
if (PseudoProbeDecoder)
2327+
std::tie(BP.PseudoProbeDesc, InlineTree) =
2328+
YAMLProfileWriter::convertPseudoProbeDesc(*PseudoProbeDecoder);
2329+
23252330
if (!opts::BasicAggregation) {
23262331
// Convert profile for functions not covered by BAT
23272332
for (auto &BFI : BC.getBinaryFunctions()) {
@@ -2330,8 +2335,8 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
23302335
continue;
23312336
if (BAT->isBATFunction(Function.getAddress()))
23322337
continue;
2333-
BP.Functions.emplace_back(
2334-
YAMLProfileWriter::convert(Function, /*UseDFS=*/false, BAT));
2338+
BP.Functions.emplace_back(YAMLProfileWriter::convert(
2339+
Function, /*UseDFS=*/false, InlineTree, BAT));
23352340
}
23362341

23372342
for (const auto &KV : NamesToBranches) {
@@ -2403,74 +2408,49 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
24032408
const unsigned BlockIndex = BlockMap.getBBIndex(BI.To.Offset);
24042409
YamlBF.Blocks[BlockIndex].ExecCount += BI.Branches;
24052410
}
2406-
DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>
2407-
InlineTreeNodeId;
2408-
if (PseudoProbeDecoder && BF->getGUID()) {
2409-
std::queue<const MCDecodedPseudoProbeInlineTree *> ITWorklist;
2410-
// FIXME: faster inline tree lookup by top-level GUID
2411-
if (const MCDecodedPseudoProbeInlineTree *InlineTree = llvm::find_if(
2412-
PseudoProbeDecoder->getDummyInlineRoot().getChildren(),
2413-
[&](const auto &InlineTree) {
2414-
return InlineTree.Guid == BF->getGUID();
2415-
})) {
2416-
ITWorklist.push(InlineTree);
2417-
InlineTreeNodeId[InlineTree] = 0;
2418-
auto Hash =
2419-
PseudoProbeDecoder->getFuncDescForGUID(BF->getGUID())->FuncHash;
2420-
YamlBF.InlineTree.emplace_back(
2421-
yaml::bolt::InlineTreeInfo{0, 0, 0, BF->getGUID(), Hash});
2422-
}
2423-
uint32_t ParentId = 0;
2424-
uint32_t NodeId = 1;
2425-
while (!ITWorklist.empty()) {
2426-
const MCDecodedPseudoProbeInlineTree *Cur = ITWorklist.front();
2427-
for (const MCDecodedPseudoProbeInlineTree &Child :
2428-
Cur->getChildren()) {
2429-
InlineTreeNodeId[&Child] = NodeId;
2430-
auto Hash =
2431-
PseudoProbeDecoder->getFuncDescForGUID(Child.Guid)->FuncHash;
2432-
YamlBF.InlineTree.emplace_back(yaml::bolt::InlineTreeInfo{
2433-
NodeId++, ParentId, std::get<1>(Child.getInlineSite()),
2434-
Child.Guid, Hash});
2435-
ITWorklist.push(&Child);
2436-
}
2437-
ITWorklist.pop();
2438-
++ParentId;
2439-
}
2440-
}
2441-
24422411
if (PseudoProbeDecoder) {
2412+
DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>
2413+
InlineTreeNodeId;
2414+
if (BF->getGUID()) {
2415+
std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
2416+
YAMLProfileWriter::convertBFInlineTree(*PseudoProbeDecoder,
2417+
InlineTree, BF->getGUID());
2418+
}
24432419
// Fetch probes belonging to all fragments
24442420
const AddressProbesMap &ProbeMap =
24452421
PseudoProbeDecoder->getAddress2ProbesMap();
24462422
BinaryFunction::FragmentsSetTy Fragments(BF->Fragments);
24472423
Fragments.insert(BF);
2424+
DenseMap<
2425+
uint32_t,
2426+
std::vector<std::reference_wrapper<const MCDecodedPseudoProbe>>>
2427+
BlockProbes;
24482428
for (const BinaryFunction *F : Fragments) {
24492429
const uint64_t FuncAddr = F->getAddress();
24502430
for (const MCDecodedPseudoProbe &Probe :
24512431
ProbeMap.find(FuncAddr, FuncAddr + F->getSize())) {
24522432
const uint32_t OutputAddress = Probe.getAddress();
24532433
const uint32_t InputOffset = BAT->translate(
24542434
FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
2455-
const auto [BlockOffset, BlockIndex] = getBlock(InputOffset);
2456-
uint32_t NodeId = InlineTreeNodeId[Probe.getInlineTreeNode()];
2457-
uint32_t Offset = InputOffset - BlockOffset;
2458-
YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back(
2459-
yaml::bolt::PseudoProbeInfo{Probe.getIndex(), NodeId, Offset,
2460-
Probe.getType()});
2435+
const unsigned BlockIndex = getBlock(InputOffset).second;
2436+
BlockProbes[BlockIndex].emplace_back(Probe);
24612437
}
24622438
}
2463-
for (yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks) {
2464-
llvm::sort(YamlBB.PseudoProbes);
2465-
YamlBB.PseudoProbes.erase(llvm::unique(YamlBB.PseudoProbes),
2466-
YamlBB.PseudoProbes.end());
2439+
2440+
for (auto &[Block, Probes] : BlockProbes) {
2441+
YamlBF.Blocks[Block].PseudoProbes =
2442+
YAMLProfileWriter::writeBlockProbes(Probes, InlineTreeNodeId);
24672443
}
24682444
}
2469-
// Drop blocks without a hash, won't be useful for stale matching.
2470-
llvm::erase_if(YamlBF.Blocks,
2471-
[](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
2472-
return YamlBB.Hash == (yaml::Hex64)0;
2473-
});
2445+
// Drop blocks without profile data (no exec, successor, or call site counts)
2446+
llvm::erase_if(
2447+
YamlBF.Blocks, [](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
2448+
auto HasCount = [](const auto &SI) { return SI.Count; };
2449+
bool HasAnyCount = YamlBB.ExecCount ||
2450+
llvm::any_of(YamlBB.Successors, HasCount) ||
2451+
llvm::any_of(YamlBB.CallSites, HasCount);
2452+
return !HasAnyCount;
2453+
});
24742454
BP.Functions.emplace_back(YamlBF);
24752455
}
24762456
}

0 commit comments

Comments
 (0)