Skip to content

Commit 0bb4e3a

Browse files
committed
Memoize top-level GUID->InlineTree mapping, cuts inference time by ~30%
Created using spr 1.3.4
2 parents 544a6ad + ee214d5 commit 0bb4e3a

File tree

7 files changed

+67
-38
lines changed

7 files changed

+67
-38
lines changed

bolt/include/bolt/Profile/ProfileYAMLMapping.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,12 @@ struct PseudoProbeDesc {
275275
std::vector<Hex64> GUID;
276276
std::vector<Hex64> Hash;
277277
std::vector<uint32_t> GUIDHash; // Index of hash for that GUID in Hash
278+
279+
bool operator==(const PseudoProbeDesc &Other) const {
280+
// Two descriptors compare equal only when both are entirely empty.
281+
return GUID.empty() && Other.GUID.empty() && Hash.empty() &&
282+
Other.Hash.empty() && GUIDHash.empty() && Other.GUIDHash.empty();
283+
}
278284
};
279285
} // end namespace bolt
280286

@@ -306,7 +312,8 @@ template <> struct MappingTraits<bolt::BinaryProfile> {
306312
static void mapping(IO &YamlIO, bolt::BinaryProfile &BP) {
307313
YamlIO.mapRequired("header", BP.Header);
308314
YamlIO.mapRequired("functions", BP.Functions);
309-
YamlIO.mapOptional("pseudo_probe_desc", BP.PseudoProbeDesc);
315+
YamlIO.mapOptional("pseudo_probe_desc", BP.PseudoProbeDesc,
316+
bolt::PseudoProbeDesc());
310317
}
311318
};
312319

bolt/include/bolt/Profile/YAMLProfileReader.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
#include <unordered_set>
1515

1616
namespace llvm {
17+
class MCDecodedPseudoProbeInlineTree;
18+
1719
namespace bolt {
1820

1921
class YAMLProfileReader : public ProfileReaderBase {
@@ -43,6 +45,9 @@ class YAMLProfileReader : public ProfileReaderBase {
4345
using ProfileLookupMap =
4446
DenseMap<uint32_t, yaml::bolt::BinaryFunctionProfile *>;
4547

48+
using GUIDInlineTreeMap =
49+
std::unordered_map<uint64_t, const MCDecodedPseudoProbeInlineTree *>;
50+
4651
/// A class for matching binary functions to functions in the YAML profile.
4752
/// First, a call graph is constructed for both profiled and binary functions.
4853
/// Then functions are hashed based on the names of their callee/caller
@@ -129,6 +134,9 @@ class YAMLProfileReader : public ProfileReaderBase {
129134
/// BinaryFunction pointers indexed by YamlBP functions.
130135
std::vector<BinaryFunction *> ProfileBFs;
131136

137+
// Maps a top-level pseudo probe function GUID to its inline tree node.
138+
GUIDInlineTreeMap TopLevelGUIDToInlineTree;
139+
132140
/// Populate \p Function profile with the one supplied in YAML format.
133141
bool parseFunctionProfile(BinaryFunction &Function,
134142
const yaml::bolt::BinaryFunctionProfile &YamlBF);

bolt/lib/Profile/StaleProfileMatching.cpp

Lines changed: 31 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,8 @@ cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc(
117117
"The cost of increasing an unknown fall-through jump count by one."),
118118
cl::init(3), cl::ReallyHidden, cl::cat(BoltOptCategory));
119119

120-
cl::opt<bool> StaleMatchingWithBlockPseudoProbes(
121-
"stale-matching-with-block-pseudo-probes",
120+
cl::opt<bool> StaleMatchingWithPseudoProbes(
121+
"stale-matching-with-pseudo-probes",
122122
cl::desc("Turns on stale matching with block pseudo probes."),
123123
cl::init(false), cl::ReallyHidden, cl::cat(BoltOptCategory));
124124

@@ -328,7 +328,7 @@ class StaleMatcher {
328328
std::pair<const FlowBlock *, bool> matchWithPseudoProbes(
329329
const ArrayRef<yaml::bolt::PseudoProbeInfo> BlockPseudoProbes,
330330
const ArrayRef<yaml::bolt::InlineTreeInfo> InlineTree) const {
331-
if (!opts::StaleMatchingWithBlockPseudoProbes)
331+
if (!opts::StaleMatchingWithPseudoProbes)
332332
return {nullptr, false};
333333

334334
DenseMap<const FlowBlock *, uint32_t> FlowBlockMatchCount;
@@ -574,7 +574,8 @@ size_t matchWeightsByHashes(
574574
BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder,
575575
const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func,
576576
HashFunction HashFunction, YAMLProfileReader::ProfileLookupMap &IdToYamlBF,
577-
const BinaryFunction &BF, const yaml::bolt::PseudoProbeDesc &YamlPD) {
577+
const BinaryFunction &BF, const yaml::bolt::PseudoProbeDesc &YamlPD,
578+
const YAMLProfileReader::GUIDInlineTreeMap &TopLevelGUIDToInlineTree) {
578579

579580
assert(Func.Blocks.size() == BlockOrder.size() + 2);
580581

@@ -605,21 +606,19 @@ size_t matchWeightsByHashes(
605606
}
606607
StaleMatcher Matcher;
607608
// Collects function pseudo probes for use in the StaleMatcher.
608-
if (opts::StaleMatchingWithBlockPseudoProbes) {
609-
const MCPseudoProbeDecoder *PseudoProbeDecoder = BC.getPseudoProbeDecoder();
610-
assert(PseudoProbeDecoder &&
609+
if (opts::StaleMatchingWithPseudoProbes) {
610+
const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder();
611+
assert(Decoder &&
611612
"If pseudo probes are in use, pseudo probe decoder should exist");
612-
const AddressProbesMap &ProbeMap =
613-
PseudoProbeDecoder->getAddress2ProbesMap();
613+
const AddressProbesMap &ProbeMap = Decoder->getAddress2ProbesMap();
614614
const uint64_t FuncAddr = BF.getAddress();
615615
for (const MCDecodedPseudoProbe &Probe :
616616
ProbeMap.find(FuncAddr, FuncAddr + BF.getSize()))
617617
if (const BinaryBasicBlock *BB =
618618
BF.getBasicBlockContainingOffset(Probe.getAddress() - FuncAddr))
619619
Matcher.mapProbeToBB(&Probe, Blocks[BB->getIndex()]);
620+
620621
// Match inline tree nodes by GUID, checksum, parent, and call site.
621-
const MCDecodedPseudoProbeInlineTree *DummyInlineRoot =
622-
&PseudoProbeDecoder->getDummyInlineRoot();
623622
uint32_t ParentId = 0;
624623
uint32_t PrevGUIDIdx = 0;
625624
uint32_t Index = 0;
@@ -638,23 +637,24 @@ size_t matchWeightsByHashes(
638637
uint32_t InlineTreeNodeId = Index++;
639638
ParentId += InlineTreeNode.ParentIndexDelta;
640639
uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe;
641-
const MCDecodedPseudoProbeInlineTree *ParentNode =
642-
InlineTreeNodeId ? Matcher.getInlineTreeNode(ParentId)
643-
: DummyInlineRoot;
644-
if (!ParentNode)
645-
continue;
646-
for (const MCDecodedPseudoProbeInlineTree &Child :
647-
ParentNode->getChildren()) {
648-
if (Child.Guid != GUID ||
649-
PseudoProbeDecoder->getFuncDescForGUID(GUID)->FuncHash != Hash)
650-
continue;
651-
// Check inline site for non-toplev inline tree nodes.
652-
if (ParentNode != DummyInlineRoot &&
653-
std::get<1>(Child.getInlineSite()) != CallSiteProbe)
654-
continue;
655-
Matcher.mapInlineTreeNode(InlineTreeNodeId, &Child);
656-
break;
640+
const MCDecodedPseudoProbeInlineTree *Cur = nullptr;
641+
if (!InlineTreeNodeId) {
642+
auto It = TopLevelGUIDToInlineTree.find(GUID);
643+
if (It != TopLevelGUIDToInlineTree.end())
644+
Cur = It->second;
645+
} else if (const MCDecodedPseudoProbeInlineTree *Parent =
646+
Matcher.getInlineTreeNode(ParentId)) {
647+
for (const MCDecodedPseudoProbeInlineTree &Child :
648+
Parent->getChildren()) {
649+
if (Child.Guid == GUID) {
650+
if (std::get<1>(Child.getInlineSite()) == CallSiteProbe)
651+
Cur = &Child;
652+
break;
653+
}
654+
}
657655
}
656+
if (Cur && Decoder->getFuncDescForGUID(GUID)->FuncHash == Hash)
657+
Matcher.mapInlineTreeNode(InlineTreeNodeId, Cur);
658658
}
659659
}
660660
Matcher.init(Blocks, BlendedHashes, CallHashes);
@@ -1028,9 +1028,10 @@ bool YAMLProfileReader::inferStaleProfile(
10281028
FlowFunction Func = createFlowFunction(BlockOrder);
10291029

10301030
// Match as many block/jump counts from the stale profile as possible
1031-
size_t MatchedBlocks = matchWeightsByHashes(
1032-
BF.getBinaryContext(), BlockOrder, YamlBF, Func,
1033-
YamlBP.Header.HashFunction, IdToYamLBF, BF, YamlBP.PseudoProbeDesc);
1031+
size_t MatchedBlocks =
1032+
matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func,
1033+
YamlBP.Header.HashFunction, IdToYamLBF, BF,
1034+
YamlBP.PseudoProbeDesc, TopLevelGUIDToInlineTree);
10341035

10351036
// Adjust the flow function by marking unreachable blocks Unlikely so that
10361037
// they don't get any counts assigned.

bolt/lib/Profile/YAMLProfileReader.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "llvm/ADT/STLExtras.h"
1717
#include "llvm/ADT/edit_distance.h"
1818
#include "llvm/Demangle/Demangle.h"
19+
#include "llvm/MC/MCPseudoProbe.h"
1920
#include "llvm/Support/CommandLine.h"
2021

2122
using namespace llvm;
@@ -49,6 +50,8 @@ llvm::cl::opt<bool>
4950
llvm::cl::opt<bool> ProfileUseDFS("profile-use-dfs",
5051
cl::desc("use DFS order for YAML profile"),
5152
cl::Hidden, cl::cat(BoltOptCategory));
53+
54+
extern llvm::cl::opt<bool> StaleMatchingWithPseudoProbes;
5255
} // namespace opts
5356

5457
namespace llvm {
@@ -722,6 +725,15 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
722725
}
723726
}
724727

728+
if (opts::StaleMatchingWithPseudoProbes) {
729+
const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder();
730+
assert(Decoder &&
731+
"If pseudo probes are in use, pseudo probe decoder should exist");
732+
for (const MCDecodedPseudoProbeInlineTree &TopLev :
733+
Decoder->getDummyInlineRoot().getChildren())
734+
TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev;
735+
}
736+
725737
// Map profiled function ids to names.
726738
for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
727739
IdToYamLBF[YamlBF.Id] = &YamlBF;

bolt/lib/Rewrite/PseudoProbeRewriter.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ static cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes(
5151
cl::Hidden, cl::cat(BoltCategory));
5252

5353
extern cl::opt<bool> ProfileWritePseudoProbes;
54-
extern cl::opt<bool> StaleMatchingWithBlockPseudoProbes;
54+
extern cl::opt<bool> StaleMatchingWithPseudoProbes;
5555
} // namespace opts
5656

5757
namespace {
@@ -94,14 +94,14 @@ class PseudoProbeRewriter final : public MetadataRewriter {
9494

9595
Error PseudoProbeRewriter::preCFGInitializer() {
9696
if (opts::ProfileWritePseudoProbes ||
97-
opts::StaleMatchingWithBlockPseudoProbes)
97+
opts::StaleMatchingWithPseudoProbes)
9898
parsePseudoProbe(opts::ProfileWritePseudoProbes);
9999

100100
return Error::success();
101101
}
102102

103103
Error PseudoProbeRewriter::postEmitFinalizer() {
104-
if (!opts::StaleMatchingWithBlockPseudoProbes)
104+
if (!opts::StaleMatchingWithPseudoProbes)
105105
parsePseudoProbe();
106106
updatePseudoProbes();
107107

bolt/test/X86/match-blocks-with-pseudo-probes.test

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
77
# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \
88
# RUN: --print-cfg --funcs=main --infer-stale-profile \
9-
# RUN: --stale-matching-with-block-pseudo-probes 2>&1 | FileCheck %s
9+
# RUN: --stale-matching-with-pseudo-probes 2>&1 | FileCheck %s
1010

11-
# CHECK: BOLT-INFO: inference found an exact pseudo probe match for 100.00% of basic blocks (1 out of 1 stale) responsible for -nan% samples (0 out of 0 stale)
11+
# CHECK: BOLT-INFO: inference found an exact pseudo probe match for 100.00% of basic blocks (1 out of 1 stale)
1212

1313
#--- main.s
1414
.text

bolt/test/X86/pseudoprobe-decoding-inline.test

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,9 @@
3636
## generated
3737
# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata
3838
# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-NO-OPT
39-
# CHECK-NO-OPT-NOT: pseudo_probes
40-
# CHECK-NO-OPT-NOT: inline_tree
39+
# CHECK-NO-OPT-NOT: probes:
40+
# CHECK-NO-OPT-NOT: inline_tree:
41+
# CHECK-NO-OPT-NOT: pseudo_probe_desc:
4142

4243
CHECK: Report of decoding input pseudo probe binaries
4344

0 commit comments

Comments
 (0)