@@ -117,8 +117,8 @@ cl::opt<unsigned> StaleMatchingCostJumpUnknownFTInc(
117
117
" The cost of increasing an unknown fall-through jump count by one." ),
118
118
cl::init(3 ), cl::ReallyHidden, cl::cat(BoltOptCategory));
119
119
120
- cl::opt<bool > StaleMatchingWithBlockPseudoProbes (
121
- " stale-matching-with-block- pseudo-probes" ,
120
+ cl::opt<bool > StaleMatchingWithPseudoProbes (
121
+ " stale-matching-with-pseudo-probes" ,
122
122
cl::desc (" Turns on stale matching with block pseudo probes." ),
123
123
cl::init(false ), cl::ReallyHidden, cl::cat(BoltOptCategory));
124
124
@@ -328,7 +328,7 @@ class StaleMatcher {
328
328
std::pair<const FlowBlock *, bool > matchWithPseudoProbes (
329
329
const ArrayRef<yaml::bolt::PseudoProbeInfo> BlockPseudoProbes,
330
330
const ArrayRef<yaml::bolt::InlineTreeInfo> InlineTree) const {
331
- if (!opts::StaleMatchingWithBlockPseudoProbes )
331
+ if (!opts::StaleMatchingWithPseudoProbes )
332
332
return {nullptr , false };
333
333
334
334
DenseMap<const FlowBlock *, uint32_t > FlowBlockMatchCount;
@@ -574,7 +574,8 @@ size_t matchWeightsByHashes(
574
574
BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder,
575
575
const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func,
576
576
HashFunction HashFunction, YAMLProfileReader::ProfileLookupMap &IdToYamlBF,
577
- const BinaryFunction &BF, const yaml::bolt::PseudoProbeDesc &YamlPD) {
577
+ const BinaryFunction &BF, const yaml::bolt::PseudoProbeDesc &YamlPD,
578
+ const YAMLProfileReader::GUIDInlineTreeMap &TopLevelGUIDToInlineTree) {
578
579
579
580
assert (Func.Blocks .size () == BlockOrder.size () + 2 );
580
581
@@ -605,21 +606,19 @@ size_t matchWeightsByHashes(
605
606
}
606
607
StaleMatcher Matcher;
607
608
// Collects function pseudo probes for use in the StaleMatcher.
608
- if (opts::StaleMatchingWithBlockPseudoProbes ) {
609
- const MCPseudoProbeDecoder *PseudoProbeDecoder = BC.getPseudoProbeDecoder ();
610
- assert (PseudoProbeDecoder &&
609
+ if (opts::StaleMatchingWithPseudoProbes ) {
610
+ const MCPseudoProbeDecoder *Decoder = BC.getPseudoProbeDecoder ();
611
+ assert (Decoder &&
611
612
" If pseudo probes are in use, pseudo probe decoder should exist" );
612
- const AddressProbesMap &ProbeMap =
613
- PseudoProbeDecoder->getAddress2ProbesMap ();
613
+ const AddressProbesMap &ProbeMap = Decoder->getAddress2ProbesMap ();
614
614
const uint64_t FuncAddr = BF.getAddress ();
615
615
for (const MCDecodedPseudoProbe &Probe :
616
616
ProbeMap.find (FuncAddr, FuncAddr + BF.getSize ()))
617
617
if (const BinaryBasicBlock *BB =
618
618
BF.getBasicBlockContainingOffset (Probe.getAddress () - FuncAddr))
619
619
Matcher.mapProbeToBB (&Probe, Blocks[BB->getIndex ()]);
620
+
620
621
// Match inline tree nodes by GUID, checksum, parent, and call site.
621
- const MCDecodedPseudoProbeInlineTree *DummyInlineRoot =
622
- &PseudoProbeDecoder->getDummyInlineRoot ();
623
622
uint32_t ParentId = 0 ;
624
623
uint32_t PrevGUIDIdx = 0 ;
625
624
uint32_t Index = 0 ;
@@ -638,23 +637,24 @@ size_t matchWeightsByHashes(
638
637
uint32_t InlineTreeNodeId = Index++;
639
638
ParentId += InlineTreeNode.ParentIndexDelta ;
640
639
uint32_t CallSiteProbe = InlineTreeNode.CallSiteProbe ;
641
- const MCDecodedPseudoProbeInlineTree *ParentNode =
642
- InlineTreeNodeId ? Matcher.getInlineTreeNode (ParentId)
643
- : DummyInlineRoot;
644
- if (!ParentNode)
645
- continue ;
646
- for (const MCDecodedPseudoProbeInlineTree &Child :
647
- ParentNode->getChildren ()) {
648
- if (Child.Guid != GUID ||
649
- PseudoProbeDecoder->getFuncDescForGUID (GUID)->FuncHash != Hash)
650
- continue ;
651
- // Check inline site for non-toplev inline tree nodes.
652
- if (ParentNode != DummyInlineRoot &&
653
- std::get<1 >(Child.getInlineSite ()) != CallSiteProbe)
654
- continue ;
655
- Matcher.mapInlineTreeNode (InlineTreeNodeId, &Child);
656
- break ;
640
+ const MCDecodedPseudoProbeInlineTree *Cur = nullptr ;
641
+ if (!InlineTreeNodeId) {
642
+ auto It = TopLevelGUIDToInlineTree.find (GUID);
643
+ if (It != TopLevelGUIDToInlineTree.end ())
644
+ Cur = It->second ;
645
+ } else if (const MCDecodedPseudoProbeInlineTree *Parent =
646
+ Matcher.getInlineTreeNode (ParentId)) {
647
+ for (const MCDecodedPseudoProbeInlineTree &Child :
648
+ Parent->getChildren ()) {
649
+ if (Child.Guid == GUID) {
650
+ if (std::get<1 >(Child.getInlineSite ()) == CallSiteProbe)
651
+ Cur = &Child;
652
+ break ;
653
+ }
654
+ }
657
655
}
656
+ if (Cur && Decoder->getFuncDescForGUID (GUID)->FuncHash == Hash)
657
+ Matcher.mapInlineTreeNode (InlineTreeNodeId, Cur);
658
658
}
659
659
}
660
660
Matcher.init (Blocks, BlendedHashes, CallHashes);
@@ -1028,9 +1028,10 @@ bool YAMLProfileReader::inferStaleProfile(
1028
1028
FlowFunction Func = createFlowFunction (BlockOrder);
1029
1029
1030
1030
// Match as many block/jump counts from the stale profile as possible
1031
- size_t MatchedBlocks = matchWeightsByHashes (
1032
- BF.getBinaryContext (), BlockOrder, YamlBF, Func,
1033
- YamlBP.Header .HashFunction , IdToYamLBF, BF, YamlBP.PseudoProbeDesc );
1031
+ size_t MatchedBlocks =
1032
+ matchWeightsByHashes (BF.getBinaryContext (), BlockOrder, YamlBF, Func,
1033
+ YamlBP.Header .HashFunction , IdToYamLBF, BF,
1034
+ YamlBP.PseudoProbeDesc , TopLevelGUIDToInlineTree);
1034
1035
1035
1036
// Adjust the flow function by marking unreachable blocks Unlikely so that
1036
1037
// they don't get any counts assigned.
0 commit comments