Skip to content

Commit 7d1094e

Browse files
shawbyoungyuxuanchen1997
authored and committed
[BOLT] Match functions with call graph (#98125)
Implemented call graph function matching. First, two call graphs are constructed for both profiled and binary functions. Then functions are hashed based on the names of their callee/caller functions. Finally, functions are matched based on these neighbor hashes and the longest common prefix of their names. The `match-with-call-graph` flag turns this matching on. Test Plan: Added match-with-call-graph.test. Matched 164 functions in a large binary with 10171 profiled functions.
1 parent 4869433 commit 7d1094e

File tree

4 files changed

+313
-7
lines changed

4 files changed

+313
-7
lines changed

bolt/docs/CommandLineArgumentReference.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -686,6 +686,10 @@
686686
threshold means fewer functions to process. E.g., a threshold of 90 means only the top
687687
10 percent of functions with profile will be processed.
688688

689+
- `--match-with-call-graph`
690+
691+
Match functions with call graph
692+
689693
- `--memcpy1-spec=<func1,func2:cs1:cs2,func3:cs1,...>`
690694

691695
List of functions with call sites for which to specialize memcpy() for size 1

bolt/include/bolt/Profile/YAMLProfileReader.h

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,59 @@ class YAMLProfileReader : public ProfileReaderBase {
4343
using ProfileLookupMap =
4444
DenseMap<uint32_t, yaml::bolt::BinaryFunctionProfile *>;
4545

46+
/// A class for matching binary functions in functions in the YAML profile.
47+
/// First, a call graph is constructed for both profiled and binary functions.
48+
/// Then functions are hashed based on the names of their callee/caller
49+
/// functions. Finally, functions are matched based on these neighbor hashes.
50+
class CallGraphMatcher {
51+
public:
52+
/// Constructs the call graphs for binary and profiled functions and
53+
/// computes neighbor hashes for binary functions.
54+
CallGraphMatcher(BinaryContext &BC, yaml::bolt::BinaryProfile &YamlBP,
55+
ProfileLookupMap &IdToYAMLBF);
56+
57+
/// Returns the YamlBFs adjacent to the parameter YamlBF in the call graph.
58+
std::optional<std::set<yaml::bolt::BinaryFunctionProfile *>>
59+
getAdjacentYamlBFs(yaml::bolt::BinaryFunctionProfile &YamlBF) {
60+
auto It = YamlBFAdjacencyMap.find(&YamlBF);
61+
return It == YamlBFAdjacencyMap.end() ? std::nullopt
62+
: std::make_optional(It->second);
63+
}
64+
65+
/// Returns the binary functions with the parameter neighbor hash.
66+
std::optional<std::vector<BinaryFunction *>>
67+
getBFsWithNeighborHash(uint64_t NeighborHash) {
68+
auto It = NeighborHashToBFs.find(NeighborHash);
69+
return It == NeighborHashToBFs.end() ? std::nullopt
70+
: std::make_optional(It->second);
71+
}
72+
73+
private:
74+
/// Adds edges to the binary function call graph given the callsites of the
75+
/// parameter function.
76+
void constructBFCG(BinaryContext &BC, yaml::bolt::BinaryProfile &YamlBP);
77+
78+
/// Using the constructed binary function call graph, computes and creates
79+
/// mappings from "neighbor hash" (composed of the function names of callee
80+
/// and caller functions of a function) to binary functions.
81+
void computeBFNeighborHashes(BinaryContext &BC);
82+
83+
/// Constructs the call graph for profile functions.
84+
void constructYAMLFCG(yaml::bolt::BinaryProfile &YamlBP,
85+
ProfileLookupMap &IdToYAMLBF);
86+
87+
/// Adjacency map for binary functions in the call graph.
88+
DenseMap<BinaryFunction *, std::set<BinaryFunction *>> BFAdjacencyMap;
89+
90+
/// Maps neighbor hashes to binary functions.
91+
DenseMap<uint64_t, std::vector<BinaryFunction *>> NeighborHashToBFs;
92+
93+
/// Adjacency map for profile functions in the call graph.
94+
DenseMap<yaml::bolt::BinaryFunctionProfile *,
95+
std::set<yaml::bolt::BinaryFunctionProfile *>>
96+
YamlBFAdjacencyMap;
97+
};
98+
4699
private:
47100
/// Adjustments for basic samples profiles (without LBR).
48101
bool NormalizeByInsnCount{false};
@@ -100,6 +153,9 @@ class YAMLProfileReader : public ProfileReaderBase {
100153
/// Matches functions using exact hash.
101154
size_t matchWithHash(BinaryContext &BC);
102155

156+
/// Matches functions using the call graph.
157+
size_t matchWithCallGraph(BinaryContext &BC);
158+
103159
/// Matches functions with similarly named profiled functions.
104160
size_t matchWithNameSimilarity(BinaryContext &BC);
105161

bolt/lib/Profile/YAMLProfileReader.cpp

Lines changed: 150 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@ llvm::cl::opt<bool>
4141
MatchProfileWithFunctionHash("match-profile-with-function-hash",
4242
cl::desc("Match profile with function hash"),
4343
cl::Hidden, cl::cat(BoltOptCategory));
44+
llvm::cl::opt<bool>
45+
MatchWithCallGraph("match-with-call-graph",
46+
cl::desc("Match functions with call graph"), cl::Hidden,
47+
cl::cat(BoltOptCategory));
4448

4549
llvm::cl::opt<bool> ProfileUseDFS("profile-use-dfs",
4650
cl::desc("use DFS order for YAML profile"),
@@ -50,6 +54,69 @@ llvm::cl::opt<bool> ProfileUseDFS("profile-use-dfs",
5054
namespace llvm {
5155
namespace bolt {
5256

57+
/// Builds both call graphs and the neighbor-hash index for binary functions.
YAMLProfileReader::CallGraphMatcher::CallGraphMatcher(
    BinaryContext &BC, yaml::bolt::BinaryProfile &YamlBP,
    ProfileLookupMap &IdToYAMLBF) {
  // Profile side: only fills YamlBFAdjacencyMap, so it is independent of the
  // binary-side steps below.
  constructYAMLFCG(YamlBP, IdToYAMLBF);

  // Binary side: the adjacency map must exist before hashes are derived from
  // it.
  constructBFCG(BC, YamlBP);
  computeBFNeighborHashes(BC);
}
64+
65+
/// Walks every instruction of every binary function and, for each call whose
/// target resolves to a binary function, records caller and callee as adjacent
/// to each other in BFAdjacencyMap. \p YamlBP is not used here.
void YAMLProfileReader::CallGraphMatcher::constructBFCG(
    BinaryContext &BC, yaml::bolt::BinaryProfile &YamlBP) {
  // Resolves the binary function targeted by a call instruction. Yields
  // nullptr for non-calls and for calls whose target symbol, binary data, or
  // function cannot be found.
  auto ResolveCallee = [&BC](const MCInst &Inst) -> BinaryFunction * {
    if (!BC.MIB->isCall(Inst))
      return nullptr;
    const MCSymbol *TargetSym = BC.MIB->getTargetSymbol(Inst);
    if (!TargetSym)
      return nullptr;
    BinaryData *TargetData = BC.getBinaryDataByName(TargetSym->getName());
    if (!TargetData)
      return nullptr;
    return BC.getFunctionForSymbol(TargetData->getSymbol());
  };

  for (BinaryFunction *Caller : BC.getAllBinaryFunctions()) {
    for (const BinaryBasicBlock &Block : Caller->blocks()) {
      for (const MCInst &Inst : Block) {
        if (BinaryFunction *Callee = ResolveCallee(Inst)) {
          // Edges are stored in both directions; the graph is treated as
          // undirected.
          BFAdjacencyMap[Callee].insert(Caller);
          BFAdjacencyMap[Caller].insert(Callee);
        }
      }
    }
  }
}
88+
89+
/// Computes the neighbor hash of every binary function that has at least one
/// edge in BFAdjacencyMap and records the function under that hash in
/// NeighborHashToBFs. The hash is std::hash over the concatenation of one name
/// of each adjacent function.
void YAMLProfileReader::CallGraphMatcher::computeBFNeighborHashes(
    BinaryContext &BC) {
  for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
    auto It = BFAdjacencyMap.find(BF);
    // Functions without any call-graph edge get no neighbor hash.
    if (It == BFAdjacencyMap.end())
      continue;

    // NOTE(review): the adjacency container is a std::set of pointers, so the
    // names are concatenated in pointer order rather than in any name-based
    // order. Confirm this agrees with the order used for the profile-side hash.
    std::string HashStr;
    for (BinaryFunction *AdjacentBF : It->second)
      HashStr += AdjacentBF->getOneName();

    const uint64_t Hash = std::hash<std::string>{}(HashStr);
    NeighborHashToBFs[Hash].push_back(BF);
  }
}
103+
104+
/// Builds the call graph of the profiled functions. For every call site in
/// the profile whose destination id is present in \p IdToYAMLBF, the calling
/// and the called profile functions are recorded as adjacent to each other in
/// YamlBFAdjacencyMap.
void YAMLProfileReader::CallGraphMatcher::constructYAMLFCG(
    yaml::bolt::BinaryProfile &YamlBP, ProfileLookupMap &IdToYAMLBF) {
  for (yaml::bolt::BinaryFunctionProfile &Caller : YamlBP.Functions) {
    for (auto &Block : Caller.Blocks) {
      for (auto &Site : Block.CallSites) {
        auto CalleeIt = IdToYAMLBF.find(Site.DestId);
        // Call sites whose destination id has no profile entry add no edge.
        if (CalleeIt != IdToYAMLBF.end()) {
          yaml::bolt::BinaryFunctionProfile *Callee = CalleeIt->second;
          YamlBFAdjacencyMap[&Caller].insert(Callee);
          YamlBFAdjacencyMap[Callee].insert(&Caller);
        }
      }
    }
  }
}
119+
53120
bool YAMLProfileReader::isYAML(const StringRef Filename) {
54121
if (auto MB = MemoryBuffer::getFileOrSTDIN(Filename)) {
55122
StringRef Buffer = (*MB)->getBuffer();
@@ -350,7 +417,7 @@ bool YAMLProfileReader::profileMatches(
350417
}
351418

352419
bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) {
353-
if (opts::MatchProfileWithFunctionHash)
420+
if (opts::MatchProfileWithFunctionHash || opts::MatchWithCallGraph)
354421
return true;
355422
for (StringRef Name : BF.getNames())
356423
if (ProfileFunctionNames.contains(Name))
@@ -446,6 +513,79 @@ size_t YAMLProfileReader::matchWithLTOCommonName() {
446513
return MatchedWithLTOCommonName;
447514
}
448515

516+
/// Matches not-yet-used profiled functions to binary functions through the
/// call graph. Does nothing and returns 0 unless opts::MatchWithCallGraph is
/// set.
///
/// For each unused profiled function that has neighbors in the profile call
/// graph, a neighbor hash is computed from the names of those neighbors. Among
/// the binary functions sharing that hash and not already in
/// ProfiledFunctions, the one whose demangled base name has the longest common
/// prefix with the profiled function's base name is matched.
///
/// \returns the number of profiled functions matched here.
size_t YAMLProfileReader::matchWithCallGraph(BinaryContext &BC) {
  if (!opts::MatchWithCallGraph)
    return 0;

  size_t MatchedWithCallGraph = 0;
  CallGraphMatcher CGMatcher(BC, YamlBP, IdToYamLBF);

  ItaniumPartialDemangler Demangler;
  // Returns the demangled function base name of FunctionName, or an empty
  // string when partialDemangle returns true or no base name is produced.
  auto GetBaseName = [&](const std::string &FunctionName) {
    if (Demangler.partialDemangle(FunctionName.c_str()))
      return std::string("");
    size_t BufferSize = 1;
    char *Buffer = static_cast<char *>(std::malloc(BufferSize));
    // The demangler may hand back a different (reallocated) buffer; the
    // returned pointer is the one that has to be read and released.
    char *BaseName = Demangler.getFunctionBaseName(Buffer, &BufferSize);
    if (!BaseName) {
      std::free(Buffer);
      return std::string("");
    }
    // NOTE(review): BufferSize is the value written back by
    // getFunctionBaseName. If that count includes the terminating NUL, the
    // resulting string carries a trailing '\0' - confirm against the Demangle
    // API before changing, since it affects the prefix comparison below.
    std::string BaseNameStr(BaseName, BufferSize);
    std::free(BaseName);
    return BaseNameStr;
  };

  // Matches YAMLBF to BFs with neighbor hashes.
  for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
    if (YamlBF.Used)
      continue;

    // The optional already owns a copy of the adjacency set; iterate it in
    // place instead of copying it a second time.
    const auto AdjacentYamlBFsOpt = CGMatcher.getAdjacentYamlBFs(YamlBF);
    if (!AdjacentYamlBFsOpt)
      continue;
    std::string AdjacentYamlBFsHashStr;
    for (const yaml::bolt::BinaryFunctionProfile *AdjacentYamlBF :
         *AdjacentYamlBFsOpt)
      AdjacentYamlBFsHashStr += AdjacentYamlBF->Name;
    const uint64_t Hash = std::hash<std::string>{}(AdjacentYamlBFsHashStr);

    // Same here: the candidates are read directly from the returned optional.
    const auto BFsWithSameHashOpt = CGMatcher.getBFsWithNeighborHash(Hash);
    if (!BFsWithSameHashOpt)
      continue;

    // Finds the binary function with the longest common prefix to the profiled
    // function and matches. Because the comparison is ">=" and LCP starts at
    // 0, ties go to the last candidate and a candidate without any common
    // prefix is still accepted.
    BinaryFunction *ClosestBF = nullptr;
    size_t LCP = 0;
    const std::string YamlBFBaseName = GetBaseName(YamlBF.Name);
    for (BinaryFunction *BF : *BFsWithSameHashOpt) {
      if (ProfiledFunctions.count(BF))
        continue;
      const std::string BFBaseName =
          GetBaseName(std::string(BF->getOneName()));
      const size_t N = std::min(YamlBFBaseName.size(), BFBaseName.size());
      size_t PrefixLength = 0;
      while (PrefixLength < N &&
             YamlBFBaseName[PrefixLength] == BFBaseName[PrefixLength])
        ++PrefixLength;
      if (PrefixLength >= LCP) {
        LCP = PrefixLength;
        ClosestBF = BF;
      }
    }
    if (ClosestBF) {
      matchProfileToFunction(YamlBF, *ClosestBF);
      ++MatchedWithCallGraph;
    }
  }

  return MatchedWithCallGraph;
}
588+
449589
size_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext &BC) {
450590
if (opts::NameSimilarityFunctionMatchingThreshold == 0)
451591
return 0;
@@ -581,9 +721,14 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
581721
}
582722
}
583723

724+
// Map profiled function ids to their profile entries.
725+
for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
726+
IdToYamLBF[YamlBF.Id] = &YamlBF;
727+
584728
const size_t MatchedWithExactName = matchWithExactName();
585729
const size_t MatchedWithHash = matchWithHash(BC);
586730
const size_t MatchedWithLTOCommonName = matchWithLTOCommonName();
731+
const size_t MatchedWithCallGraph = matchWithCallGraph(BC);
587732
const size_t MatchedWithNameSimilarity = matchWithNameSimilarity(BC);
588733

589734
for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs))
@@ -603,18 +748,15 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
603748
<< " functions with hash\n";
604749
outs() << "BOLT-INFO: matched " << MatchedWithLTOCommonName
605750
<< " functions with matching LTO common names\n";
751+
outs() << "BOLT-INFO: matched " << MatchedWithCallGraph
752+
<< " functions with call graph\n";
606753
outs() << "BOLT-INFO: matched " << MatchedWithNameSimilarity
607754
<< " functions with similar names\n";
608755
}
609756

610757
// Set for parseFunctionProfile().
611758
NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
612759
NormalizeByCalls = usesEvent("branches");
613-
614-
// Map profiled function ids to names.
615-
for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
616-
IdToYamLBF[YamlBF.Id] = &YamlBF;
617-
618760
uint64_t NumUnused = 0;
619761
for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
620762
if (YamlBF.Id >= YamlProfileToFunction.size()) {
@@ -630,7 +772,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
630772

631773
BC.setNumUnusedProfiledObjects(NumUnused);
632774

633-
if (opts::Lite && opts::MatchProfileWithFunctionHash) {
775+
if (opts::Lite &&
776+
(opts::MatchProfileWithFunctionHash || opts::MatchWithCallGraph)) {
634777
for (BinaryFunction *BF : BC.getAllBinaryFunctions())
635778
if (!BF->hasProfile())
636779
BF->setIgnored();

0 commit comments

Comments
 (0)