Skip to content

Commit f1e659d

Browse files
committed
addressing comments
1 parent 2629a77 commit f1e659d

File tree

3 files changed

+118
-104
lines changed

3 files changed

+118
-104
lines changed

llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -60,15 +60,15 @@ class SampleProfileMatcher {
6060
StringMap<std::unordered_map<LineLocation, MatchState, LineLocationHash>>
6161
FuncCallsiteMatchStates;
6262

63-
struct RenameDecisionCacheHash {
63+
struct FuncProfNameMapHash {
6464
uint64_t
6565
operator()(const std::pair<const Function *, FunctionId> &P) const {
6666
return hash_combine(P.first, P.second);
6767
}
6868
};
6969
std::unordered_map<std::pair<const Function *, FunctionId>, bool,
70-
RenameDecisionCacheHash>
71-
RenameDecisionCache;
70+
FuncProfNameMapHash>
71+
FunctionProfileNameMap;
7272

7373
FunctionMap *SymbolMap;
7474

@@ -118,7 +118,12 @@ class SampleProfileMatcher {
118118
StringRef CanonFName = FunctionSamples::getCanonicalFnName(F);
119119
return getFlattenedSamplesFor(FunctionId(CanonFName));
120120
}
121-
void runBlockLevelMatching(Function &F);
121+
void getFilteredAnchorList(const AnchorMap &IRAnchors,
122+
const AnchorMap &ProfileAnchors,
123+
AnchorList &FilteredIRAnchorsList,
124+
AnchorList &FilteredProfileAnchorList);
125+
void runCFGMatching(Function &F);
126+
void runOnFunction(Function &F);
122127
void findIRAnchors(const Function &F, AnchorMap &IRAnchors) const;
123128
void findProfileAnchors(const FunctionSamples &FS,
124129
AnchorMap &ProfileAnchors) const;
@@ -182,20 +187,16 @@ class SampleProfileMatcher {
182187
void runStaleProfileMatching(const Function &F, const AnchorMap &IRAnchors,
183188
const AnchorMap &ProfileAnchors,
184189
LocToLocMap &IRToProfileLocationMap);
185-
void findIRNewCallees(Function &Caller,
186-
const StringMap<Function *> &IRNewFunctions,
187-
std::vector<Function *> &IRNewCallees);
188-
float checkFunctionSimilarity(const Function &IRFunc,
189-
const FunctionId &ProfFunc);
190-
bool functionIsRenamedImpl(const Function &IRFunc,
191-
const FunctionId &ProfFunc);
192-
bool functionIsRenamed(const Function &IRFunc, const FunctionId &ProfFunc);
193-
void
194-
runFuncRenamingMatchingOnProfile(const StringMap<Function *> &IRNewFunctions,
190+
void findNewIRCallees(Function &Caller,
191+
const StringMap<Function *> &newIRFunctions,
192+
std::vector<Function *> &NewIRCallees);
193+
bool functionMatchesProfile(const Function &IRFunc,
194+
const FunctionId &ProfFunc);
195+
void matchProfileForNewFunctions(const StringMap<Function *> &newIRFunctions,
195196
FunctionSamples &FS,
196197
FunctionMap &OldProfToNewSymbolMap);
197-
void findIRNewFunctions(StringMap<Function *> &IRNewFunctions);
198-
void runFuncLevelMatching();
198+
void findnewIRFunctions(StringMap<Function *> &newIRFunctions);
199+
void runCallGraphMatching();
199200
void reportOrPersistProfileStats();
200201
};
201202
} // end namespace llvm

llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp

Lines changed: 100 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,14 @@ using namespace sampleprof;
2020

2121
#define DEBUG_TYPE "sample-profile-matcher"
2222

23-
static cl::opt<bool> SalvageFunctionRenaming(
24-
"salvage-function-renaming", cl::Hidden, cl::init(false),
25-
cl::desc("Salvage stale profile by function renaming matching."));
23+
static cl::opt<bool> SalvageRenamedProfile(
24+
"salvage-renamed-profile", cl::Hidden, cl::init(false),
25+
cl::desc("Salvage renamed profile by function renaming matching."));
2626

27-
static cl::opt<unsigned> FuncRenamingSimilarityThreshold(
28-
"func-renaming-similarity-threshold", cl::Hidden, cl::init(80),
29-
cl::desc(
30-
"The profile function is considered being renamed if the similarity "
31-
"against IR is above the given number(percentage value)."));
27+
static cl::opt<unsigned> RenamedFuncSimilarityThreshold(
28+
"renamed-func-similarity-threshold", cl::Hidden, cl::init(80),
29+
cl::desc("The profile matches the function if their similarity is above "
30+
"the given number(percentage)."));
3231

3332
extern cl::opt<bool> SalvageStaleProfile;
3433
extern cl::opt<bool> PersistProfileStaleness;
@@ -272,10 +271,9 @@ void SampleProfileMatcher::matchNonCallsiteLocs(
272271

273272
// Filter the non-call locations from IRAnchors and ProfileAnchors and write
274273
// them into a list for random access later.
275-
static void getFilteredAnchorList(const AnchorMap &IRAnchors,
276-
const AnchorMap &ProfileAnchors,
277-
AnchorList &FilteredIRAnchorsList,
278-
AnchorList &FilteredProfileAnchorList) {
274+
void SampleProfileMatcher::getFilteredAnchorList(
275+
const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors,
276+
AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) {
279277
for (const auto &I : IRAnchors) {
280278
if (I.second.stringRef().empty())
281279
continue;
@@ -330,7 +328,7 @@ void SampleProfileMatcher::runStaleProfileMatching(
330328
matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
331329
}
332330

333-
void SampleProfileMatcher::runBlockLevelMatching(Function &F) {
331+
void SampleProfileMatcher::runCFGMatching(Function &F) {
334332
// We need to use flattened function samples for matching.
335333
// Unlike IR, which includes all callsites from the source code, the callsites
336334
// in profile only show up when they are hit by samples, i,e. the profile
@@ -612,8 +610,8 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
612610
// Find functions that don't show in the profile or profile symbol list, which
613611
// are supposed to be new functions. We use them as the targets for renaming
614612
// matching.
615-
void SampleProfileMatcher::findIRNewFunctions(
616-
StringMap<Function *> &IRNewFunctions) {
613+
void SampleProfileMatcher::findnewIRFunctions(
614+
StringMap<Function *> &newIRFunctions) {
617615
// TODO: Support MD5 profile.
618616
if (FunctionSamples::UseMD5)
619617
return;
@@ -634,23 +632,26 @@ void SampleProfileMatcher::findIRNewFunctions(
634632
if (FS)
635633
continue;
636634

637-
// For extended binary, the full function name symbols exits in the profile
638-
// symbol list table.
635+
// For extended binary, functions are fully inlined may not be loaded in the
636+
// top-level profile, so check the NameTable which has the all symbol names
637+
// in profile.
639638
if (NamesInProfile.count(CanonFName))
640639
continue;
641640

641+
// For extended binary, non-profiled function symbols are in the profile
642+
// symbol list table.
642643
if (PSL && PSL->contains(CanonFName))
643644
continue;
644645

645646
LLVM_DEBUG(dbgs() << "Function " << CanonFName
646647
<< " is not in profile or symbol list table.\n");
647-
IRNewFunctions[CanonFName] = &F;
648+
newIRFunctions[CanonFName] = &F;
648649
}
649650
}
650651

651-
void SampleProfileMatcher::findIRNewCallees(
652-
Function &Caller, const StringMap<Function *> &IRNewFunctions,
653-
std::vector<Function *> &IRNewCallees) {
652+
void SampleProfileMatcher::findNewIRCallees(
653+
Function &Caller, const StringMap<Function *> &newIRFunctions,
654+
std::vector<Function *> &NewIRCallees) {
654655
for (auto &BB : Caller) {
655656
for (auto &I : BB) {
656657
const auto *CB = dyn_cast<CallBase>(&I);
@@ -661,28 +662,37 @@ void SampleProfileMatcher::findIRNewCallees(
661662
continue;
662663
StringRef CalleeName =
663664
FunctionSamples::getCanonicalFnName(Callee->getName());
664-
if (IRNewFunctions.count(CalleeName))
665-
IRNewCallees.push_back(Callee);
665+
if (newIRFunctions.count(CalleeName))
666+
NewIRCallees.push_back(Callee);
666667
}
667668
}
668669
}
669670

670-
// Use function similarity to determine if the function is renamed. Compute a
671-
// similarity ratio between two sequences which are the function callsite
672-
// anchors. The returned value is in the range [0, 1]. The bigger the value is,
673-
// the more similar two sequences are.
674-
float SampleProfileMatcher::checkFunctionSimilarity(
675-
const Function &IRFunc, const FunctionId &ProfFName) {
671+
// Determine if the function matches profile by computing a similarity ratio
672+
// between two callsite anchors sequences extracted from function and profile.
673+
// The returned value is in the range [0, 1]. The bigger the value is, the more
674+
// similar two sequences are.
675+
bool SampleProfileMatcher::functionMatchesProfile(const Function &IRFunc,
676+
const FunctionId &ProfFunc) {
677+
// Check the cache.
678+
auto R = FunctionProfileNameMap.find({&IRFunc, ProfFunc});
679+
if (R != FunctionProfileNameMap.end())
680+
return R->second;
681+
// The value is in the range [0, 1]. The bigger the value is, the more similar
682+
// two sequences are. -1.0 means the similarity is not set, and 0.0 means no
683+
// match.
684+
float Similarity = -1.0;
685+
676686
AnchorMap IRAnchors;
677687
findIRAnchors(IRFunc, IRAnchors);
678688

679689
AnchorMap ProfileAnchors;
680-
const auto *FSFlattened = getFlattenedSamplesFor(ProfFName);
690+
const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
681691
assert(FSFlattened && "Flattened profile sample is null");
682692
findProfileAnchors(*FSFlattened, ProfileAnchors);
683693

684-
AnchorList FilteredProfileAnchorList;
685694
AnchorList FilteredIRAnchorsList;
695+
AnchorList FilteredProfileAnchorList;
686696
getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
687697
FilteredProfileAnchorList);
688698

@@ -691,48 +701,45 @@ float SampleProfileMatcher::checkFunctionSimilarity(
691701
// the similarity.
692702
if (FunctionSamples::ProfileIsProbeBased) {
693703
const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
694-
// Make sure function is complex enough.
704+
// Probe-based profile checksum is based on the blocks, if the num of
705+
// function block is small, it's more likely to get checksum conflict and
706+
// generate wrong matching.
695707
if (IRAnchors.size() - FilteredIRAnchorsList.size() > 5 && FuncDesc &&
696708
!ProbeManager->profileIsHashMismatched(*FuncDesc, *FSFlattened)) {
697-
return 1.0;
709+
Similarity = 1.0;
698710
}
699711
}
700712

701-
if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
702-
return 0.0;
713+
// Skip the matching if the function is tiny. Similarity check may not be
714+
// reiable if the num of anchors is small.
715+
if (Similarity == -1.0 && (FilteredIRAnchorsList.size() <= 2 ||
716+
FilteredProfileAnchorList.size() <= 2))
717+
Similarity = 0.0;
703718

704-
// Use the diff algorithm to find the LCS between IR and profile.
705-
LocToLocMap MatchedAnchors =
706-
longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList);
719+
if (Similarity == -1.0) {
720+
// Use the diff algorithm to find the LCS between IR and profile.
721+
LocToLocMap MatchedAnchors =
722+
longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList);
707723

708-
return static_cast<float>(MatchedAnchors.size()) * 2 /
709-
(FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size());
710-
}
724+
Similarity =
725+
static_cast<float>(MatchedAnchors.size()) * 2 /
726+
(FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size());
727+
}
711728

712-
bool SampleProfileMatcher::functionIsRenamedImpl(const Function &IRFunc,
713-
const FunctionId &ProfFunc) {
714-
float Similarity = checkFunctionSimilarity(IRFunc, ProfFunc);
715729
LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()
716730
<< "(IR) and " << ProfFunc << "(profile) is "
717731
<< format("%.2f", Similarity) << "\n");
718-
return Similarity * 100 > FuncRenamingSimilarityThreshold;
732+
assert((Similarity >= 0 && Similarity <= 1.0) &&
733+
"Similarity value should be in [0, 1]");
734+
bool Matched = Similarity * 100 > RenamedFuncSimilarityThreshold;
735+
FunctionProfileNameMap[{&IRFunc, ProfFunc}] = Matched;
736+
return Matched;
719737
}
720738

721-
bool SampleProfileMatcher::functionIsRenamed(const Function &IRFunc,
722-
const FunctionId &ProfFunc) {
723-
auto R = RenameDecisionCache.find({&IRFunc, ProfFunc});
724-
if (R != RenameDecisionCache.end())
725-
return R->second;
726-
727-
bool V = functionIsRenamedImpl(IRFunc, ProfFunc);
728-
RenameDecisionCache[{&IRFunc, ProfFunc}] = V;
729-
return V;
730-
}
731-
732-
// Run function renaming matching on the profiled CFG edge to limit the matching
733-
// scope.
734-
void SampleProfileMatcher::runFuncRenamingMatchingOnProfile(
735-
const StringMap<Function *> &IRNewFunctions, FunctionSamples &CallerFS,
739+
// Match profile for new function on the profiled call-graph edge to limit the
740+
// matching scope.
741+
void SampleProfileMatcher::matchProfileForNewFunctions(
742+
const StringMap<Function *> &newIRFunctions, FunctionSamples &CallerFS,
736743
FunctionMap &OldProfToNewSymbolMap) {
737744
auto FindIRFunction = [&](const FunctionId &FName) {
738745
// Function can be null if name has conflict, use optional to store the
@@ -741,7 +748,7 @@ void SampleProfileMatcher::runFuncRenamingMatchingOnProfile(
741748

742749
auto R = SymbolMap->find(FName);
743750
if (R != SymbolMap->end())
744-
F = R->second;
751+
return std::optional<Function *>(R->second);
745752

746753
auto NewR = OldProfToNewSymbolMap.find(FName);
747754
if (NewR != OldProfToNewSymbolMap.end())
@@ -751,29 +758,29 @@ void SampleProfileMatcher::runFuncRenamingMatchingOnProfile(
751758
};
752759

753760
// Find the new callees from IR in the current caller scope.
754-
std::vector<Function *> IRNewCallees;
761+
std::vector<Function *> NewIRCallees;
755762
auto Caller = FindIRFunction(CallerFS.getFunction());
756763
if (Caller.has_value() && *Caller) {
757764
// No callees for external function, skip the rename matching.
758765
if ((*Caller)->isDeclaration())
759766
return;
760-
findIRNewCallees(**Caller, IRNewFunctions, IRNewCallees);
767+
findNewIRCallees(**Caller, newIRFunctions, NewIRCallees);
761768
}
762769

763-
// Run renaming matching on CFG edge(caller-callee).
770+
// Run function to profile matching on call-graph edge(caller-callee).
764771
for (auto &CM :
765772
const_cast<CallsiteSampleMap &>(CallerFS.getCallsiteSamples())) {
766773
auto &CalleeMap = CM.second;
767774
// Local container used to update the CallsiteSampleMap.
768775
std::vector<std::pair<FunctionId, FunctionSamples *>> FSamplesToUpdate;
769776
for (auto &CS : CalleeMap) {
770-
auto &CalleeFS = CS.second;
771-
auto ProfCallee = CalleeFS.getFunction();
772-
auto ExistingIRCallee = FindIRFunction(ProfCallee);
773-
// The profile callee is new, run renaming matching.
777+
FunctionSamples &CalleeFS = CS.second;
778+
FunctionId ProfCallee = CalleeFS.getFunction();
779+
std::optional<Function *> ExistingIRCallee = FindIRFunction(ProfCallee);
780+
// The profile callee is new, run function to profile matching.
774781
if (!ExistingIRCallee.has_value()) {
775-
for (auto *IRCallee : IRNewCallees) {
776-
if (functionIsRenamed(*IRCallee, ProfCallee)) {
782+
for (auto *IRCallee : NewIRCallees) {
783+
if (functionMatchesProfile(*IRCallee, ProfCallee)) {
777784
FSamplesToUpdate.emplace_back(ProfCallee, &CalleeFS);
778785
OldProfToNewSymbolMap[ProfCallee] = IRCallee;
779786
// Update the profile in place so that the deeper level matching
@@ -804,8 +811,8 @@ void SampleProfileMatcher::runFuncRenamingMatchingOnProfile(
804811
// Note that even there is no renaming in the current scope, there could
805812
// be renaming in deeper callee scope, we need to traverse all the callee
806813
// profiles.
807-
runFuncRenamingMatchingOnProfile(IRNewFunctions, CalleeFS,
808-
OldProfToNewSymbolMap);
814+
matchProfileForNewFunctions(newIRFunctions, CalleeFS,
815+
OldProfToNewSymbolMap);
809816
}
810817

811818
// Update the CalleeMap using the new name and remove the old entry.
@@ -818,23 +825,25 @@ void SampleProfileMatcher::runFuncRenamingMatchingOnProfile(
818825
}
819826
}
820827

821-
void SampleProfileMatcher::runFuncLevelMatching() {
822-
if (!SalvageFunctionRenaming)
828+
void SampleProfileMatcher::runCallGraphMatching() {
829+
if (!SalvageRenamedProfile)
823830
return;
824-
assert(SymbolMap && "SymbolMap points to null");
831+
assert(SymbolMap && "SymbolMap is null");
832+
assert(FunctionProfileNameMap.empty() &&
833+
"FunctionProfileNameMap is not empty before the call graph matching");
825834

826-
StringMap<Function *> IRNewFunctions;
827-
findIRNewFunctions(IRNewFunctions);
828-
if (IRNewFunctions.empty())
835+
StringMap<Function *> newIRFunctions;
836+
findnewIRFunctions(newIRFunctions);
837+
if (newIRFunctions.empty())
829838
return;
830839

831840
// The new functions found by the renaming matching. Save them into a map
832841
// whose key is the old(profile) function name and value is the new(renamed)
833842
// function.
834843
FunctionMap OldProfToNewSymbolMap;
835844
for (auto &I : Reader.getProfiles())
836-
runFuncRenamingMatchingOnProfile(IRNewFunctions, I.second,
837-
OldProfToNewSymbolMap);
845+
matchProfileForNewFunctions(newIRFunctions, I.second,
846+
OldProfToNewSymbolMap);
838847

839848
// Update all the data generated by the old profile.
840849
if (!OldProfToNewSymbolMap.empty()) {
@@ -850,20 +859,24 @@ void SampleProfileMatcher::runFuncLevelMatching() {
850859
ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
851860
FunctionSamples::ProfileIsCS);
852861
}
853-
RenameDecisionCache.clear();
862+
FunctionProfileNameMap.clear();
863+
}
864+
865+
void SampleProfileMatcher::runOnFunction(Function &F) {
866+
if (skipProfileForFunction(F))
867+
return;
868+
runCFGMatching(F);
854869
}
855870

856871
void SampleProfileMatcher::runOnModule(FunctionMap &SymMap) {
857872
ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
858873
FunctionSamples::ProfileIsCS);
859874
SymbolMap = &SymMap;
860-
runFuncLevelMatching();
875+
runCallGraphMatching();
876+
877+
for (auto &F : M)
878+
runOnFunction(F);
861879

862-
for (auto &F : M) {
863-
if (skipProfileForFunction(F))
864-
continue;
865-
runBlockLevelMatching(F);
866-
}
867880
if (SalvageStaleProfile)
868881
distributeIRToProfileLocationMap();
869882

0 commit comments

Comments
 (0)