Skip to content

Commit 854eaf4

Browse files
committed
Addressing feedback
1 parent 6c9f4fc commit 854eaf4

File tree

4 files changed

+149
-140
lines changed

4 files changed

+149
-140
lines changed

llvm/lib/Transforms/IPO/SampleProfile.cpp

Lines changed: 101 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -447,8 +447,12 @@ class SampleProfileMatcher {
447447
// Match state for an anchor/callsite.
448448
enum class MatchState {
449449
Matched = 0,
450-
Mismatched = 0x1,
451-
Recovered = 0x1,
450+
Mismatched = 1,
451+
// Stay Matched after profile matching.
452+
StayMatched = 2,
453+
// Recovered from Mismatched after profile matching.
454+
Recovered = 3,
455+
Unknown = 32,
452456
};
453457

454458
// For each function, store every callsite state into a map, of which each
@@ -457,19 +461,17 @@ class SampleProfileMatcher {
457461
StringMap<std::unordered_map<LineLocation, MatchState, LineLocationHash>>
458462
FuncCallsiteMatchStates;
459463

460-
/// Profile mismatch statstics:
464+
// Profile mismatch statstics:
461465
uint64_t TotalProfiledFunc = 0;
462-
// Num of function whose checksum is mismatched.
463-
uint64_t NumMismatchedFunc = 0;
466+
// Num of checksum-mismatched function.
467+
uint64_t NumStaleProfileFunc = 0;
464468
uint64_t TotalProfiledCallsites = 0;
465469
uint64_t NumMismatchedCallsites = 0;
466470
uint64_t NumRecoveredCallsites = 0;
467-
468-
/// Weigted profile samples mismatch statstics:
471+
// Total samples for all profiled functions.
469472
uint64_t TotalFunctionSamples = 0;
470-
// Samples for the mismatched checksum functions;
473+
// Total samples for all checksum-mismatched functions.
471474
uint64_t MismatchedFunctionSamples = 0;
472-
473475
uint64_t MismatchedCallsiteSamples = 0;
474476
uint64_t RecoveredCallsiteSamples = 0;
475477

@@ -504,11 +506,11 @@ class SampleProfileMatcher {
504506
const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
505507
const std::map<LineLocation, std::unordered_set<FunctionId>>
506508
&ProfileAnchors,
507-
const LocToLocMap &IRToProfileLocationMap);
509+
const LocToLocMap *IRToProfileLocationMap);
508510

509511
// Count the samples of checksum mismatched function for the top-level
510512
// function and all inlinees.
511-
void countMismatchedFuncSamples(const FunctionSamples &FS);
513+
void countMismatchedFuncSamples(const FunctionSamples &FS, bool IsTopLevel);
512514
// Count the number of mismatched or recovered callsites.
513515
void countMismatchCallsites(const FunctionSamples &FS);
514516
// Count the samples of mismatched or recovered callsites for top-level
@@ -715,7 +717,7 @@ void SampleProfileLoaderBaseImpl<Function>::computeDominanceAndLoopInfo(
715717
}
716718
} // namespace llvm
717719

718-
bool ShouldSkipProfileLoading(const Function &F) {
720+
static bool skipProfileForFunction(const Function &F) {
719721
return F.isDeclaration() || !F.hasFnAttribute("use-sample-profile");
720722
}
721723

@@ -1903,7 +1905,7 @@ SampleProfileLoader::buildProfiledCallGraph(Module &M) {
19031905
// the profile. This makes sure functions missing from the profile still
19041906
// gets a chance to be processed.
19051907
for (Function &F : M) {
1906-
if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
1908+
if (skipProfileForFunction(F))
19071909
continue;
19081910
ProfiledCG->addProfiledFunction(
19091911
getRepInFormat(FunctionSamples::getCanonicalFnName(F)));
@@ -1932,7 +1934,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) {
19321934
}
19331935

19341936
for (Function &F : M)
1935-
if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile"))
1937+
if (!skipProfileForFunction(F))
19361938
FunctionOrderList.push_back(&F);
19371939
return FunctionOrderList;
19381940
}
@@ -1998,7 +2000,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) {
19982000
}
19992001
for (auto *Node : Range) {
20002002
Function *F = SymbolMap.lookup(Node->Name);
2001-
if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
2003+
if (F && !skipProfileForFunction(*F))
20022004
FunctionOrderList.push_back(F);
20032005
}
20042006
++CGI;
@@ -2009,7 +2011,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) {
20092011
for (LazyCallGraph::SCC &C : RC) {
20102012
for (LazyCallGraph::Node &N : C) {
20112013
Function &F = N.getFunction();
2012-
if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile"))
2014+
if (!skipProfileForFunction(F))
20132015
FunctionOrderList.push_back(&F);
20142016
}
20152017
}
@@ -2374,7 +2376,7 @@ void SampleProfileMatcher::runOnFunction(const Function &F) {
23742376

23752377
// Compute the callsite match states for profile staleness report.
23762378
if (ReportProfileStaleness || PersistProfileStaleness)
2377-
computeCallsiteMatchStates(F, IRAnchors, ProfileAnchors, LocToLocMap());
2379+
computeCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr);
23782380

23792381
// Run profile matching for checksum mismatched profile, currently only
23802382
// support for pseudo-probe.
@@ -2386,32 +2388,32 @@ void SampleProfileMatcher::runOnFunction(const Function &F) {
23862388
runStaleProfileMatching(F, IRAnchors, ProfileAnchors,
23872389
IRToProfileLocationMap);
23882390
// Find and update callsite match states after matching.
2389-
if ((ReportProfileStaleness || PersistProfileStaleness) &&
2390-
!IRToProfileLocationMap.empty())
2391+
if (ReportProfileStaleness || PersistProfileStaleness)
23912392
computeCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
2392-
IRToProfileLocationMap);
2393+
&IRToProfileLocationMap);
23932394
}
23942395
}
23952396

23962397
void SampleProfileMatcher::computeCallsiteMatchStates(
23972398
const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
23982399
const std::map<LineLocation, std::unordered_set<FunctionId>>
23992400
&ProfileAnchors,
2400-
const LocToLocMap &IRToProfileLocationMap) {
2401-
// Use the matching result to determine if it's in post-match phrase.
2402-
bool IsPostMatch = !IRToProfileLocationMap.empty();
2403-
auto &MismatchedCallsites =
2401+
const LocToLocMap *IRToProfileLocationMap) {
2402+
bool IsPostMatch = IRToProfileLocationMap != nullptr;
2403+
auto &CallsiteMatchStates =
24042404
FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())];
24052405

2406+
// IRToProfileLocationMap is null before the matching.
24062407
auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
2407-
const auto &ProfileLoc = IRToProfileLocationMap.find(IRLoc);
2408-
if (ProfileLoc != IRToProfileLocationMap.end())
2408+
if (!IRToProfileLocationMap)
2409+
return IRLoc;
2410+
const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
2411+
if (ProfileLoc != IRToProfileLocationMap->end())
24092412
return ProfileLoc->second;
24102413
else
24112414
return IRLoc;
24122415
};
24132416

2414-
std::set<LineLocation> MatchedCallsites;
24152417
for (const auto &I : IRAnchors) {
24162418
// In post-match, use the matching result to remap the current IR callsite.
24172419
const auto &Loc = MapIRLocToProfileLoc(I.first);
@@ -2421,18 +2423,27 @@ void SampleProfileMatcher::computeCallsiteMatchStates(
24212423
continue;
24222424
const auto &Callees = It->second;
24232425

2426+
bool IsCallsiteMatched = false;
24242427
// Since indirect call does not have CalleeName, check conservatively if
24252428
// callsite in the profile is a callsite location. This is to reduce num of
24262429
// false positive since otherwise all the indirect call samples will be
24272430
// reported as mismatching.
24282431
if (IRCalleeName == SampleProfileMatcher::UnknownIndirectCallee)
2429-
MatchedCallsites.insert(Loc);
2430-
// TODO : Ideally, we should ensure it's a direct callsite location(Callees
2431-
// size is 1). However, there may be a bug for profile merge(like ODR
2432-
// violation) that causes the callees size to be more than 1. After we fix
2433-
// the bug, we can remove this check.
2434-
else if (Callees.count(getRepInFormat(IRCalleeName)))
2435-
MatchedCallsites.insert(Loc);
2432+
IsCallsiteMatched = true;
2433+
else if (Callees.size() == 1 && Callees.count(getRepInFormat(IRCalleeName)))
2434+
IsCallsiteMatched = true;
2435+
2436+
if (IsCallsiteMatched) {
2437+
auto R = CallsiteMatchStates.emplace(Loc, MatchState::Matched);
2438+
// Update the post-match state when there is a existing state indicateing
2439+
// it's in post-match phrase.
2440+
if (!R.second) {
2441+
if (R.first->second == MatchState::Mismatched)
2442+
R.first->second = MatchState::Recovered;
2443+
if (R.first->second == MatchState::Matched)
2444+
R.first->second = MatchState::StayMatched;
2445+
}
2446+
}
24362447
}
24372448

24382449
// Check if there are any callsites in the profile that does not match to any
@@ -2441,37 +2452,40 @@ void SampleProfileMatcher::computeCallsiteMatchStates(
24412452
const auto &Loc = I.first;
24422453
[[maybe_unused]] const auto &Callees = I.second;
24432454
assert(!Callees.empty() && "Callees should not be empty");
2444-
if (IsPostMatch) {
2445-
if (MatchedCallsites.count(Loc)) {
2446-
auto It = MismatchedCallsites.find(Loc);
2447-
if (It != MismatchedCallsites.end() &&
2448-
It->second == MatchState::Mismatched)
2449-
MismatchedCallsites.emplace(Loc, MatchState::Recovered);
2450-
} else
2451-
MismatchedCallsites.emplace(Loc, MatchState::Mismatched);
2452-
} else {
2453-
if (MatchedCallsites.count(Loc))
2454-
MismatchedCallsites.emplace(Loc, MatchState::Matched);
2455-
else
2456-
MismatchedCallsites.emplace(Loc, MatchState::Mismatched);
2457-
}
2455+
auto It = CallsiteMatchStates.find(Loc);
2456+
if (It == CallsiteMatchStates.end())
2457+
CallsiteMatchStates.emplace(Loc, MatchState::Mismatched);
2458+
// If in post-match, the state is not updated to Recovered or StayMatched,
2459+
// update it to Mismatched.
2460+
else if (IsPostMatch && It->second == MatchState::Matched)
2461+
CallsiteMatchStates.emplace(Loc, MatchState::Mismatched);
24582462
}
24592463
}
24602464

2461-
void SampleProfileMatcher::countMismatchedFuncSamples(
2462-
const FunctionSamples &FS) {
2465+
void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS,
2466+
bool IsTopLevel) {
24632467
const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
24642468
// Skip the function that is external or renamed.
24652469
if (!FuncDesc)
24662470
return;
24672471

24682472
if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
2473+
if (IsTopLevel)
2474+
NumStaleProfileFunc++;
2475+
// Once the checksum is mismatched, it's likely all the callites are
2476+
// mismatched and dropped, we conservatively count all the samples as
2477+
// mismatched samples and stop counting the inlinee profile.
24692478
MismatchedFunctionSamples += FS.getTotalSamples();
24702479
return;
24712480
}
2481+
2482+
// Even the current function checksum is matched, it's possible that the
2483+
// inlinees' checksums are mismatched, we need to go deeper to check the
2484+
// inlinee's function samples. Similarly, if the inlinee's checksum is
2485+
// mismatched, we stop and count all the samples as mismatched samples.
24722486
for (const auto &I : FS.getCallsiteSamples())
24732487
for (const auto &CS : I.second)
2474-
countMismatchedFuncSamples(CS.second);
2488+
countMismatchedFuncSamples(CS.second, false);
24752489
}
24762490

24772491
void SampleProfileMatcher::countMismatchedCallsiteSamples(
@@ -2480,39 +2494,41 @@ void SampleProfileMatcher::countMismatchedCallsiteSamples(
24802494
// Skip it if no mismatched callsite or this is an external function.
24812495
if (It == FuncCallsiteMatchStates.end() || It->second.empty())
24822496
return;
2483-
const auto &MismatchCallsites = It->second;
2497+
const auto &CallsiteMatchStates = It->second;
24842498

2485-
auto IsCallsiteMismatched = [&](const LineLocation &Loc) {
2486-
auto It = MismatchCallsites.find(Loc);
2487-
if (It == MismatchCallsites.end())
2488-
return false;
2489-
return It->second == MatchState::Mismatched;
2499+
auto findMatchState = [&](const LineLocation &Loc) {
2500+
auto It = CallsiteMatchStates.find(Loc);
2501+
if (It == CallsiteMatchStates.end())
2502+
return MatchState::Unknown;
2503+
return It->second;
24902504
};
24912505

2492-
auto CountSamples = [&](const LineLocation &Loc, uint64_t Samples) {
2493-
auto It = MismatchCallsites.find(Loc);
2494-
if (It == MismatchCallsites.end())
2495-
return;
2496-
if (It->second == MatchState::Mismatched)
2506+
auto AttributeMismatchedSamples = [&](const enum MatchState &State,
2507+
uint64_t Samples) {
2508+
if (State == MatchState::Mismatched)
24972509
MismatchedCallsiteSamples += Samples;
2498-
else if (It->second == MatchState::Recovered)
2510+
else if (State == MatchState::Recovered)
24992511
RecoveredCallsiteSamples += Samples;
25002512
};
25012513

2514+
// The non-inlined callsites are saved in the body samples of function
2515+
// profile.
25022516
for (const auto &I : FS.getBodySamples())
2503-
CountSamples(I.first, I.second.getSamples());
2517+
AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples());
25042518

25052519
for (const auto &I : FS.getCallsiteSamples()) {
2506-
uint64_t Samples = 0;
2520+
auto State = findMatchState(I.first);
2521+
uint64_t CallsiteSamples = 0;
25072522
for (const auto &CS : I.second)
2508-
Samples += CS.second.getTotalSamples();
2509-
2510-
CountSamples(I.first, Samples);
2523+
CallsiteSamples += CS.second.getTotalSamples();
2524+
AttributeMismatchedSamples(State, CallsiteSamples);
25112525

2512-
if (IsCallsiteMismatched(I.first))
2526+
if (State == MatchState::Mismatched)
25132527
continue;
25142528

2515-
// Count mismatched samples for matched inlines.
2529+
// When the current level of inlined call site matches the profiled call
2530+
// site, we need to go deeper along the inline tree to count mismatches from
2531+
// lower level inlinees.
25162532
for (const auto &CS : I.second)
25172533
countMismatchedCallsiteSamples(CS.second);
25182534
}
@@ -2539,29 +2555,21 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
25392555

25402556
// Count profile mismatches for profile staleness report.
25412557
for (const auto &F : M) {
2542-
if (ShouldSkipProfileLoading(F))
2558+
if (skipProfileForFunction(F))
25432559
continue;
25442560
// As the stats will be merged by linker, skip reporting the metrics for
25452561
// imported functions to avoid repeated counting.
25462562
if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage()))
25472563
continue;
2548-
// Use top-level nested FS for counting profile mismatch metrics since
2549-
// currently once a callsite is mismatched, all its children profiles are
2550-
// dropped.
25512564
const auto *FS = Reader.getSamplesFor(F);
25522565
if (!FS)
25532566
continue;
2554-
25552567
TotalProfiledFunc++;
25562568
TotalFunctionSamples += FS->getTotalSamples();
25572569

2558-
if (FunctionSamples::ProfileIsProbeBased) {
2559-
const auto *FuncDesc = ProbeManager->getDesc(F);
2560-
if (FuncDesc && ProbeManager->profileIsHashMismatched(*FuncDesc, *FS))
2561-
NumMismatchedFunc++;
2562-
2563-
countMismatchedFuncSamples(*FS);
2564-
}
2570+
// Checksum mismatch is only used in pseudo-probe mode.
2571+
if (FunctionSamples::ProfileIsProbeBased)
2572+
countMismatchedFuncSamples(*FS, true);
25652573

25662574
// Count mismatches and samples for calliste.
25672575
countMismatchCallsites(*FS);
@@ -2570,23 +2578,23 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
25702578

25712579
if (ReportProfileStaleness) {
25722580
if (FunctionSamples::ProfileIsProbeBased) {
2573-
errs() << "(" << NumMismatchedFunc << "/" << TotalProfiledFunc << ")"
2581+
errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc << ")"
25742582
<< " of functions' profile are invalid and "
25752583
<< " (" << MismatchedFunctionSamples << "/" << TotalFunctionSamples
25762584
<< ")"
25772585
<< " of samples are discarded due to function hash mismatch.\n";
25782586
}
2579-
errs() << "(" << NumMismatchedCallsites << "/" << TotalProfiledCallsites
2580-
<< ")"
2587+
errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/"
2588+
<< TotalProfiledCallsites << ")"
25812589
<< " of callsites' profile are invalid and "
2582-
<< "(" << MismatchedCallsiteSamples << "/" << TotalFunctionSamples
2583-
<< ")"
2590+
<< "(" << (MismatchedCallsiteSamples + RecoveredCallsiteSamples)
2591+
<< "/" << TotalFunctionSamples << ")"
25842592
<< " of samples are discarded due to callsite location mismatch.\n";
2585-
errs() << "(" << NumRecoveredCallsites << "/" << TotalProfiledCallsites
2586-
<< ")"
2593+
errs() << "(" << NumRecoveredCallsites << "/"
2594+
<< (NumRecoveredCallsites + NumMismatchedCallsites) << ")"
25872595
<< " of callsites and "
2588-
<< "(" << RecoveredCallsiteSamples << "/" << TotalFunctionSamples
2589-
<< ")"
2596+
<< "(" << RecoveredCallsiteSamples << "/"
2597+
<< (RecoveredCallsiteSamples + MismatchedCallsiteSamples) << ")"
25902598
<< " of samples are recovered by stale profile matching.\n";
25912599
}
25922600

@@ -2596,7 +2604,7 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
25962604

25972605
SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec;
25982606
if (FunctionSamples::ProfileIsProbeBased) {
2599-
ProfStatsVec.emplace_back("NumMismatchedFunc", NumMismatchedFunc);
2607+
ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc);
26002608
ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
26012609
ProfStatsVec.emplace_back("MismatchedFunctionSamples",
26022610
MismatchedFunctionSamples);
@@ -2621,7 +2629,7 @@ void SampleProfileMatcher::runOnModule() {
26212629
ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
26222630
FunctionSamples::ProfileIsCS);
26232631
for (auto &F : M) {
2624-
if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
2632+
if (skipProfileForFunction(F))
26252633
continue;
26262634
runOnFunction(F);
26272635
}

0 commit comments

Comments
 (0)