@@ -447,8 +447,12 @@ class SampleProfileMatcher {
447
447
// Match state for an anchor/callsite. Every state has a distinct value so
// that Mismatched and Recovered can be told apart when the states are counted.
enum class MatchState {
  // The profiled callsite matches a callsite in the IR.
  Matched = 0,
  // The profiled callsite does not match any callsite in the IR.
  Mismatched = 1,
  // Stay Matched after profile matching.
  StayMatched = 2,
  // Recovered from Mismatched after profile matching.
  Recovered = 3,
  // No state has been recorded for the queried location.
  Unknown = 32,
};
453
457
454
458
// For each function, store every callsite state into a map, of which each
@@ -457,19 +461,17 @@ class SampleProfileMatcher {
457
461
StringMap<std::unordered_map<LineLocation, MatchState, LineLocationHash>>
458
462
FuncCallsiteMatchStates;
459
463
460
- // / Profile mismatch statstics:
464
+ // Profile mismatch statistics:
461
465
uint64_t TotalProfiledFunc = 0 ;
462
- // Num of function whose checksum is mismatched.
463
- uint64_t NumMismatchedFunc = 0 ;
466
+ // Number of functions whose profile checksum is mismatched.
467
+ uint64_t NumStaleProfileFunc = 0 ;
464
468
uint64_t TotalProfiledCallsites = 0 ;
465
469
uint64_t NumMismatchedCallsites = 0 ;
466
470
uint64_t NumRecoveredCallsites = 0 ;
467
-
468
- // / Weigted profile samples mismatch statstics:
471
+ // Total samples for all profiled functions.
469
472
uint64_t TotalFunctionSamples = 0 ;
470
- // Samples for the mismatched checksum functions;
473
+ // Total samples for all checksum-mismatched functions.
471
474
uint64_t MismatchedFunctionSamples = 0 ;
472
-
473
475
uint64_t MismatchedCallsiteSamples = 0 ;
474
476
uint64_t RecoveredCallsiteSamples = 0 ;
475
477
@@ -504,11 +506,11 @@ class SampleProfileMatcher {
504
506
const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
505
507
const std::map<LineLocation, std::unordered_set<FunctionId>>
506
508
&ProfileAnchors,
507
- const LocToLocMap & IRToProfileLocationMap);
509
+ const LocToLocMap * IRToProfileLocationMap);
508
510
509
511
// Count the samples of checksum mismatched function for the top-level
510
512
// function and all inlinees.
511
- void countMismatchedFuncSamples (const FunctionSamples &FS);
513
+ void countMismatchedFuncSamples (const FunctionSamples &FS, bool IsTopLevel );
512
514
// Count the number of mismatched or recovered callsites.
513
515
void countMismatchCallsites (const FunctionSamples &FS);
514
516
// Count the samples of mismatched or recovered callsites for top-level
@@ -715,7 +717,7 @@ void SampleProfileLoaderBaseImpl<Function>::computeDominanceAndLoopInfo(
715
717
}
716
718
} // namespace llvm
717
719
718
- bool ShouldSkipProfileLoading (const Function &F) {
720
+ static bool skipProfileForFunction (const Function &F) {
719
721
return F.isDeclaration () || !F.hasFnAttribute (" use-sample-profile" );
720
722
}
721
723
@@ -1903,7 +1905,7 @@ SampleProfileLoader::buildProfiledCallGraph(Module &M) {
1903
1905
// the profile. This makes sure functions missing from the profile still
1904
1906
// gets a chance to be processed.
1905
1907
for (Function &F : M) {
1906
- if (F. isDeclaration () || !F. hasFnAttribute ( " use-sample-profile " ))
1908
+ if (skipProfileForFunction (F ))
1907
1909
continue ;
1908
1910
ProfiledCG->addProfiledFunction (
1909
1911
getRepInFormat (FunctionSamples::getCanonicalFnName (F)));
@@ -1932,7 +1934,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) {
1932
1934
}
1933
1935
1934
1936
for (Function &F : M)
1935
- if (!F. isDeclaration () && F. hasFnAttribute ( " use-sample-profile " ))
1937
+ if (!skipProfileForFunction (F ))
1936
1938
FunctionOrderList.push_back (&F);
1937
1939
return FunctionOrderList;
1938
1940
}
@@ -1998,7 +2000,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) {
1998
2000
}
1999
2001
for (auto *Node : Range) {
2000
2002
Function *F = SymbolMap.lookup (Node->Name );
2001
- if (F && !F-> isDeclaration () && F-> hasFnAttribute ( " use-sample-profile " ))
2003
+ if (F && !skipProfileForFunction (*F ))
2002
2004
FunctionOrderList.push_back (F);
2003
2005
}
2004
2006
++CGI;
@@ -2009,7 +2011,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) {
2009
2011
for (LazyCallGraph::SCC &C : RC) {
2010
2012
for (LazyCallGraph::Node &N : C) {
2011
2013
Function &F = N.getFunction ();
2012
- if (!F. isDeclaration () && F. hasFnAttribute ( " use-sample-profile " ))
2014
+ if (!skipProfileForFunction (F ))
2013
2015
FunctionOrderList.push_back (&F);
2014
2016
}
2015
2017
}
@@ -2374,7 +2376,7 @@ void SampleProfileMatcher::runOnFunction(const Function &F) {
2374
2376
2375
2377
// Compute the callsite match states for profile staleness report.
2376
2378
if (ReportProfileStaleness || PersistProfileStaleness)
2377
- computeCallsiteMatchStates (F, IRAnchors, ProfileAnchors, LocToLocMap () );
2379
+ computeCallsiteMatchStates (F, IRAnchors, ProfileAnchors, nullptr );
2378
2380
2379
2381
// Run profile matching for checksum mismatched profile, currently only
2380
2382
// support for pseudo-probe.
@@ -2386,32 +2388,32 @@ void SampleProfileMatcher::runOnFunction(const Function &F) {
2386
2388
runStaleProfileMatching (F, IRAnchors, ProfileAnchors,
2387
2389
IRToProfileLocationMap);
2388
2390
// Find and update callsite match states after matching.
2389
- if ((ReportProfileStaleness || PersistProfileStaleness) &&
2390
- !IRToProfileLocationMap.empty ())
2391
+ if (ReportProfileStaleness || PersistProfileStaleness)
2391
2392
computeCallsiteMatchStates (F, IRAnchors, ProfileAnchors,
2392
- IRToProfileLocationMap);
2393
+ & IRToProfileLocationMap);
2393
2394
}
2394
2395
}
2395
2396
2396
2397
void SampleProfileMatcher::computeCallsiteMatchStates (
2397
2398
const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
2398
2399
const std::map<LineLocation, std::unordered_set<FunctionId>>
2399
2400
&ProfileAnchors,
2400
- const LocToLocMap &IRToProfileLocationMap) {
2401
- // Use the matching result to determine if it's in post-match phrase.
2402
- bool IsPostMatch = !IRToProfileLocationMap.empty ();
2403
- auto &MismatchedCallsites =
2401
+ const LocToLocMap *IRToProfileLocationMap) {
2402
+ bool IsPostMatch = IRToProfileLocationMap != nullptr ;
2403
+ auto &CallsiteMatchStates =
2404
2404
FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName (F.getName ())];
2405
2405
2406
+ // IRToProfileLocationMap is null before the matching.
2406
2407
auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
2407
- const auto &ProfileLoc = IRToProfileLocationMap.find (IRLoc);
2408
- if (ProfileLoc != IRToProfileLocationMap.end ())
2408
+ if (!IRToProfileLocationMap)
2409
+ return IRLoc;
2410
+ const auto &ProfileLoc = IRToProfileLocationMap->find (IRLoc);
2411
+ if (ProfileLoc != IRToProfileLocationMap->end ())
2409
2412
return ProfileLoc->second ;
2410
2413
else
2411
2414
return IRLoc;
2412
2415
};
2413
2416
2414
- std::set<LineLocation> MatchedCallsites;
2415
2417
for (const auto &I : IRAnchors) {
2416
2418
// In post-match, use the matching result to remap the current IR callsite.
2417
2419
const auto &Loc = MapIRLocToProfileLoc (I.first );
@@ -2421,18 +2423,27 @@ void SampleProfileMatcher::computeCallsiteMatchStates(
2421
2423
continue ;
2422
2424
const auto &Callees = It->second ;
2423
2425
2426
+ bool IsCallsiteMatched = false ;
2424
2427
// Since indirect call does not have CalleeName, check conservatively if
2425
2428
// callsite in the profile is a callsite location. This is to reduce num of
2426
2429
// false positive since otherwise all the indirect call samples will be
2427
2430
// reported as mismatching.
2428
2431
if (IRCalleeName == SampleProfileMatcher::UnknownIndirectCallee)
2429
- MatchedCallsites.insert (Loc);
2430
- // TODO : Ideally, we should ensure it's a direct callsite location(Callees
2431
- // size is 1). However, there may be a bug for profile merge(like ODR
2432
- // violation) that causes the callees size to be more than 1. After we fix
2433
- // the bug, we can remove this check.
2434
- else if (Callees.count (getRepInFormat (IRCalleeName)))
2435
- MatchedCallsites.insert (Loc);
2432
+ IsCallsiteMatched = true ;
2433
+ else if (Callees.size () == 1 && Callees.count (getRepInFormat (IRCalleeName)))
2434
+ IsCallsiteMatched = true ;
2435
+
2436
+ if (IsCallsiteMatched) {
2437
+ auto R = CallsiteMatchStates.emplace (Loc, MatchState::Matched);
2438
+ // Update the post-match state when there is an existing state, which
2439
+ // indicates that this is the post-match phase.
2440
+ if (!R.second ) {
2441
+ if (R.first ->second == MatchState::Mismatched)
2442
+ R.first ->second = MatchState::Recovered;
2443
+ if (R.first ->second == MatchState::Matched)
2444
+ R.first ->second = MatchState::StayMatched;
2445
+ }
2446
+ }
2436
2447
}
2437
2448
2438
2449
// Check if there are any callsites in the profile that does not match to any
@@ -2441,37 +2452,40 @@ void SampleProfileMatcher::computeCallsiteMatchStates(
2441
2452
const auto &Loc = I.first ;
2442
2453
[[maybe_unused]] const auto &Callees = I.second ;
2443
2454
assert (!Callees.empty () && " Callees should not be empty" );
2444
- if (IsPostMatch) {
2445
- if (MatchedCallsites.count (Loc)) {
2446
- auto It = MismatchedCallsites.find (Loc);
2447
- if (It != MismatchedCallsites.end () &&
2448
- It->second == MatchState::Mismatched)
2449
- MismatchedCallsites.emplace (Loc, MatchState::Recovered);
2450
- } else
2451
- MismatchedCallsites.emplace (Loc, MatchState::Mismatched);
2452
- } else {
2453
- if (MatchedCallsites.count (Loc))
2454
- MismatchedCallsites.emplace (Loc, MatchState::Matched);
2455
- else
2456
- MismatchedCallsites.emplace (Loc, MatchState::Mismatched);
2457
- }
2455
+ auto It = CallsiteMatchStates.find (Loc);
2456
+ if (It == CallsiteMatchStates.end ())
2457
+ CallsiteMatchStates.emplace (Loc, MatchState::Mismatched);
2458
+ // If in post-match, the state is not updated to Recovered or StayMatched,
2459
+ // update it to Mismatched.
2460
+ else if (IsPostMatch && It->second == MatchState::Matched)
2461
+ CallsiteMatchStates.emplace (Loc, MatchState::Mismatched);
2458
2462
}
2459
2463
}
2460
2464
2461
- void SampleProfileMatcher::countMismatchedFuncSamples (
2462
- const FunctionSamples &FS ) {
2465
+ void SampleProfileMatcher::countMismatchedFuncSamples (const FunctionSamples &FS,
2466
+ bool IsTopLevel ) {
2463
2467
const auto *FuncDesc = ProbeManager->getDesc (FS.getGUID ());
2464
2468
// Skip the function that is external or renamed.
2465
2469
if (!FuncDesc)
2466
2470
return ;
2467
2471
2468
2472
if (ProbeManager->profileIsHashMismatched (*FuncDesc, FS)) {
2473
+ if (IsTopLevel)
2474
+ NumStaleProfileFunc++;
2475
+ // Once the checksum is mismatched, it's likely all the callites are
2476
+ // mismatched and dropped, we conservatively count all the samples as
2477
+ // mismatched samples and stop counting the inlinee profile.
2469
2478
MismatchedFunctionSamples += FS.getTotalSamples ();
2470
2479
return ;
2471
2480
}
2481
+
2482
+ // Even the current function checksum is matched, it's possible that the
2483
+ // inlinees' checksums are mismatched, we need to go deeper to check the
2484
+ // inlinee's function samples. Similarly, if the inlinee's checksum is
2485
+ // mismatched, we stop and count all the samples as mismatched samples.
2472
2486
for (const auto &I : FS.getCallsiteSamples ())
2473
2487
for (const auto &CS : I.second )
2474
- countMismatchedFuncSamples (CS.second );
2488
+ countMismatchedFuncSamples (CS.second , false );
2475
2489
}
2476
2490
2477
2491
void SampleProfileMatcher::countMismatchedCallsiteSamples (
@@ -2480,39 +2494,41 @@ void SampleProfileMatcher::countMismatchedCallsiteSamples(
2480
2494
// Skip it if no mismatched callsite or this is an external function.
2481
2495
if (It == FuncCallsiteMatchStates.end () || It->second .empty ())
2482
2496
return ;
2483
- const auto &MismatchCallsites = It->second ;
2497
+ const auto &CallsiteMatchStates = It->second ;
2484
2498
2485
- auto IsCallsiteMismatched = [&](const LineLocation &Loc) {
2486
- auto It = MismatchCallsites .find (Loc);
2487
- if (It == MismatchCallsites .end ())
2488
- return false ;
2489
- return It->second == MatchState::Mismatched ;
2499
+ auto findMatchState = [&](const LineLocation &Loc) {
2500
+ auto It = CallsiteMatchStates .find (Loc);
2501
+ if (It == CallsiteMatchStates .end ())
2502
+ return MatchState::Unknown ;
2503
+ return It->second ;
2490
2504
};
2491
2505
2492
- auto CountSamples = [&](const LineLocation &Loc, uint64_t Samples) {
2493
- auto It = MismatchCallsites.find (Loc);
2494
- if (It == MismatchCallsites.end ())
2495
- return ;
2496
- if (It->second == MatchState::Mismatched)
2506
+ auto AttributeMismatchedSamples = [&](const enum MatchState &State,
2507
+ uint64_t Samples) {
2508
+ if (State == MatchState::Mismatched)
2497
2509
MismatchedCallsiteSamples += Samples;
2498
- else if (It-> second == MatchState::Recovered)
2510
+ else if (State == MatchState::Recovered)
2499
2511
RecoveredCallsiteSamples += Samples;
2500
2512
};
2501
2513
2514
+ // The non-inlined callsites are saved in the body samples of function
2515
+ // profile.
2502
2516
for (const auto &I : FS.getBodySamples ())
2503
- CountSamples ( I.first , I.second .getSamples ());
2517
+ AttributeMismatchedSamples ( findMatchState ( I.first ) , I.second .getSamples ());
2504
2518
2505
2519
for (const auto &I : FS.getCallsiteSamples ()) {
2506
- uint64_t Samples = 0 ;
2520
+ auto State = findMatchState (I.first );
2521
+ uint64_t CallsiteSamples = 0 ;
2507
2522
for (const auto &CS : I.second )
2508
- Samples += CS.second .getTotalSamples ();
2509
-
2510
- CountSamples (I.first , Samples);
2523
+ CallsiteSamples += CS.second .getTotalSamples ();
2524
+ AttributeMismatchedSamples (State, CallsiteSamples);
2511
2525
2512
- if (IsCallsiteMismatched (I. first ) )
2526
+ if (State == MatchState::Mismatched )
2513
2527
continue ;
2514
2528
2515
- // Count mismatched samples for matched inlines.
2529
+ // When the current level of inlined call site matches the profiled call
2530
+ // site, we need to go deeper along the inline tree to count mismatches from
2531
+ // lower level inlinees.
2516
2532
for (const auto &CS : I.second )
2517
2533
countMismatchedCallsiteSamples (CS.second );
2518
2534
}
@@ -2539,29 +2555,21 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
2539
2555
2540
2556
// Count profile mismatches for profile staleness report.
2541
2557
for (const auto &F : M) {
2542
- if (ShouldSkipProfileLoading (F))
2558
+ if (skipProfileForFunction (F))
2543
2559
continue ;
2544
2560
// As the stats will be merged by linker, skip reporting the metrics for
2545
2561
// imported functions to avoid repeated counting.
2546
2562
if (GlobalValue::isAvailableExternallyLinkage (F.getLinkage ()))
2547
2563
continue ;
2548
- // Use top-level nested FS for counting profile mismatch metrics since
2549
- // currently once a callsite is mismatched, all its children profiles are
2550
- // dropped.
2551
2564
const auto *FS = Reader.getSamplesFor (F);
2552
2565
if (!FS)
2553
2566
continue ;
2554
-
2555
2567
TotalProfiledFunc++;
2556
2568
TotalFunctionSamples += FS->getTotalSamples ();
2557
2569
2558
- if (FunctionSamples::ProfileIsProbeBased) {
2559
- const auto *FuncDesc = ProbeManager->getDesc (F);
2560
- if (FuncDesc && ProbeManager->profileIsHashMismatched (*FuncDesc, *FS))
2561
- NumMismatchedFunc++;
2562
-
2563
- countMismatchedFuncSamples (*FS);
2564
- }
2570
+ // Checksum mismatch is only used in pseudo-probe mode.
2571
+ if (FunctionSamples::ProfileIsProbeBased)
2572
+ countMismatchedFuncSamples (*FS, true );
2565
2573
2566
2574
// Count mismatches and samples for callsites.
2567
2575
countMismatchCallsites (*FS);
@@ -2570,23 +2578,23 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
2570
2578
2571
2579
if (ReportProfileStaleness) {
2572
2580
if (FunctionSamples::ProfileIsProbeBased) {
2573
- errs () << " (" << NumMismatchedFunc << " /" << TotalProfiledFunc << " )"
2581
+ errs () << " (" << NumStaleProfileFunc << " /" << TotalProfiledFunc << " )"
2574
2582
<< " of functions' profile are invalid and "
2575
2583
<< " (" << MismatchedFunctionSamples << " /" << TotalFunctionSamples
2576
2584
<< " )"
2577
2585
<< " of samples are discarded due to function hash mismatch.\n " ;
2578
2586
}
2579
- errs () << " (" << NumMismatchedCallsites << " /" << TotalProfiledCallsites
2580
- << " )"
2587
+ errs () << " (" << ( NumMismatchedCallsites + NumRecoveredCallsites) << " /"
2588
+ << TotalProfiledCallsites << " )"
2581
2589
<< " of callsites' profile are invalid and "
2582
- << " (" << MismatchedCallsiteSamples << " / " << TotalFunctionSamples
2583
- << " )"
2590
+ << " (" << ( MismatchedCallsiteSamples + RecoveredCallsiteSamples)
2591
+ << " / " << TotalFunctionSamples << " )"
2584
2592
<< " of samples are discarded due to callsite location mismatch.\n " ;
2585
- errs () << " (" << NumRecoveredCallsites << " /" << TotalProfiledCallsites
2586
- << " )"
2593
+ errs () << " (" << NumRecoveredCallsites << " /"
2594
+ << (NumRecoveredCallsites + NumMismatchedCallsites) << " )"
2587
2595
<< " of callsites and "
2588
- << " (" << RecoveredCallsiteSamples << " /" << TotalFunctionSamples
2589
- << " )"
2596
+ << " (" << RecoveredCallsiteSamples << " /"
2597
+ << (RecoveredCallsiteSamples + MismatchedCallsiteSamples) << " )"
2590
2598
<< " of samples are recovered by stale profile matching.\n " ;
2591
2599
}
2592
2600
@@ -2596,7 +2604,7 @@ void SampleProfileMatcher::computeAndReportProfileStaleness() {
2596
2604
2597
2605
SmallVector<std::pair<StringRef, uint64_t >> ProfStatsVec;
2598
2606
if (FunctionSamples::ProfileIsProbeBased) {
2599
- ProfStatsVec.emplace_back (" NumMismatchedFunc " , NumMismatchedFunc );
2607
+ ProfStatsVec.emplace_back (" NumStaleProfileFunc " , NumStaleProfileFunc );
2600
2608
ProfStatsVec.emplace_back (" TotalProfiledFunc" , TotalProfiledFunc);
2601
2609
ProfStatsVec.emplace_back (" MismatchedFunctionSamples" ,
2602
2610
MismatchedFunctionSamples);
@@ -2621,7 +2629,7 @@ void SampleProfileMatcher::runOnModule() {
2621
2629
ProfileConverter::flattenProfile (Reader.getProfiles (), FlattenedProfiles,
2622
2630
FunctionSamples::ProfileIsCS);
2623
2631
for (auto &F : M) {
2624
- if (F. isDeclaration () || !F. hasFnAttribute ( " use-sample-profile " ))
2632
+ if (skipProfileForFunction (F ))
2625
2633
continue ;
2626
2634
runOnFunction (F);
2627
2635
}
0 commit comments