Skip to content

Commit ee5a970

Browse files
committed
improve profile density
1 parent 4c68de5 commit ee5a970

File tree

3 files changed

+79
-8
lines changed

3 files changed

+79
-8
lines changed

llvm/test/tools/llvm-profgen/profile-density.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
;CHECK-DENSITY: Sample PGO is estimated to optimize better with 3.1x more samples. Please consider increasing sampling rate or profiling for longer duration to get more samples.
88
;CHECK-DENSITY: Minimum profile density for hot functions with top 99.00% total samples: 3.2
99

10-
;CHECK-DENSITY-CS: Minimum profile density for hot functions with top 99.00% total samples: 128.3
10+
;CHECK-DENSITY-CS: Minimum profile density for hot functions with top 99.00% total samples: 619.0
1111

1212
; original code:
1313
; clang -O3 -g -fno-optimize-sibling-calls -fdebug-info-for-profiling qsort.c -o a.out

llvm/tools/llvm-profgen/ProfileGenerator.cpp

Lines changed: 74 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,10 @@ static cl::opt<double> HotFunctionDensityThreshold(
8383
// When set, showDensitySuggestion() additionally prints the
// "Minimum profile density for hot functions ..." line to stdout.
static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
8484
llvm::cl::desc("show profile density details"),
8585
llvm::cl::Optional);
86+
// Share of the total samples that the densest functions must cover when the
// context-sensitive density is computed. The value is expressed in parts per
// million: it is divided by 1000000 when used as a fraction of the total
// samples and by 10000 when printed as a percentage, so the default 990000
// corresponds to 99.00%.
static cl::opt<int> ProfileDensityHotFuncCutOff(
87+
"profile-density-hot-func-cutoff", llvm::cl::init(990000),
88+
llvm::cl::desc("Total sample cutoff for hot functions used to calculate "
89+
"the profile density."));
8690

8791
static cl::opt<bool> UpdateTotalSamples(
8892
"update-total-samples", llvm::cl::init(false),
@@ -177,7 +181,8 @@ void ProfileGeneratorBase::write() {
177181
write(std::move(WriterOrErr.get()), ProfileMap);
178182
}
179183

180-
void ProfileGeneratorBase::showDensitySuggestion(double Density) {
184+
void ProfileGeneratorBase::showDensitySuggestion(double Density,
185+
int DensityCutoffHot) {
181186
if (Density == 0.0)
182187
WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
183188
"set too low. Please check your command.\n";
@@ -190,9 +195,7 @@ void ProfileGeneratorBase::showDensitySuggestion(double Density) {
190195

191196
if (ShowDensity)
192197
outs() << "Minimum profile density for hot functions with top "
193-
<< format("%.2f",
194-
static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
195-
10000)
198+
<< format("%.2f", static_cast<double>(DensityCutoffHot) / 10000)
196199
<< "% total samples: " << format("%.1f", Density) << "\n";
197200
}
198201

@@ -771,7 +774,7 @@ void ProfileGenerator::populateBoundarySamplesForAllFunctions(
771774
void ProfileGeneratorBase::calculateAndShowDensity(
772775
const SampleProfileMap &Profiles) {
773776
double Density = calculateDensity(Profiles, HotCountThreshold);
774-
showDensitySuggestion(Density);
777+
showDensitySuggestion(Density, ProfileSummaryCutoffHot);
775778
}
776779

777780
FunctionSamples *
@@ -1032,6 +1035,68 @@ void CSProfileGenerator::convertToProfileMap() {
10321035
IsProfileValidOnTrie = false;
10331036
}
10341037

1038+
/// Compute the profile density from the nodes tracked by \p CTracker and
/// report it, using ProfileDensityHotFuncCutOff as the cutoff shown in the
/// density message.
void CSProfileGenerator::calculateAndShowDensity(
    SampleContextTracker &CTracker) {
  showDensitySuggestion(calculateDensity(CTracker),
                        ProfileDensityHotFuncCutOff);
}
1043+
1044+
/// Compute the profile density from the nodes tracked by \p CTracker.
///
/// For every node that carries function samples, a per-function density is
/// computed as (sum of body sample counts) / (number of body sample entries).
/// If a function has no body samples, its callsite samples are used instead:
/// each callsite location counts as one entry and the head sample estimates of
/// its callees are summed. Functions with no entries at all are ignored.
///
/// The functions are then visited from the highest density to the lowest while
/// their samples are accumulated. The walk stops at the first function where
/// the accumulated samples reach the ProfileDensityHotFuncCutOff share (parts
/// per million) of all counted samples.
///
/// \param CTracker context tracker whose nodes are iterated.
/// \returns the density of the last function visited, i.e. the minimum density
/// among the functions needed to reach the cutoff; 0.0 if no node contributed.
double CSProfileGenerator::calculateDensity(SampleContextTracker &CTracker) {
  uint64_t TotalProfileBodySamples = 0;
  // A list of the function profile density and total samples.
  std::vector<std::pair<double, uint64_t>> DensityList;
  for (const auto *Node : CTracker) {
    const auto *FSamples = Node->getFunctionSamples();
    if (!FSamples)
      continue;

    uint64_t TotalBodySamples = 0;
    uint64_t FuncBodySize = 0;
    for (const auto &I : FSamples->getBodySamples()) {
      TotalBodySamples += I.second.getSamples();
      FuncBodySize++;
    }
    // The whole function could be inlined and optimized out, use the callsite
    // head samples instead to estimate the body count.
    if (FuncBodySize == 0) {
      for (const auto &CallsiteSamples : FSamples->getCallsiteSamples()) {
        FuncBodySize++;
        for (const auto &Callee : CallsiteSamples.second)
          TotalBodySamples += Callee.second.getHeadSamplesEstimate();
      }
    }

    // Nothing to measure for this function; also guards the division below.
    if (FuncBodySize == 0)
      continue;

    double CurrDensity = static_cast<double>(TotalBodySamples) / FuncBodySize;
    TotalProfileBodySamples += TotalBodySamples;
    DensityList.emplace_back(CurrDensity, TotalBodySamples);
  }

  // Sorted by the density in descending order; functions with equal density
  // are ordered by ascending sample count.
  llvm::stable_sort(DensityList, [](const std::pair<double, uint64_t> &A,
                                    const std::pair<double, uint64_t> &B) {
    if (A.first != B.first)
      return A.first > B.first;
    return A.second < B.second;
  });

  // The threshold is loop-invariant, so compute it once. Use double rather
  // than float: float has a 24-bit significand and would round both the
  // threshold and the accumulated count once they exceed ~16.7M samples.
  const double HotSampleThreshold =
      static_cast<double>(TotalProfileBodySamples) *
      static_cast<double>(ProfileDensityHotFuncCutOff) / 1000000.0;

  double Density = 0.0;
  uint64_t CurrentSamples = 0;
  for (const auto &P : DensityList) {
    CurrentSamples += P.second;
    Density = P.first;
    if (static_cast<double>(CurrentSamples) >= HotSampleThreshold)
      break;
  }

  return Density;
}
1099+
10351100
void CSProfileGenerator::postProcessProfiles() {
10361101
// Compute hot/cold threshold based on profile. This will be used for cold
10371102
// context profile merging/trimming.
@@ -1041,6 +1106,7 @@ void CSProfileGenerator::postProcessProfiles() {
10411106
// inline decisions.
10421107
if (EnableCSPreInliner) {
10431108
ContextTracker.populateFuncToCtxtMap();
1109+
calculateAndShowDensity(ContextTracker);
10441110
CSPreInliner(ContextTracker, *Binary, Summary.get()).run();
10451111
// Turn off the profile merger by default unless it is explicitly enabled.
10461112
if (!CSProfMergeColdContext.getNumOccurrences())
@@ -1061,7 +1127,9 @@ void CSProfileGenerator::postProcessProfiles() {
10611127
sampleprof::SampleProfileMap ContextLessProfiles;
10621128
ProfileConverter::flattenProfile(ProfileMap, ContextLessProfiles, true);
10631129

1064-
calculateAndShowDensity(ContextLessProfiles);
1130+
if (!EnableCSPreInliner)
1131+
ProfileGeneratorBase::calculateAndShowDensity(ContextLessProfiles);
1132+
10651133
if (GenCSNestedProfile) {
10661134
ProfileConverter CSConverter(ProfileMap);
10671135
CSConverter.convertCSProfiles();

llvm/tools/llvm-profgen/ProfileGenerator.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ class ProfileGeneratorBase {
121121
double calculateDensity(const SampleProfileMap &Profiles,
122122
uint64_t HotCntThreshold);
123123

124-
void showDensitySuggestion(double Density);
124+
void showDensitySuggestion(double Density, int DensityCutoffHot);
125125

126126
void collectProfiledFunctions();
127127

@@ -363,6 +363,9 @@ class CSProfileGenerator : public ProfileGeneratorBase {
363363

364364
void computeSummaryAndThreshold();
365365

366+
void calculateAndShowDensity(SampleContextTracker &CTracker);
367+
double calculateDensity(SampleContextTracker &CTracker);
368+
366369
bool collectFunctionsFromLLVMProfile(
367370
std::unordered_set<const BinaryFunction *> &ProfiledFunctions) override;
368371

0 commit comments

Comments
 (0)