@@ -83,6 +83,10 @@ static cl::opt<double> HotFunctionDensityThreshold(
83
83
// Boolean command-line flag, off by default and optional. When it is set,
// showDensitySuggestion() prints the "Minimum profile density for hot
// functions ..." line to outs().
static cl::opt<bool > ShowDensity (" show-density" , llvm::cl::init(false ),
84
84
llvm::cl::desc(" show profile density details" ),
85
85
llvm::cl::Optional);
86
// Integer command-line option, default 990000. The value is in parts per
// million of the total profile samples: CSProfileGenerator::calculateDensity
// divides it by 1000000 to get a fraction, and showDensitySuggestion divides
// it by 10000 to print it as a percentage (so the default reads as 99.00%).
+ static cl::opt<int > ProfileDensityHotFuncCutOff (
87
+ " profile-density-hot-func-cutoff" , llvm::cl::init(990000 ),
88
+ llvm::cl::desc(" Total sample cutoff for hot functions used to calculate "
89
+ " the profile density." ));
86
90
87
91
static cl::opt<bool > UpdateTotalSamples (
88
92
" update-total-samples" , llvm::cl::init(false ),
@@ -177,7 +181,8 @@ void ProfileGeneratorBase::write() {
177
181
write (std::move (WriterOrErr.get ()), ProfileMap);
178
182
}
179
183
180
- void ProfileGeneratorBase::showDensitySuggestion (double Density) {
184
+ void ProfileGeneratorBase::showDensitySuggestion (double Density,
185
+ int DensityCutoffHot) {
181
186
if (Density == 0.0 )
182
187
WithColor::warning () << " The --profile-summary-cutoff-hot option may be "
183
188
" set too low. Please check your command.\n " ;
@@ -190,9 +195,7 @@ void ProfileGeneratorBase::showDensitySuggestion(double Density) {
190
195
191
196
if (ShowDensity)
192
197
outs () << " Minimum profile density for hot functions with top "
193
- << format (" %.2f" ,
194
- static_cast <double >(ProfileSummaryCutoffHot.getValue ()) /
195
- 10000 )
198
+ << format (" %.2f" , static_cast <double >(DensityCutoffHot) / 10000 )
196
199
<< " % total samples: " << format (" %.1f" , Density) << " \n " ;
197
200
}
198
201
@@ -771,7 +774,7 @@ void ProfileGenerator::populateBoundarySamplesForAllFunctions(
771
774
void ProfileGeneratorBase::calculateAndShowDensity (
772
775
const SampleProfileMap &Profiles) {
773
776
double Density = calculateDensity (Profiles, HotCountThreshold);
774
- showDensitySuggestion (Density);
777
+ showDensitySuggestion (Density, ProfileSummaryCutoffHot );
775
778
}
776
779
777
780
FunctionSamples *
@@ -1032,6 +1035,78 @@ void CSProfileGenerator::convertToProfileMap() {
1032
1035
IsProfileValidOnTrie = false ;
1033
1036
}
1034
1037
1038
+ void CSProfileGenerator::calculateAndShowDensity (
1039
+ SampleContextTracker &CTracker) {
1040
+ double Density = calculateDensity (CTracker);
1041
+ showDensitySuggestion (Density, ProfileDensityHotFuncCutOff);
1042
+ }
1043
+
1044
+ // Calculate Profile-density:
1045
+ // Sort the list of function-density in descending order and iterate them once
1046
+ // their accumulated total samples exceeds the percentage_threshold of total
1047
+ // profile samples, the profile-density is the last(minimum) function-density of
1048
+ // the processed functions, which means all the functions significant to perf
1049
+ // are on good density if the profile-density is good, or in other words, if the
1050
+ // profile-density is bad, the accumulated samples for all the bad density
1051
+ // profile exceeds the (100% - percentage_threshold).
1052
+ // The percentage_threshold(--profile-density-hot-func-cutoff) is configurable
1053
+ // depending on how much regression the system want to tolerate.
1054
+ double CSProfileGenerator::calculateDensity (SampleContextTracker &CTracker) {
1055
+ double ProfileDensity = 0.0 ;
1056
+
1057
+ uint64_t TotalProfileSamples = 0 ;
1058
+ // A list of the function profile density and total samples.
1059
+ std::vector<std::pair<double , uint64_t >> DensityList;
1060
+ for (const auto *Node : CTracker) {
1061
+ const auto *FSamples = Node->getFunctionSamples ();
1062
+ if (!FSamples)
1063
+ continue ;
1064
+
1065
+ uint64_t TotalBodySamples = 0 ;
1066
+ uint64_t FuncBodySize = 0 ;
1067
+ for (const auto &I : FSamples->getBodySamples ()) {
1068
+ TotalBodySamples += I.second .getSamples ();
1069
+ FuncBodySize++;
1070
+ }
1071
+ // The whole function could be inlined and optimized out, use the callsite
1072
+ // head samples instead to estimate the body count.
1073
+ if (FuncBodySize == 0 ) {
1074
+ for (const auto &CallsiteSamples : FSamples->getCallsiteSamples ()) {
1075
+ FuncBodySize++;
1076
+ for (const auto &Callee : CallsiteSamples.second )
1077
+ TotalBodySamples += Callee.second .getHeadSamplesEstimate ();
1078
+ }
1079
+ }
1080
+
1081
+ if (FuncBodySize == 0 )
1082
+ continue ;
1083
+
1084
+ double FuncDensity = static_cast <double >(TotalBodySamples) / FuncBodySize;
1085
+ TotalProfileSamples += TotalBodySamples;
1086
+ DensityList.emplace_back (FuncDensity, TotalBodySamples);
1087
+ }
1088
+
1089
+ // Sorted by the density in descending order.
1090
+ llvm::stable_sort (DensityList, [&](const std::pair<double , uint64_t > &A,
1091
+ const std::pair<double , uint64_t > &B) {
1092
+ if (A.first != B.first )
1093
+ return A.first > B.first ;
1094
+ return A.second < B.second ;
1095
+ });
1096
+
1097
+ uint64_t AccumulatedSamples = 0 ;
1098
+ for (const auto &P : DensityList) {
1099
+ AccumulatedSamples += P.second ;
1100
+ ProfileDensity = P.first ;
1101
+ if (AccumulatedSamples >=
1102
+ TotalProfileSamples * static_cast <float >(ProfileDensityHotFuncCutOff) /
1103
+ 1000000 )
1104
+ break ;
1105
+ }
1106
+
1107
+ return ProfileDensity;
1108
+ }
1109
+
1035
1110
void CSProfileGenerator::postProcessProfiles () {
1036
1111
// Compute hot/cold threshold based on profile. This will be used for cold
1037
1112
// context profile merging/trimming.
@@ -1041,6 +1116,7 @@ void CSProfileGenerator::postProcessProfiles() {
1041
1116
// inline decisions.
1042
1117
if (EnableCSPreInliner) {
1043
1118
ContextTracker.populateFuncToCtxtMap ();
1119
+ calculateAndShowDensity (ContextTracker);
1044
1120
CSPreInliner (ContextTracker, *Binary, Summary.get ()).run ();
1045
1121
// Turn off the profile merger by default unless it is explicitly enabled.
1046
1122
if (!CSProfMergeColdContext.getNumOccurrences ())
@@ -1061,7 +1137,9 @@ void CSProfileGenerator::postProcessProfiles() {
1061
1137
sampleprof::SampleProfileMap ContextLessProfiles;
1062
1138
ProfileConverter::flattenProfile (ProfileMap, ContextLessProfiles, true );
1063
1139
1064
- calculateAndShowDensity (ContextLessProfiles);
1140
+ if (!EnableCSPreInliner)
1141
+ ProfileGeneratorBase::calculateAndShowDensity (ContextLessProfiles);
1142
+
1065
1143
if (GenCSNestedProfile) {
1066
1144
ProfileConverter CSConverter (ProfileMap);
1067
1145
CSConverter.convertCSProfiles ();
0 commit comments