@@ -83,6 +83,10 @@ static cl::opt<double> HotFunctionDensityThreshold(
83
83
// Boolean command-line flag, initialised to false. When it is set,
// showDensitySuggestion() prints the "Minimum profile density ..." line to
// outs() in addition to any warning it emits.
static cl::opt<bool > ShowDensity (" show-density" , llvm::cl::init(false ),
84
84
llvm::cl::desc(" show profile density details" ),
85
85
llvm::cl::Optional);
86
// Integer command-line option, initialised to 990000.
// CSProfileGenerator::calculateDensity() multiplies the total body samples by
// this value and divides by 1000000 to get the sample count at which it stops
// accumulating functions; showDensitySuggestion() divides the same value by
// 10000 when printing it as a percentage.
+ static cl::opt<int > ProfileDensityHotFuncCutOff (
87
+ " profile-density-hot-func-cutoff" , llvm::cl::init(990000 ),
88
+ llvm::cl::desc(" Total sample cutoff for hot functions used to calculate "
89
+ " the profile density." ));
86
90
87
91
static cl::opt<bool > UpdateTotalSamples (
88
92
" update-total-samples" , llvm::cl::init(false ),
@@ -177,7 +181,8 @@ void ProfileGeneratorBase::write() {
177
181
write (std::move (WriterOrErr.get ()), ProfileMap);
178
182
}
179
183
180
- void ProfileGeneratorBase::showDensitySuggestion (double Density) {
184
+ void ProfileGeneratorBase::showDensitySuggestion (double Density,
185
+ int DensityCutoffHot) {
181
186
if (Density == 0.0 )
182
187
WithColor::warning () << " The --profile-summary-cutoff-hot option may be "
183
188
" set too low. Please check your command.\n " ;
@@ -190,9 +195,7 @@ void ProfileGeneratorBase::showDensitySuggestion(double Density) {
190
195
191
196
if (ShowDensity)
192
197
outs () << " Minimum profile density for hot functions with top "
193
- << format (" %.2f" ,
194
- static_cast <double >(ProfileSummaryCutoffHot.getValue ()) /
195
- 10000 )
198
+ << format (" %.2f" , static_cast <double >(DensityCutoffHot) / 10000 )
196
199
<< " % total samples: " << format (" %.1f" , Density) << " \n " ;
197
200
}
198
201
@@ -771,7 +774,7 @@ void ProfileGenerator::populateBoundarySamplesForAllFunctions(
771
774
// Computes the density of `Profiles` via calculateDensity(Profiles,
// HotCountThreshold) and forwards it to showDensitySuggestion(). The added
// (+) line additionally passes ProfileSummaryCutoffHot as the cutoff argument
// that showDensitySuggestion() now takes; the removed (-) line is the old
// single-argument call.
void ProfileGeneratorBase::calculateAndShowDensity (
772
775
const SampleProfileMap &Profiles) {
773
776
double Density = calculateDensity (Profiles, HotCountThreshold);
774
- showDensitySuggestion (Density);
777
+ showDensitySuggestion (Density, ProfileSummaryCutoffHot );
775
778
}
776
779
777
780
FunctionSamples *
@@ -1032,6 +1035,68 @@ void CSProfileGenerator::convertToProfileMap() {
1032
1035
IsProfileValidOnTrie = false ;
1033
1036
}
1034
1037
1038
+ void CSProfileGenerator::calculateAndShowDensity (
1039
+ SampleContextTracker &CTracker) {
1040
+ double Density = calculateDensity (CTracker);
1041
+ showDensitySuggestion (Density, ProfileDensityHotFuncCutOff);
1042
+ }
1043
+
1044
+ double CSProfileGenerator::calculateDensity (SampleContextTracker &CTracker) {
1045
+ double Density = 0.0 ;
1046
+
1047
+ uint64_t TotalProfileBodySamples = 0 ;
1048
+ // A list of the function profile density and total samples.
1049
+ std::vector<std::pair<double , uint64_t >> DensityList;
1050
+ for (const auto *Node : CTracker) {
1051
+ const auto *FSamples = Node->getFunctionSamples ();
1052
+ if (!FSamples)
1053
+ continue ;
1054
+
1055
+ uint64_t TotalBodySamples = 0 ;
1056
+ uint64_t FuncBodySize = 0 ;
1057
+ for (const auto &I : FSamples->getBodySamples ()) {
1058
+ TotalBodySamples += I.second .getSamples ();
1059
+ FuncBodySize++;
1060
+ }
1061
+ // The whole function could be inlined and optimized out, use the callsite
1062
+ // head samples instead to estimate the body count.
1063
+ if (FuncBodySize == 0 ) {
1064
+ for (const auto &CallsiteSamples : FSamples->getCallsiteSamples ()) {
1065
+ FuncBodySize++;
1066
+ for (const auto &Callee : CallsiteSamples.second )
1067
+ TotalBodySamples += Callee.second .getHeadSamplesEstimate ();
1068
+ }
1069
+ }
1070
+
1071
+ if (FuncBodySize == 0 )
1072
+ continue ;
1073
+
1074
+ double CurrDensity = static_cast <double >(TotalBodySamples) / FuncBodySize;
1075
+ TotalProfileBodySamples += TotalBodySamples;
1076
+ DensityList.emplace_back (CurrDensity, TotalBodySamples);
1077
+ }
1078
+
1079
+ // Sorted by the density in descending order.
1080
+ llvm::stable_sort (DensityList, [&](const std::pair<double , uint64_t > &A,
1081
+ const std::pair<double , uint64_t > &B) {
1082
+ if (A.first != B.first )
1083
+ return A.first > B.first ;
1084
+ return A.second < B.second ;
1085
+ });
1086
+
1087
+ uint64_t CurrentSamples = 0 ;
1088
+ for (auto &P : DensityList) {
1089
+ CurrentSamples += P.second ;
1090
+ Density = P.first ;
1091
+ if (CurrentSamples >= TotalProfileBodySamples *
1092
+ static_cast <float >(ProfileDensityHotFuncCutOff) /
1093
+ 1000000 )
1094
+ break ;
1095
+ }
1096
+
1097
+ return Density;
1098
+ }
1099
+
1035
1100
void CSProfileGenerator::postProcessProfiles () {
1036
1101
// Compute hot/cold threshold based on profile. This will be used for cold
1037
1102
// context profile merging/trimming.
@@ -1041,6 +1106,7 @@ void CSProfileGenerator::postProcessProfiles() {
1041
1106
// inline decisions.
1042
1107
if (EnableCSPreInliner) {
1043
1108
ContextTracker.populateFuncToCtxtMap ();
1109
+ calculateAndShowDensity (ContextTracker);
1044
1110
CSPreInliner (ContextTracker, *Binary, Summary.get ()).run ();
1045
1111
// Turn off the profile merger by default unless it is explicitly enabled.
1046
1112
if (!CSProfMergeColdContext.getNumOccurrences ())
@@ -1061,7 +1127,9 @@ void CSProfileGenerator::postProcessProfiles() {
1061
1127
sampleprof::SampleProfileMap ContextLessProfiles;
1062
1128
ProfileConverter::flattenProfile (ProfileMap, ContextLessProfiles, true );
1063
1129
1064
- calculateAndShowDensity (ContextLessProfiles);
1130
+ if (!EnableCSPreInliner)
1131
+ ProfileGeneratorBase::calculateAndShowDensity (ContextLessProfiles);
1132
+
1065
1133
if (GenCSNestedProfile) {
1066
1134
ProfileConverter CSConverter (ProfileMap);
1067
1135
CSConverter.convertCSProfiles ();
0 commit comments