Skip to content

Commit 4cab01f

Browse files
authored
[BOLT] Profile quality stats -- CFG discontinuity (#109683)
In a perfect profile, each positive-execution-count block in the function’s CFG should be reachable from a positive-execution-count function entry block through a positive-execution-count path. This new pass checks how well the BOLT input profile satisfies this “CFG continuity” property. More specifically, for each of the hottest 1000 functions, the pass calculates the function’s fraction of basic block execution counts that is “unreachable”. It then reports the 95th percentile of the distribution of the 1000 unreachable fractions in a single BOLT-INFO line. The smaller the reported value is, the better the BOLT profile satisfies the CFG continuity property. The default value of 1000 above can be changed via the hidden BOLT option `-num-functions-for-continuity-check=[N]`. If more detailed stats are needed, `-v=1` can be added to the BOLT invocation: the hottest N functions will be grouped into 5 equally-sized buckets, from the hottest to the coldest; for each bucket, various summary statistics of the distribution of the fractions and the raw unreachable execution counts will be reported.
1 parent 1a19313 commit 4cab01f

File tree

5 files changed

+319
-0
lines changed

5 files changed

+319
-0
lines changed
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
//===- bolt/Passes/ContinuityStats.h ----------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This pass checks how well the BOLT input profile satisfies the following
10+
// "CFG continuity" property of a perfect profile:
11+
//
12+
// Each positive-execution-count block in the function’s CFG
13+
// should be *reachable* from a positive-execution-count function
14+
// entry block through a positive-execution-count path.
15+
//
16+
// More specifically, for each of the hottest 1000 functions, the pass
17+
// calculates the function’s fraction of basic block execution counts
18+
// that is *unreachable*. It then reports the 95th percentile of the
19+
// distribution of the 1000 unreachable fractions in a single BOLT-INFO line.
20+
// The smaller the reported value is, the better the BOLT profile
21+
// satisfies the CFG continuity property.
22+
23+
// The default value of 1000 above can be changed via the hidden BOLT option
24+
// `-num-functions-for-continuity-check=[N]`.
25+
// If more detailed stats are needed, `-v=1` can be used: the hottest N
26+
// functions will be grouped into 5 equally-sized buckets, from the hottest
27+
// to the coldest; for each bucket, various summary statistics of the
28+
// distribution of the unreachable fractions and the raw unreachable execution
29+
// counts will be reported.
30+
//
31+
//===----------------------------------------------------------------------===//
32+
33+
#ifndef BOLT_PASSES_CONTINUITYSTATS_H
34+
#define BOLT_PASSES_CONTINUITYSTATS_H
35+
36+
#include "bolt/Passes/BinaryPasses.h"
37+
#include <vector>
38+
39+
namespace llvm {
40+
41+
class raw_ostream;
42+
43+
namespace bolt {
44+
class BinaryContext;
45+
46+
/// Compute and report to the user the function CFG continuity quality
47+
class PrintContinuityStats : public BinaryFunctionPass {
48+
public:
49+
explicit PrintContinuityStats(const cl::opt<bool> &PrintPass)
50+
: BinaryFunctionPass(PrintPass) {}
51+
52+
bool shouldOptimize(const BinaryFunction &BF) const override;
53+
const char *getName() const override { return "continuity-stats"; }
54+
bool shouldPrint(const BinaryFunction &) const override { return false; }
55+
Error runOnFunctions(BinaryContext &BC) override;
56+
};
57+
58+
} // namespace bolt
59+
} // namespace llvm
60+
61+
#endif // BOLT_PASSES_CONTINUITYSTATS_H

bolt/lib/Passes/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ add_llvm_library(LLVMBOLTPasses
2626
PatchEntries.cpp
2727
PettisAndHansen.cpp
2828
PLTCall.cpp
29+
ContinuityStats.cpp
2930
RegAnalysis.cpp
3031
RegReAssign.cpp
3132
ReorderAlgorithm.cpp

bolt/lib/Passes/ContinuityStats.cpp

Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
//===- bolt/Passes/ContinuityStats.cpp --------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file implements the continuity stats calculation pass.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#include "bolt/Passes/ContinuityStats.h"
14+
#include "bolt/Core/BinaryBasicBlock.h"
15+
#include "bolt/Core/BinaryFunction.h"
16+
#include "bolt/Utils/CommandLineOpts.h"
17+
#include "llvm/Support/CommandLine.h"
18+
#include <queue>
19+
#include <unordered_map>
20+
#include <unordered_set>
21+
22+
#define DEBUG_TYPE "bolt-opts"
23+
24+
using namespace llvm;
25+
using namespace bolt;
26+
27+
namespace opts {
28+
extern cl::opt<unsigned> Verbosity;
29+
cl::opt<unsigned> NumFunctionsForContinuityCheck(
30+
"num-functions-for-continuity-check",
31+
cl::desc("number of hottest functions to print aggregated "
32+
"CFG discontinuity stats of."),
33+
cl::init(1000), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
34+
} // namespace opts
35+
36+
namespace {
37+
/// List of pointers to the (const) functions the statistics are computed over.
using FunctionListType = std::vector<const BinaryFunction *>;
38+
/// Iterator into a FunctionListType; pairs of these delimit the sub-ranges
/// handed to printCFGContinuityStats.
using function_iterator = FunctionListType::iterator;
39+
40+
template <typename T>
41+
void printDistribution(raw_ostream &OS, std::vector<T> &values,
42+
bool Fraction = false) {
43+
if (values.empty())
44+
return;
45+
// Sort values from largest to smallest and print the MAX, TOP 1%, 5%, 10%,
46+
// 20%, 50%, 80%, MIN. If Fraction is true, then values are printed as
47+
// fractions instead of integers.
48+
std::sort(values.begin(), values.end());
49+
50+
auto printLine = [&](std::string Text, double Percent) {
51+
int Rank = int(values.size() * (1.0 - Percent / 100));
52+
if (Percent == 0)
53+
Rank = values.size() - 1;
54+
if (Fraction)
55+
OS << " " << Text << std::string(9 - Text.length(), ' ') << ": "
56+
<< format("%.2lf%%", values[Rank] * 100) << "\n";
57+
else
58+
OS << " " << Text << std::string(9 - Text.length(), ' ') << ": "
59+
<< values[Rank] << "\n";
60+
};
61+
62+
printLine("MAX", 0);
63+
const int percentages[] = {1, 5, 10, 20, 50, 80};
64+
for (size_t i = 0; i < sizeof(percentages) / sizeof(percentages[0]); ++i) {
65+
printLine("TOP " + std::to_string(percentages[i]) + "%", percentages[i]);
66+
}
67+
printLine("MIN", 100);
68+
}
69+
70+
/// Report to \p OS how well the profiles of \p Functions satisfy the CFG
/// continuity property.
///
/// Given a perfect profile, every positive-execution-count basic block (BB)
/// should be connected to an entry of the function through a
/// positive-execution-count directed path in the control flow graph. For
/// every function whose size() is greater than 1, this computes the number
/// of positive-count BBs that are not reachable that way, the sum of their
/// execution counts (ECs), and that sum as a fraction of the function's
/// total BB execution count.
///
/// One line with the 95th percentile of the fractions is always printed. At
/// verbosity >= 1 the distributions of all three metrics follow; at
/// verbosity >= 2 every function with a fraction of at least 5% is named.
void printCFGContinuityStats(raw_ostream &OS,
                             iterator_range<function_iterator> &Functions) {
  std::vector<size_t> NumUnreachables;
  std::vector<size_t> SumECUnreachables;
  std::vector<double> FractionECUnreachables;

  for (const BinaryFunction *Function : Functions) {
    if (Function->size() <= 1)
      continue;

    // Single pass over the BBs: count and sum the positive-EC ones and seed
    // the search with every positive-EC entry point.
    size_t NumPosECBBs = 0;
    size_t SumAllBBEC = 0;
    std::unordered_set<const BinaryBasicBlock *> Visited;
    std::queue<const BinaryBasicBlock *> Worklist;
    for (const BinaryBasicBlock &BB : *Function) {
      const size_t BBEC = BB.getKnownExecutionCount();
      if (BBEC == 0)
        continue;
      ++NumPosECBBs;
      SumAllBBEC += BBEC;
      if (BB.isEntryPoint() && Visited.insert(&BB).second)
        Worklist.push(&BB);
    }

    // BFS on the subgraph of the CFG induced by positive-weight edges.
    while (!Worklist.empty()) {
      const BinaryBasicBlock *BB = Worklist.front();
      Worklist.pop();
      // The branch info entries are walked in lockstep with the successors.
      auto BranchInfoIt = BB->branch_info_begin();
      for (const BinaryBasicBlock *Succ : BB->successors()) {
        const uint64_t EdgeCount = (BranchInfoIt++)->Count;
        if (EdgeCount == BinaryBasicBlock::COUNT_NO_PROFILE || EdgeCount == 0)
          continue;
        if (Visited.insert(Succ).second)
          Worklist.push(Succ);
      }
    }

    // Only positive-EC blocks are counted as reachable. A zero-EC block can
    // still be visited through a positive-count edge when the profile is
    // inconsistent; counting it would make the unsigned subtraction below
    // wrap around.
    size_t NumReachablePosECBBs = 0;
    size_t SumReachableBBEC = 0;
    for (const BinaryBasicBlock *BB : Visited) {
      const size_t BBEC = BB->getKnownExecutionCount();
      if (BBEC == 0)
        continue;
      ++NumReachablePosECBBs;
      SumReachableBBEC += BBEC;
    }

    const size_t NumPosECBBsUnreachableFromEntry =
        NumPosECBBs - NumReachablePosECBBs;
    const size_t SumUnreachableBBEC = SumAllBBEC - SumReachableBBEC;
    // A function without any positive-EC block has nothing unreachable.
    // Dividing would yield 0/0 = NaN, which must not reach std::sort.
    const double FractionECUnreachable =
        SumAllBBEC == 0
            ? 0.0
            : static_cast<double>(SumUnreachableBBEC) / SumAllBBEC;

    if (opts::Verbosity >= 2 && FractionECUnreachable >= 0.05) {
      OS << "Non-trivial CFG discontinuity observed in function "
         << Function->getPrintName() << "\n";
      LLVM_DEBUG(Function->dump());
    }

    NumUnreachables.push_back(NumPosECBBsUnreachableFromEntry);
    SumECUnreachables.push_back(SumUnreachableBBEC);
    FractionECUnreachables.push_back(FractionECUnreachable);
  }

  if (FractionECUnreachables.empty()) {
    // Every function was skipped above. Still emit a complete line so that
    // text the caller already wrote to OS is not left without a newline.
    OS << "CFG discontinuity cannot be computed (no function has more than "
          "one basic block)\n";
    return;
  }

  std::sort(FractionECUnreachables.begin(), FractionECUnreachables.end());
  // Integer arithmetic keeps the rank exact; it is always < size().
  const size_t Rank = FractionECUnreachables.size() * 95 / 100;
  OS << format("top 5%% function CFG discontinuity is %.2lf%%\n",
               FractionECUnreachables[Rank] * 100);

  if (opts::Verbosity >= 1) {
    OS << "abbreviations: EC = execution count, POS BBs = positive EC BBs\n"
       << "distribution of NUM(unreachable POS BBs) among all focal "
          "functions\n";
    printDistribution(OS, NumUnreachables);

    OS << "distribution of SUM_EC(unreachable POS BBs) among all focal "
          "functions\n";
    printDistribution(OS, SumECUnreachables);

    OS << "distribution of [(SUM_EC(unreachable POS BBs) / SUM_EC(all "
          "POS BBs))] among all focal functions\n";
    printDistribution(OS, FractionECUnreachables, /*Fraction=*/true);
  }
}
178+
179+
/// Print the CFG continuity report for the hottest functions.
///
/// \p ValidFunctions is reordered in place from the highest to the lowest
/// execution count. The report covers its first
/// min(\p NumTopFunctions, ValidFunctions.size()) entries. When verbosity is
/// at least 1 and there are at least 5 such functions, the same statistics
/// are also printed for 5 equally-sized buckets, hottest first; functions
/// left over after the integer division by 5 are not part of any bucket.
void printAll(BinaryContext &BC, FunctionListType &ValidFunctions,
              size_t NumTopFunctions) {
  // Must stay in sync with the literal "5" in the bucket headline below.
  constexpr size_t NumBuckets = 5;

  // Hottest first. A stable sort keeps functions with equal execution counts
  // in their incoming order, so the set of functions that makes it into the
  // top N (and into each bucket) does not depend on how an unstable sort
  // happens to arrange ties.
  llvm::stable_sort(ValidFunctions,
                    [](const BinaryFunction *A, const BinaryFunction *B) {
                      return A->getKnownExecutionCount() >
                             B->getKnownExecutionCount();
                    });

  const size_t RealNumTopFunctions =
      std::min(NumTopFunctions, ValidFunctions.size());

  iterator_range<function_iterator> Functions(
      ValidFunctions.begin(), ValidFunctions.begin() + RealNumTopFunctions);

  // No newline here: printCFGContinuityStats completes the line.
  BC.outs() << format("BOLT-INFO: among the hottest %zu functions ",
                      RealNumTopFunctions);
  printCFGContinuityStats(BC.outs(), Functions);

  // Print more detailed bucketed stats if requested.
  if (opts::Verbosity < 1 || RealNumTopFunctions < NumBuckets)
    return;

  const size_t PerBucketSize = RealNumTopFunctions / NumBuckets;
  BC.outs() << format(
      "Detailed stats for 5 buckets, each with %zu functions:\n",
      PerBucketSize);

  for (size_t BucketIndex = 0; BucketIndex < NumBuckets; ++BucketIndex) {
    const size_t StartIndex = BucketIndex * PerBucketSize;
    const size_t EndIndex = StartIndex + PerBucketSize;
    iterator_range<function_iterator> BucketFunctions(
        ValidFunctions.begin() + StartIndex,
        ValidFunctions.begin() + EndIndex);
    // The list is sorted in descending order, so the bucket's first entry
    // has its highest count and its last entry the lowest.
    const size_t MaxFunctionExecutionCount =
        ValidFunctions[StartIndex]->getKnownExecutionCount();
    const size_t MinFunctionExecutionCount =
        ValidFunctions[EndIndex - 1]->getKnownExecutionCount();
    BC.outs() << format("----------------\n| Bucket %zu: "
                        "|\n----------------\n",
                        BucketIndex + 1)
              << format(
                     "execution counts of the %zu functions in the bucket: "
                     "%zu-%zu\n",
                     EndIndex - StartIndex, MinFunctionExecutionCount,
                     MaxFunctionExecutionCount);
    printCFGContinuityStats(BC.outs(), BucketFunctions);
  }
}
228+
} // namespace
229+
230+
bool PrintContinuityStats::shouldOptimize(const BinaryFunction &BF) const {
231+
if (BF.empty() || !BF.hasValidProfile())
232+
return false;
233+
234+
return BinaryFunctionPass::shouldOptimize(BF);
235+
}
236+
237+
/// Collect every function of \p BC accepted by shouldOptimize() and print
/// the continuity report for them. Nothing is printed when no function
/// qualifies or when the requested number of functions is 0. Always returns
/// success.
Error PrintContinuityStats::runOnFunctions(BinaryContext &BC) {
  FunctionListType ValidFunctions;
  for (const auto &Entry : BC.getBinaryFunctions()) {
    const BinaryFunction &BF = Entry.second;
    if (!PrintContinuityStats::shouldOptimize(BF))
      continue;
    ValidFunctions.push_back(&BF);
  }

  const size_t NumRequested = opts::NumFunctionsForContinuityCheck;
  if (!ValidFunctions.empty() && NumRequested != 0)
    printAll(BC, ValidFunctions, NumRequested);

  return Error::success();
}

bolt/lib/Rewrite/BinaryPassManager.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "bolt/Passes/AllocCombiner.h"
1313
#include "bolt/Passes/AsmDump.h"
1414
#include "bolt/Passes/CMOVConversion.h"
15+
#include "bolt/Passes/ContinuityStats.h"
1516
#include "bolt/Passes/FixRISCVCallsPass.h"
1617
#include "bolt/Passes/FixRelaxationPass.h"
1718
#include "bolt/Passes/FrameOptimizer.h"
@@ -373,6 +374,8 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
373374
if (opts::PrintProfileStats)
374375
Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint));
375376

377+
Manager.registerPass(std::make_unique<PrintContinuityStats>(NeverPrint));
378+
376379
Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint));
377380

378381
Manager.registerPass(std::make_unique<ValidateMemRefs>(NeverPrint));
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
## Check profile discontinuity reporting
2+
RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe
3+
RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt | FileCheck %s
4+
CHECK: among the hottest 5 functions top 5% function CFG discontinuity is 100.00%

0 commit comments

Comments
 (0)