|
| 1 | +//===- bolt/Passes/ProfileQualityStats.h ------------------------*- C++ -*-===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | +// |
| 9 | +// This pass checks the BOLT input profile quality. |
| 10 | +// |
| 11 | +// Check 1: how well the input profile satisfies the following |
| 12 | +// "CFG continuity" property of a perfect profile: |
| 13 | +// |
| 14 | +// Each positive-execution-count block in the function's CFG |
| 15 | +// is *reachable* from a positive-execution-count function |
| 16 | +// entry block through a positive-execution-count path. |
| 17 | +// |
| 18 | +// More specifically, for each of the hottest 1000 functions, the pass |
| 19 | +// calculates the function's fraction of basic block execution counts |
| 20 | +// that is *unreachable*. It then reports the 95th percentile of the |
| 21 | +// distribution of the 1000 unreachable fractions in a single BOLT-INFO line. |
| 22 | +// The smaller the reported value is, the better the BOLT profile |
| 23 | +// satisfies the CFG continuity property. |
| 24 | +// |
| 25 | +// Check 2: how well the input profile satisfies the "call graph flow |
| 26 | +// conservation" property of a perfect profile: |
| 27 | +// |
| 28 | +// For each function that is not a program entry, the number of times the |
| 29 | +// function is called is equal to the net CFG outflow of the |
| 30 | +// function's entry block(s). |
| 31 | +// |
| 32 | +// More specifically, for each of the hottest 1000 functions, the pass obtains |
| 33 | +// A = number of times the function is called, B = the function's entry blocks' |
| 34 | +// inflow, C = the function's entry blocks' outflow, where B and C are computed |
| 35 | +// using the function's weighted CFG. It then computes gap = 1 - MIN(A, C-B) / |
| 36 | +// MAX(A, C-B). The pass reports the 95th percentile of the distribution of the |
| 37 | +// 1000 gaps in a single BOLT-INFO line. The smaller the reported value is, the |
| 38 | +// better the BOLT profile satisfies the call graph flow conservation property. |
| 39 | +// |
| 40 | +// Check 3: how well the input profile satisfies the "function CFG flow |
| 41 | +// conservation property" of a perfect profile: |
| 42 | +// |
| 43 | +// A non-entry non-exit basic block's inflow is equal to its outflow. |
| 44 | +// |
| 45 | +// More specifically, for each of the hottest 1000 functions, the pass loops |
| 46 | +// over its basic blocks that are non-entry and non-exit, and for each block |
| 47 | +// obtains a block gap = 1 - MIN(block inflow, block outflow, block call count |
| 48 | +// if any) / MAX(block inflow, block outflow, block call count if any). It then |
| 49 | +// aggregates the block gaps into 2 values for the function: "weighted" is the |
| 50 | +// weighted average of the block conservation gaps, where the weights depend on |
| 51 | +// each block's execution count and instruction count; "worst" is the worst |
| 52 | +// (biggest) block gap across all basic blocks in the function with an execution |
| 53 | +// count of > 500. The pass then reports the 95th percentile of the weighted and |
| 54 | +// worst values of the 1000 functions in a single BOLT-INFO line. The smaller |
| 55 | +// the reported values are, the better the BOLT profile satisfies the function |
| 56 | +// CFG flow conservation property. |
| 57 | +// |
| 58 | +// The default value of 1000 above can be changed via the hidden BOLT option |
| 59 | +// `-top-functions-for-profile-quality-check=[N]`. |
| 60 | +// The default reporting of the 95th percentile can be changed via the hidden |
| 61 | +// BOLT option `-percentile-for-profile-quality-check=[M]`. |
| 62 | +// |
| 63 | +// If more detailed stats are needed, `-v=1` can be used: the hottest N |
| 64 | +// functions will be grouped into 5 equally-sized buckets, from the hottest |
| 65 | +// to the coldest; for each bucket, various summary statistics of the |
| 66 | +// profile quality will be reported. |
| 67 | +// |
| 68 | +//===----------------------------------------------------------------------===// |
| 69 | + |
| 70 | +#ifndef BOLT_PASSES_PROFILEQUALITYSTATS_H |
| 71 | +#define BOLT_PASSES_PROFILEQUALITYSTATS_H |
| 72 | + |
| 73 | +#include "bolt/Passes/BinaryPasses.h" |
| 74 | +#include <vector> |
| 75 | + |
| 76 | +namespace llvm { |
| 77 | + |
| 78 | +class raw_ostream; |
| 79 | + |
| 80 | +namespace bolt { |
| 81 | +class BinaryContext; |
| 82 | + |
| 83 | +/// Compute and report to the user the profile quality |
| 84 | +class PrintProfileQualityStats : public BinaryFunctionPass { |
| 85 | +public: |
| 86 | + explicit PrintProfileQualityStats(const cl::opt<bool> &PrintPass) |
| 87 | + : BinaryFunctionPass(PrintPass) {} |
| 88 | + |
| 89 | + bool shouldOptimize(const BinaryFunction &BF) const override; |
| 90 | + const char *getName() const override { return "profile-quality-stats"; } |
| 91 | + bool shouldPrint(const BinaryFunction &) const override { return false; } |
| 92 | + Error runOnFunctions(BinaryContext &BC) override; |
| 93 | +}; |
| 94 | + |
| 95 | +} // namespace bolt |
| 96 | +} // namespace llvm |
| 97 | + |
| 98 | +#endif // BOLT_PASSES_PROFILEQUALITYSTATS_H |
0 commit comments