Skip to content

[BOLT] Profile quality stats -- CFG discontinuity #109683

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Oct 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions bolt/include/bolt/Passes/ContinuityStats.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
//===- bolt/Passes/ContinuityStats.h ----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass checks how well the BOLT input profile satisfies the following
// "CFG continuity" property of a perfect profile:
//
// Each positive-execution-count block in the function’s CFG
// should be *reachable* from a positive-execution-count function
// entry block through a positive-execution-count path.
//
// More specifically, for each of the hottest 1000 functions, the pass
// calculates the function’s fraction of basic block execution counts
// that is *unreachable*. It then reports the 95th percentile of the
// distribution of the 1000 unreachable fractions in a single BOLT-INFO line.
// The smaller the reported value is, the better the BOLT profile
// satisfies the CFG continuity property.

// The default value of 1000 above can be changed via the hidden BOLT option
// `-num-functions-for-continuity-check=[N]`.
// If more detailed stats are needed, `-v=1` can be used: the hottest N
// functions will be grouped into 5 equally-sized buckets, from the hottest
// to the coldest; for each bucket, various summary statistics of the
// distribution of the unreachable fractions and the raw unreachable execution
// counts will be reported.
//
//===----------------------------------------------------------------------===//

#ifndef BOLT_PASSES_CONTINUITYSTATS_H
#define BOLT_PASSES_CONTINUITYSTATS_H

#include "bolt/Passes/BinaryPasses.h"
#include <vector>

namespace llvm {

class raw_ostream;

namespace bolt {
class BinaryContext;

/// Compute and report to the user the function CFG continuity quality
class PrintContinuityStats : public BinaryFunctionPass {
public:
explicit PrintContinuityStats(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) {}

bool shouldOptimize(const BinaryFunction &BF) const override;
const char *getName() const override { return "continuity-stats"; }
bool shouldPrint(const BinaryFunction &) const override { return false; }
Error runOnFunctions(BinaryContext &BC) override;
};

} // namespace bolt
} // namespace llvm

#endif // BOLT_PASSES_CONTINUITYSTATS_H
1 change: 1 addition & 0 deletions bolt/lib/Passes/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ add_llvm_library(LLVMBOLTPasses
PatchEntries.cpp
PettisAndHansen.cpp
PLTCall.cpp
ContinuityStats.cpp
RegAnalysis.cpp
RegReAssign.cpp
ReorderAlgorithm.cpp
Expand Down
250 changes: 250 additions & 0 deletions bolt/lib/Passes/ContinuityStats.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
//===- bolt/Passes/ContinuityStats.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the continuity stats calculation pass.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/ContinuityStats.h"
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/Support/CommandLine.h"
#include <queue>
#include <unordered_map>
#include <unordered_set>

#define DEBUG_TYPE "bolt-opts"

using namespace llvm;
using namespace bolt;

namespace opts {
extern cl::opt<unsigned> Verbosity;
cl::opt<unsigned> NumFunctionsForContinuityCheck(
"num-functions-for-continuity-check",
cl::desc("number of hottest functions to print aggregated "
"CFG discontinuity stats of."),
cl::init(1000), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
} // namespace opts

namespace {
using FunctionListType = std::vector<const BinaryFunction *>;
using function_iterator = FunctionListType::iterator;

template <typename T>
void printDistribution(raw_ostream &OS, std::vector<T> &values,
bool Fraction = false) {
if (values.empty())
return;
// Sort values from largest to smallest and print the MAX, TOP 1%, 5%, 10%,
// 20%, 50%, 80%, MIN. If Fraction is true, then values are printed as
// fractions instead of integers.
std::sort(values.begin(), values.end());

auto printLine = [&](std::string Text, double Percent) {
int Rank = int(values.size() * (1.0 - Percent / 100));
if (Percent == 0)
Rank = values.size() - 1;
if (Fraction)
OS << " " << Text << std::string(9 - Text.length(), ' ') << ": "
<< format("%.2lf%%", values[Rank] * 100) << "\n";
else
OS << " " << Text << std::string(9 - Text.length(), ' ') << ": "
<< values[Rank] << "\n";
};

printLine("MAX", 0);
const int percentages[] = {1, 5, 10, 20, 50, 80};
for (size_t i = 0; i < sizeof(percentages) / sizeof(percentages[0]); ++i) {
printLine("TOP " + std::to_string(percentages[i]) + "%", percentages[i]);
}
printLine("MIN", 100);
}

void printCFGContinuityStats(raw_ostream &OS,
iterator_range<function_iterator> &Functions) {
// Given a perfect profile, every positive-execution-count BB should be
// connected to an entry of the function through a positive-execution-count
// directed path in the control flow graph.
std::vector<size_t> NumUnreachables;
std::vector<size_t> SumECUnreachables;
std::vector<double> FractionECUnreachables;

for (auto it = Functions.begin(); it != Functions.end(); ++it) {
const BinaryFunction *Function = *it;
if (Function->size() <= 1)
continue;

// Compute the sum of all BB execution counts (ECs).
size_t NumPosECBBs = 0;
size_t SumAllBBEC = 0;
for (const BinaryBasicBlock &BB : *Function) {
const size_t BBEC = BB.getKnownExecutionCount();
NumPosECBBs += BBEC > 0 ? 1 : 0;
SumAllBBEC += BBEC;
}

// Perform BFS on subgraph of CFG induced by positive weight edges.
// Compute the number of BBs reachable from the entry(s) of the function and
// the sum of their execution counts (ECs).
std::unordered_map<unsigned, const BinaryBasicBlock *> IndexToBB;
std::unordered_set<unsigned> Visited;
std::queue<unsigned> Queue;
for (const BinaryBasicBlock &BB : *Function) {
// Make sure BB.getIndex() is not already in IndexToBB.
assert(IndexToBB.find(BB.getIndex()) == IndexToBB.end());
IndexToBB[BB.getIndex()] = &BB;
if (BB.isEntryPoint() && BB.getKnownExecutionCount() > 0) {
Queue.push(BB.getIndex());
Visited.insert(BB.getIndex());
}
}
while (!Queue.empty()) {
const unsigned BBIndex = Queue.front();
const BinaryBasicBlock *BB = IndexToBB[BBIndex];
Queue.pop();
auto SuccBIIter = BB->branch_info_begin();
for (const BinaryBasicBlock *Succ : BB->successors()) {
const uint64_t Count = SuccBIIter->Count;
if (Count == BinaryBasicBlock::COUNT_NO_PROFILE || Count == 0) {
++SuccBIIter;
continue;
}
if (!Visited.insert(Succ->getIndex()).second) {
++SuccBIIter;
continue;
}
Queue.push(Succ->getIndex());
++SuccBIIter;
}
}

const size_t NumReachableBBs = Visited.size();

// Loop through Visited, and sum the corresponding BBs' execution counts
// (ECs).
size_t SumReachableBBEC = 0;
for (const unsigned BBIndex : Visited) {
const BinaryBasicBlock *BB = IndexToBB[BBIndex];
SumReachableBBEC += BB->getKnownExecutionCount();
}

const size_t NumPosECBBsUnreachableFromEntry =
NumPosECBBs - NumReachableBBs;
const size_t SumUnreachableBBEC = SumAllBBEC - SumReachableBBEC;
const double FractionECUnreachable =
(double)SumUnreachableBBEC / SumAllBBEC;

if (opts::Verbosity >= 2 && FractionECUnreachable >= 0.05) {
OS << "Non-trivial CFG discontinuity observed in function "
<< Function->getPrintName() << "\n";
LLVM_DEBUG(Function->dump());
}

NumUnreachables.push_back(NumPosECBBsUnreachableFromEntry);
SumECUnreachables.push_back(SumUnreachableBBEC);
FractionECUnreachables.push_back(FractionECUnreachable);
}

if (FractionECUnreachables.empty())
return;

std::sort(FractionECUnreachables.begin(), FractionECUnreachables.end());
const int Rank = int(FractionECUnreachables.size() * 0.95);
OS << format("top 5%% function CFG discontinuity is %.2lf%%\n",
FractionECUnreachables[Rank] * 100);

if (opts::Verbosity >= 1) {
OS << "abbreviations: EC = execution count, POS BBs = positive EC BBs\n"
<< "distribution of NUM(unreachable POS BBs) among all focal "
"functions\n";
printDistribution(OS, NumUnreachables);

OS << "distribution of SUM_EC(unreachable POS BBs) among all focal "
"functions\n";
printDistribution(OS, SumECUnreachables);

OS << "distribution of [(SUM_EC(unreachable POS BBs) / SUM_EC(all "
"POS BBs))] among all focal functions\n";
printDistribution(OS, FractionECUnreachables, /*Fraction=*/true);
}
}

void printAll(BinaryContext &BC, FunctionListType &ValidFunctions,
size_t NumTopFunctions) {
// Sort the list of functions by execution counts (reverse).
llvm::sort(ValidFunctions,
[&](const BinaryFunction *A, const BinaryFunction *B) {
return A->getKnownExecutionCount() > B->getKnownExecutionCount();
});

const size_t RealNumTopFunctions =
std::min(NumTopFunctions, ValidFunctions.size());

iterator_range<function_iterator> Functions(
ValidFunctions.begin(), ValidFunctions.begin() + RealNumTopFunctions);

BC.outs() << format("BOLT-INFO: among the hottest %zu functions ",
RealNumTopFunctions);
printCFGContinuityStats(BC.outs(), Functions);

// Print more detailed bucketed stats if requested.
if (opts::Verbosity >= 1 && RealNumTopFunctions >= 5) {
const size_t PerBucketSize = RealNumTopFunctions / 5;
BC.outs() << format(
"Detailed stats for 5 buckets, each with %zu functions:\n",
PerBucketSize);

// For each bucket, print the CFG continuity stats of the functions in the
// bucket.
for (size_t BucketIndex = 0; BucketIndex < 5; ++BucketIndex) {
const size_t StartIndex = BucketIndex * PerBucketSize;
const size_t EndIndex = StartIndex + PerBucketSize;
iterator_range<function_iterator> Functions(
ValidFunctions.begin() + StartIndex,
ValidFunctions.begin() + EndIndex);
const size_t MaxFunctionExecutionCount =
ValidFunctions[StartIndex]->getKnownExecutionCount();
const size_t MinFunctionExecutionCount =
ValidFunctions[EndIndex - 1]->getKnownExecutionCount();
BC.outs() << format("----------------\n| Bucket %zu: "
"|\n----------------\n",
BucketIndex + 1)
<< format(
"execution counts of the %zu functions in the bucket: "
"%zu-%zu\n",
EndIndex - StartIndex, MinFunctionExecutionCount,
MaxFunctionExecutionCount);
printCFGContinuityStats(BC.outs(), Functions);
}
}
}
} // namespace

bool PrintContinuityStats::shouldOptimize(const BinaryFunction &BF) const {
if (BF.empty() || !BF.hasValidProfile())
return false;

return BinaryFunctionPass::shouldOptimize(BF);
}

Error PrintContinuityStats::runOnFunctions(BinaryContext &BC) {
// Create a list of functions with valid profiles.
FunctionListType ValidFunctions;
for (const auto &BFI : BC.getBinaryFunctions()) {
const BinaryFunction *Function = &BFI.second;
if (PrintContinuityStats::shouldOptimize(*Function))
ValidFunctions.push_back(Function);
}
if (ValidFunctions.empty() || opts::NumFunctionsForContinuityCheck == 0)
return Error::success();

printAll(BC, ValidFunctions, opts::NumFunctionsForContinuityCheck);
return Error::success();
}
3 changes: 3 additions & 0 deletions bolt/lib/Rewrite/BinaryPassManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "bolt/Passes/AllocCombiner.h"
#include "bolt/Passes/AsmDump.h"
#include "bolt/Passes/CMOVConversion.h"
#include "bolt/Passes/ContinuityStats.h"
#include "bolt/Passes/FixRISCVCallsPass.h"
#include "bolt/Passes/FixRelaxationPass.h"
#include "bolt/Passes/FrameOptimizer.h"
Expand Down Expand Up @@ -373,6 +374,8 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
if (opts::PrintProfileStats)
Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint));

Manager.registerPass(std::make_unique<PrintContinuityStats>(NeverPrint));

Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint));

Manager.registerPass(std::make_unique<ValidateMemRefs>(NeverPrint));
Expand Down
4 changes: 4 additions & 0 deletions bolt/test/X86/cfg-discontinuity-reporting.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
## Check profile discontinuity reporting
RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe
RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt | FileCheck %s
CHECK: among the hottest 5 functions top 5% function CFG discontinuity is 100.00%
Loading