Skip to content

Commit 49edfec

Browse files
authored
Merge branch 'main' into inbelic/wave-read-at-spirv
2 parents f181e27 + a4916d2 commit 49edfec

File tree

3,374 files changed

+100917
-55402
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

3,374 files changed

+100917
-55402
lines changed

.ci/generate-buildkite-pipeline-premerge

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -191,14 +191,17 @@ function keep-modified-projects() {
191191
}
192192

193193
function check-targets() {
194+
# Do not use "check-all" here because if there is "check-all" plus a
195+
# project specific target like "check-clang", that project's tests
196+
# will be run twice.
194197
projects=${@}
195198
for project in ${projects}; do
196199
case ${project} in
197200
clang-tools-extra)
198201
echo "check-clang-tools"
199202
;;
200203
compiler-rt)
201-
echo "check-all"
204+
echo "check-compiler-rt"
202205
;;
203206
cross-project-tests)
204207
echo "check-cross-project"
@@ -216,10 +219,10 @@ function check-targets() {
216219
echo "check-lldb"
217220
;;
218221
pstl)
219-
echo "check-all"
222+
# Currently we do not run pstl tests in CI.
220223
;;
221224
libclc)
222-
echo "check-all"
225+
# Currently there is no testing for libclc.
223226
;;
224227
*)
225228
echo "check-${project}"

.github/new-prs-labeler.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ PGO:
6969
- llvm/**/llvm-profdata/**/*
7070
- llvm/**/llvm-profgen/**/*
7171

72-
vectorization:
72+
vectorizers:
7373
- llvm/lib/Transforms/Vectorize/**/*
7474
- llvm/include/llvm/Transforms/Vectorize/**/*
7575

.github/workflows/release-binaries.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ jobs:
328328
run: |
329329
# Build some of the mlir tools that take a long time to link
330330
if [ "${{ needs.prepare.outputs.build-flang }}" = "true" ]; then
331-
ninja -C ${{ steps.setup-stage.outputs.build-prefix }}/build/tools/clang/stage2-bins/ -j2 flang-new bbc
331+
ninja -C ${{ steps.setup-stage.outputs.build-prefix }}/build/tools/clang/stage2-bins/ -j2 flang bbc
332332
fi
333333
ninja -C ${{ steps.setup-stage.outputs.build-prefix }}/build/tools/clang/stage2-bins/ \
334334
mlir-bytecode-parser-fuzzer \

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ autoconf/autom4te.cache
5151
/CMakeSettings.json
5252
# CLion project configuration
5353
/.idea
54+
/cmake-build*
5455

5556
#==============================================================================#
5657
# Directories to ignore (do not add trailing '/'s, they skip symlinks).
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
//===- bolt/Passes/ContinuityStats.h ----------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This pass checks how well the BOLT input profile satisfies the following
10+
// "CFG continuity" property of a perfect profile:
11+
//
12+
// Each positive-execution-count block in the function’s CFG
13+
// should be *reachable* from a positive-execution-count function
14+
// entry block through a positive-execution-count path.
15+
//
16+
// More specifically, for each of the hottest 1000 functions, the pass
17+
// calculates the function’s fraction of basic block execution counts
18+
// that is *unreachable*. It then reports the 95th percentile of the
19+
// distribution of the 1000 unreachable fractions in a single BOLT-INFO line.
20+
// The smaller the reported value is, the better the BOLT profile
21+
// satisfies the CFG continuity property.
22+
23+
// The default value of 1000 above can be changed via the hidden BOLT option
24+
// `-num-functions-for-continuity-check=[N]`.
25+
// If more detailed stats are needed, `-v=1` can be used: the hottest N
26+
// functions will be grouped into 5 equally-sized buckets, from the hottest
27+
// to the coldest; for each bucket, various summary statistics of the
28+
// distribution of the unreachable fractions and the raw unreachable execution
29+
// counts will be reported.
30+
//
31+
//===----------------------------------------------------------------------===//
32+
33+
#ifndef BOLT_PASSES_CONTINUITYSTATS_H
34+
#define BOLT_PASSES_CONTINUITYSTATS_H
35+
36+
#include "bolt/Passes/BinaryPasses.h"
37+
#include <vector>
38+
39+
namespace llvm {
40+
41+
class raw_ostream;
42+
43+
namespace bolt {
44+
class BinaryContext;
45+
46+
/// Compute and report to the user the function CFG continuity quality
47+
class PrintContinuityStats : public BinaryFunctionPass {
48+
public:
49+
explicit PrintContinuityStats(const cl::opt<bool> &PrintPass)
50+
: BinaryFunctionPass(PrintPass) {}
51+
52+
bool shouldOptimize(const BinaryFunction &BF) const override;
53+
const char *getName() const override { return "continuity-stats"; }
54+
bool shouldPrint(const BinaryFunction &) const override { return false; }
55+
Error runOnFunctions(BinaryContext &BC) override;
56+
};
57+
58+
} // namespace bolt
59+
} // namespace llvm
60+
61+
#endif // BOLT_PASSES_CONTINUITYSTATS_H

bolt/lib/Passes/ADRRelaxationPass.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,13 +56,14 @@ void ADRRelaxationPass::runOnFunction(BinaryFunction &BF) {
5656
continue;
5757
}
5858

59-
// Don't relax adr if it points to the same function and it is not split
60-
// and BF initial size is < 1MB.
59+
// Don't relax ADR if it points to the same function and is in the main
60+
// fragment and BF initial size is < 1MB.
6161
const unsigned OneMB = 0x100000;
6262
if (BF.getSize() < OneMB) {
6363
BinaryFunction *TargetBF = BC.getFunctionForSymbol(Symbol);
64-
if (TargetBF == &BF && !BF.isSplit())
64+
if (TargetBF == &BF && !BB.isSplit())
6565
continue;
66+
6667
// No relaxation needed if ADR references a basic block in the same
6768
// fragment.
6869
if (BinaryBasicBlock *TargetBB = BF.getBasicBlockForLabel(Symbol))

bolt/lib/Passes/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ add_llvm_library(LLVMBOLTPasses
2626
PatchEntries.cpp
2727
PettisAndHansen.cpp
2828
PLTCall.cpp
29+
ContinuityStats.cpp
2930
RegAnalysis.cpp
3031
RegReAssign.cpp
3132
ReorderAlgorithm.cpp

bolt/lib/Passes/ContinuityStats.cpp

Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
//===- bolt/Passes/ContinuityStats.cpp --------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file implements the continuity stats calculation pass.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#include "bolt/Passes/ContinuityStats.h"
14+
#include "bolt/Core/BinaryBasicBlock.h"
15+
#include "bolt/Core/BinaryFunction.h"
16+
#include "bolt/Utils/CommandLineOpts.h"
17+
#include "llvm/Support/CommandLine.h"
18+
#include <queue>
19+
#include <unordered_map>
20+
#include <unordered_set>
21+
22+
#define DEBUG_TYPE "bolt-opts"
23+
24+
using namespace llvm;
25+
using namespace bolt;
26+
27+
namespace opts {
28+
// Declared here, defined in another translation unit. In this file it gates
// the extra output: >= 1 prints the distribution tables and the per-bucket
// stats, >= 2 additionally names functions whose unreachable fraction is at
// least 5%.
extern cl::opt<unsigned> Verbosity;
29+
// Hidden option selecting how many of the hottest functions are analyzed.
// Defaults to 1000; a value of 0 makes the pass return without printing.
cl::opt<unsigned> NumFunctionsForContinuityCheck(
30+
"num-functions-for-continuity-check",
31+
cl::desc("number of hottest functions to print aggregated "
32+
"CFG discontinuity stats of."),
33+
cl::init(1000), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
34+
} // namespace opts
35+
36+
namespace {
37+
using FunctionListType = std::vector<const BinaryFunction *>;
38+
using function_iterator = FunctionListType::iterator;
39+
40+
template <typename T>
41+
void printDistribution(raw_ostream &OS, std::vector<T> &values,
42+
bool Fraction = false) {
43+
if (values.empty())
44+
return;
45+
// Sort values from largest to smallest and print the MAX, TOP 1%, 5%, 10%,
46+
// 20%, 50%, 80%, MIN. If Fraction is true, then values are printed as
47+
// fractions instead of integers.
48+
std::sort(values.begin(), values.end());
49+
50+
auto printLine = [&](std::string Text, double Percent) {
51+
int Rank = int(values.size() * (1.0 - Percent / 100));
52+
if (Percent == 0)
53+
Rank = values.size() - 1;
54+
if (Fraction)
55+
OS << " " << Text << std::string(9 - Text.length(), ' ') << ": "
56+
<< format("%.2lf%%", values[Rank] * 100) << "\n";
57+
else
58+
OS << " " << Text << std::string(9 - Text.length(), ' ') << ": "
59+
<< values[Rank] << "\n";
60+
};
61+
62+
printLine("MAX", 0);
63+
const int percentages[] = {1, 5, 10, 20, 50, 80};
64+
for (size_t i = 0; i < sizeof(percentages) / sizeof(percentages[0]); ++i) {
65+
printLine("TOP " + std::to_string(percentages[i]) + "%", percentages[i]);
66+
}
67+
printLine("MIN", 100);
68+
}
69+
70+
void printCFGContinuityStats(raw_ostream &OS,
71+
iterator_range<function_iterator> &Functions) {
72+
// Given a perfect profile, every positive-execution-count BB should be
73+
// connected to an entry of the function through a positive-execution-count
74+
// directed path in the control flow graph.
75+
std::vector<size_t> NumUnreachables;
76+
std::vector<size_t> SumECUnreachables;
77+
std::vector<double> FractionECUnreachables;
78+
79+
for (auto it = Functions.begin(); it != Functions.end(); ++it) {
80+
const BinaryFunction *Function = *it;
81+
if (Function->size() <= 1)
82+
continue;
83+
84+
// Compute the sum of all BB execution counts (ECs).
85+
size_t NumPosECBBs = 0;
86+
size_t SumAllBBEC = 0;
87+
for (const BinaryBasicBlock &BB : *Function) {
88+
const size_t BBEC = BB.getKnownExecutionCount();
89+
NumPosECBBs += BBEC > 0 ? 1 : 0;
90+
SumAllBBEC += BBEC;
91+
}
92+
93+
// Perform BFS on subgraph of CFG induced by positive weight edges.
94+
// Compute the number of BBs reachable from the entry(s) of the function and
95+
// the sum of their execution counts (ECs).
96+
std::unordered_map<unsigned, const BinaryBasicBlock *> IndexToBB;
97+
std::unordered_set<unsigned> Visited;
98+
std::queue<unsigned> Queue;
99+
for (const BinaryBasicBlock &BB : *Function) {
100+
// Make sure BB.getIndex() is not already in IndexToBB.
101+
assert(IndexToBB.find(BB.getIndex()) == IndexToBB.end());
102+
IndexToBB[BB.getIndex()] = &BB;
103+
if (BB.isEntryPoint() && BB.getKnownExecutionCount() > 0) {
104+
Queue.push(BB.getIndex());
105+
Visited.insert(BB.getIndex());
106+
}
107+
}
108+
while (!Queue.empty()) {
109+
const unsigned BBIndex = Queue.front();
110+
const BinaryBasicBlock *BB = IndexToBB[BBIndex];
111+
Queue.pop();
112+
auto SuccBIIter = BB->branch_info_begin();
113+
for (const BinaryBasicBlock *Succ : BB->successors()) {
114+
const uint64_t Count = SuccBIIter->Count;
115+
if (Count == BinaryBasicBlock::COUNT_NO_PROFILE || Count == 0) {
116+
++SuccBIIter;
117+
continue;
118+
}
119+
if (!Visited.insert(Succ->getIndex()).second) {
120+
++SuccBIIter;
121+
continue;
122+
}
123+
Queue.push(Succ->getIndex());
124+
++SuccBIIter;
125+
}
126+
}
127+
128+
const size_t NumReachableBBs = Visited.size();
129+
130+
// Loop through Visited, and sum the corresponding BBs' execution counts
131+
// (ECs).
132+
size_t SumReachableBBEC = 0;
133+
for (const unsigned BBIndex : Visited) {
134+
const BinaryBasicBlock *BB = IndexToBB[BBIndex];
135+
SumReachableBBEC += BB->getKnownExecutionCount();
136+
}
137+
138+
const size_t NumPosECBBsUnreachableFromEntry =
139+
NumPosECBBs - NumReachableBBs;
140+
const size_t SumUnreachableBBEC = SumAllBBEC - SumReachableBBEC;
141+
const double FractionECUnreachable =
142+
(double)SumUnreachableBBEC / SumAllBBEC;
143+
144+
if (opts::Verbosity >= 2 && FractionECUnreachable >= 0.05) {
145+
OS << "Non-trivial CFG discontinuity observed in function "
146+
<< Function->getPrintName() << "\n";
147+
LLVM_DEBUG(Function->dump());
148+
}
149+
150+
NumUnreachables.push_back(NumPosECBBsUnreachableFromEntry);
151+
SumECUnreachables.push_back(SumUnreachableBBEC);
152+
FractionECUnreachables.push_back(FractionECUnreachable);
153+
}
154+
155+
if (FractionECUnreachables.empty())
156+
return;
157+
158+
std::sort(FractionECUnreachables.begin(), FractionECUnreachables.end());
159+
const int Rank = int(FractionECUnreachables.size() * 0.95);
160+
OS << format("top 5%% function CFG discontinuity is %.2lf%%\n",
161+
FractionECUnreachables[Rank] * 100);
162+
163+
if (opts::Verbosity >= 1) {
164+
OS << "abbreviations: EC = execution count, POS BBs = positive EC BBs\n"
165+
<< "distribution of NUM(unreachable POS BBs) among all focal "
166+
"functions\n";
167+
printDistribution(OS, NumUnreachables);
168+
169+
OS << "distribution of SUM_EC(unreachable POS BBs) among all focal "
170+
"functions\n";
171+
printDistribution(OS, SumECUnreachables);
172+
173+
OS << "distribution of [(SUM_EC(unreachable POS BBs) / SUM_EC(all "
174+
"POS BBs))] among all focal functions\n";
175+
printDistribution(OS, FractionECUnreachables, /*Fraction=*/true);
176+
}
177+
}
178+
179+
void printAll(BinaryContext &BC, FunctionListType &ValidFunctions,
180+
size_t NumTopFunctions) {
181+
// Sort the list of functions by execution counts (reverse).
182+
llvm::sort(ValidFunctions,
183+
[&](const BinaryFunction *A, const BinaryFunction *B) {
184+
return A->getKnownExecutionCount() > B->getKnownExecutionCount();
185+
});
186+
187+
const size_t RealNumTopFunctions =
188+
std::min(NumTopFunctions, ValidFunctions.size());
189+
190+
iterator_range<function_iterator> Functions(
191+
ValidFunctions.begin(), ValidFunctions.begin() + RealNumTopFunctions);
192+
193+
BC.outs() << format("BOLT-INFO: among the hottest %zu functions ",
194+
RealNumTopFunctions);
195+
printCFGContinuityStats(BC.outs(), Functions);
196+
197+
// Print more detailed bucketed stats if requested.
198+
if (opts::Verbosity >= 1 && RealNumTopFunctions >= 5) {
199+
const size_t PerBucketSize = RealNumTopFunctions / 5;
200+
BC.outs() << format(
201+
"Detailed stats for 5 buckets, each with %zu functions:\n",
202+
PerBucketSize);
203+
204+
// For each bucket, print the CFG continuity stats of the functions in the
205+
// bucket.
206+
for (size_t BucketIndex = 0; BucketIndex < 5; ++BucketIndex) {
207+
const size_t StartIndex = BucketIndex * PerBucketSize;
208+
const size_t EndIndex = StartIndex + PerBucketSize;
209+
iterator_range<function_iterator> Functions(
210+
ValidFunctions.begin() + StartIndex,
211+
ValidFunctions.begin() + EndIndex);
212+
const size_t MaxFunctionExecutionCount =
213+
ValidFunctions[StartIndex]->getKnownExecutionCount();
214+
const size_t MinFunctionExecutionCount =
215+
ValidFunctions[EndIndex - 1]->getKnownExecutionCount();
216+
BC.outs() << format("----------------\n| Bucket %zu: "
217+
"|\n----------------\n",
218+
BucketIndex + 1)
219+
<< format(
220+
"execution counts of the %zu functions in the bucket: "
221+
"%zu-%zu\n",
222+
EndIndex - StartIndex, MinFunctionExecutionCount,
223+
MaxFunctionExecutionCount);
224+
printCFGContinuityStats(BC.outs(), Functions);
225+
}
226+
}
227+
}
228+
} // namespace
229+
230+
bool PrintContinuityStats::shouldOptimize(const BinaryFunction &BF) const {
231+
if (BF.empty() || !BF.hasValidProfile())
232+
return false;
233+
234+
return BinaryFunctionPass::shouldOptimize(BF);
235+
}
236+
237+
Error PrintContinuityStats::runOnFunctions(BinaryContext &BC) {
238+
// Create a list of functions with valid profiles.
239+
FunctionListType ValidFunctions;
240+
for (const auto &BFI : BC.getBinaryFunctions()) {
241+
const BinaryFunction *Function = &BFI.second;
242+
if (PrintContinuityStats::shouldOptimize(*Function))
243+
ValidFunctions.push_back(Function);
244+
}
245+
if (ValidFunctions.empty() || opts::NumFunctionsForContinuityCheck == 0)
246+
return Error::success();
247+
248+
printAll(BC, ValidFunctions, opts::NumFunctionsForContinuityCheck);
249+
return Error::success();
250+
}

0 commit comments

Comments
 (0)