Skip to content

Commit 2215673

Browse files
committed
[BOLT] Setup CDSplit Pass Structure
This commit establishes the general structure of the CDSplit implementation without incorporating the exact splitting logic. Currently, all functions undergo hot-cold splitting based on the decisions made by the SplitFunctions pass. Subsequent commits will introduce the precise splitting logic.
1 parent 273ee29 commit 2215673

File tree

6 files changed

+300
-0
lines changed

6 files changed

+300
-0
lines changed

bolt/include/bolt/Passes/CDSplit.h

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
//===- bolt/Passes/CDSplit.h - Split functions into hot/warm/cold
2+
// after function reordering pass -------*- C++ -*-===//
3+
//
4+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
//
8+
//===----------------------------------------------------------------------===//
9+
10+
#ifndef BOLT_PASSES_CDSPLIT
11+
#define BOLT_PASSES_CDSPLIT
12+
13+
#include "bolt/Passes/SplitFunctions.h"
14+
#include <atomic>
15+
16+
namespace llvm {
17+
namespace bolt {
18+
19+
using BasicBlockOrder = BinaryFunction::BasicBlockOrderType;
20+
21+
class CDSplit : public BinaryFunctionPass {
22+
private:
23+
/// Overall stats.
24+
std::atomic<uint64_t> SplitBytesHot{0ull};
25+
std::atomic<uint64_t> SplitBytesCold{0ull};
26+
27+
/// List of functions to be considered.
28+
/// All functions in the list are used to construct a call graph.
29+
/// A subset of functions in this list are considered for splitting.
30+
std::vector<BinaryFunction *> FunctionsToConsider;
31+
32+
/// Helper functions to initialize global variables.
33+
void initialize(BinaryContext &BC);
34+
35+
/// Split function body into 3 fragments: hot / warm / cold.
36+
void runOnFunction(BinaryFunction &BF);
37+
38+
/// Assign each basic block in the given function to either hot, cold,
39+
/// or warm fragment using the CDSplit algorithm.
40+
void assignFragmentThreeWay(const BinaryFunction &BF,
41+
const BasicBlockOrder &BlockOrder);
42+
43+
/// Find the best split index that separates hot from warm.
44+
/// The basic block whose index equals the returned split index will be the
45+
/// last hot block.
46+
size_t findSplitIndex(const BinaryFunction &BF,
47+
const BasicBlockOrder &BlockOrder);
48+
49+
public:
50+
explicit CDSplit(const cl::opt<bool> &PrintPass)
51+
: BinaryFunctionPass(PrintPass) {}
52+
53+
bool shouldOptimize(const BinaryFunction &BF) const override;
54+
55+
const char *getName() const override { return "cdsplit"; }
56+
57+
void runOnFunctions(BinaryContext &BC) override;
58+
};
59+
60+
} // namespace bolt
61+
} // namespace llvm
62+
63+
#endif

bolt/lib/Passes/CDSplit.cpp

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
//===- bolt/Passes/CDSplit.cpp - Pass for splitting function code 3-way
2+
//--===//
3+
//
4+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
//
8+
//===----------------------------------------------------------------------===//
9+
//
10+
// This file implements the CDSplit pass.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#include "bolt/Passes/CDSplit.h"
15+
#include "bolt/Core/ParallelUtilities.h"
16+
#include "bolt/Utils/CommandLineOpts.h"
17+
#include "llvm/ADT/SmallVector.h"
18+
#include "llvm/MC/MCInst.h"
19+
#include "llvm/Support/MathExtras.h"
20+
21+
#define DEBUG_TYPE "bolt-opts"
22+
23+
using namespace llvm;
24+
using namespace bolt;
25+
26+
namespace opts {
27+
28+
extern cl::OptionCategory BoltOptCategory;
29+
30+
extern cl::opt<bool> UseCDSplit;
31+
extern cl::opt<bool> SplitEH;
32+
extern cl::opt<unsigned> ExecutionCountThreshold;
33+
} // namespace opts
34+
35+
namespace llvm {
36+
namespace bolt {
37+
38+
namespace {
39+
/// Return true if the function should be considered for building call graph.
40+
bool shouldConsider(const BinaryFunction &BF) {
41+
return BF.hasValidIndex() && BF.hasValidProfile() && !BF.empty();
42+
}
43+
} // anonymous namespace
44+
45+
bool CDSplit::shouldOptimize(const BinaryFunction &BF) const {
46+
// Do not split functions with a small execution count.
47+
if (BF.getKnownExecutionCount() < opts::ExecutionCountThreshold)
48+
return false;
49+
50+
// Do not split functions with at least one block that has no known
51+
// execution count due to incomplete information.
52+
// Do not split functions with only zero-execution count blocks
53+
// as there is not enough variation in block count to justify splitting.
54+
if (!BF.hasFullProfile() || BF.allBlocksCold())
55+
return false;
56+
57+
return BinaryFunctionPass::shouldOptimize(BF);
58+
}
59+
60+
/// Initialize algorithm's metadata.
61+
void CDSplit::initialize(BinaryContext &BC) {
62+
// Construct a list of functions that are considered for building call graph.
63+
// Only those in this list that evaluates true for shouldOptimize are
64+
// candidates for 3-way splitting.
65+
std::vector<BinaryFunction *> SortedFunctions = BC.getSortedFunctions();
66+
FunctionsToConsider.reserve(SortedFunctions.size());
67+
for (BinaryFunction *BF : SortedFunctions) {
68+
if (shouldConsider(*BF))
69+
FunctionsToConsider.push_back(BF);
70+
}
71+
}
72+
73+
/// Find the best index for splitting. The returned value is the index of the
74+
/// last hot basic block. Hence, "no splitting" is equivalent to returning the
75+
/// value which is one less than the size of the function.
76+
size_t CDSplit::findSplitIndex(const BinaryFunction &BF,
77+
const BasicBlockOrder &BlockOrder) {
78+
// Placeholder: hot-cold splitting.
79+
return BF.getLayout().getMainFragment().size() - 1;
80+
}
81+
82+
/// Assign each basic block in the given function to either hot, cold,
83+
/// or warm fragment using the CDSplit algorithm.
84+
void CDSplit::assignFragmentThreeWay(const BinaryFunction &BF,
85+
const BasicBlockOrder &BlockOrder) {
86+
size_t BestSplitIndex = findSplitIndex(BF, BlockOrder);
87+
88+
// Assign fragments based on the computed best split index.
89+
// All basic blocks with index up to the best split index become hot.
90+
// All remaining blocks are warm / cold depending on if count is
91+
// greater than 0 or not.
92+
FragmentNum Main(0);
93+
FragmentNum Warm(1);
94+
FragmentNum Cold(2);
95+
for (size_t Index = 0; Index < BlockOrder.size(); Index++) {
96+
BinaryBasicBlock *BB = BlockOrder[Index];
97+
if (Index <= BestSplitIndex)
98+
BB->setFragmentNum(Main);
99+
else
100+
BB->setFragmentNum(BB->getKnownExecutionCount() > 0 ? Warm : Cold);
101+
}
102+
}
103+
104+
/// Split \p BF: mark the blocks that must stay in the main fragment, assign
/// every block to a fragment, commit the new layout, and record size stats.
void CDSplit::runOnFunction(BinaryFunction &BF) {
  assert(!BF.empty() && "splitting an empty function");

  BinaryContext &BC = BF.getBinaryContext();
  FunctionLayout &Layout = BF.getLayout();
  BasicBlockOrder NewLayout(Layout.block_begin(), Layout.block_end());

  // Never outline the first basic block.
  NewLayout.front()->setCanOutline(false);

  // Do not split extra entry points in aarch64. They can be referred by
  // using ADRs and when this happens, these blocks cannot be placed far
  // away due to the limited range in ADR instruction.
  const bool PinEntryPoints = BC.isAArch64();
  // Exception-handling restrictions apply unless EH splitting is enabled.
  const bool PinEHBlocks = BF.hasEHRanges() && !opts::SplitEH;

  // True if an otherwise outlineable block has to remain in the main fragment.
  auto MustStayInMain = [&](BinaryBasicBlock &BB) {
    if (PinEntryPoints && BB.isEntryPoint())
      return true;
    if (!PinEHBlocks)
      return false;
    // We cannot move landing pads (or rather entry points for landing pads).
    if (BB.isLandingPad())
      return true;
    // We cannot move a block that can throw since exception-handling
    // runtime cannot deal with split functions. However, if we can guarantee
    // that the block never throws, it is safe to move the block to
    // decrease the size of the function.
    return llvm::any_of(
        BB, [&](const MCInst &Instr) { return BC.MIB->isInvoke(Instr); });
  };

  for (BinaryBasicBlock *BB : NewLayout) {
    if (BB->canOutline() && MustStayInMain(*BB))
      BB->setCanOutline(false);
  }

  // Assign each basic block in NewLayout to either hot, warm, or cold fragment.
  assignFragmentThreeWay(BF, NewLayout);

  // Whatever the assignment decided, non-outlineable blocks belong to main.
  for (BinaryBasicBlock *BB : NewLayout) {
    if (!BB->canOutline())
      BB->setFragmentNum(FragmentNum::main());
  }

  // Blocks pulled back into main by the loop above may sit after warm or cold
  // blocks; a stable sort by fragment number moves them to the end of main
  // while keeping the relative order inside each fragment.
  llvm::stable_sort(NewLayout, [](const BinaryBasicBlock *LHS,
                                  const BinaryBasicBlock *RHS) {
    return LHS->getFragmentNum() < RHS->getFragmentNum();
  });

  Layout.update(NewLayout);

  // For shared objects, invoke instructions and corresponding landing pads
  // have to be placed in the same fragment. When we split them, create
  // trampoline landing pads that will redirect the execution to real LPs.
  SplitFunctions::TrampolineSetType Trampolines;
  if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit())
    Trampolines = SplitFunctions::createEHTrampolines(BF);

  // Size statistics are only gathered on x86 and only for split functions.
  if (BC.isX86() && BF.isSplit()) {
    const auto [HotSize, ColdSize] = BC.calculateEmittedSize(BF);
    SplitBytesHot += HotSize;
    SplitBytesCold += ColdSize;
  }
}
177+
178+
/// Pass entry point. Does nothing unless opts::UseCDSplit is set; otherwise
/// splits every eligible function and prints a one-line summary.
void CDSplit::runOnFunctions(BinaryContext &BC) {
  if (!opts::UseCDSplit)
    return;

  // Initialize global variables.
  initialize(BC);

  // A function is skipped unless it satisfies both shouldConsider and
  // shouldOptimize.
  ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
    if (!shouldConsider(BF))
      return true;
    return !shouldOptimize(BF);
  };

  // Make function splitting decisions in parallel.
  ParallelUtilities::runOnEachFunction(
      BC, ParallelUtilities::SchedulingPolicy::SP_BB_LINEAR,
      [&](BinaryFunction &BF) { runOnFunction(BF); }, SkipFunc, "CDSplit",
      /*ForceSequential=*/false);

  // Read the counters once now that the per-function work has been issued
  // and completed by the call above.
  const uint64_t HotBytes = SplitBytesHot;
  const uint64_t ColdBytes = SplitBytesCold;
  const uint64_t TotalBytes = HotBytes + ColdBytes;

  if (TotalBytes == 0) {
    outs() << "BOLT-INFO: cdsplit didn't split any functions\n";
    return;
  }

  outs() << "BOLT-INFO: cdsplit separates " << HotBytes << " hot bytes from "
         << ColdBytes << " cold bytes "
         << format("(%.2lf%% of split functions is in the main fragment)\n",
                   100.0 * HotBytes / TotalBytes);
}
206+
207+
} // namespace bolt
208+
} // namespace llvm

bolt/lib/Passes/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ add_llvm_library(LLVMBOLTPasses
99
CacheMetrics.cpp
1010
CallGraph.cpp
1111
CallGraphWalker.cpp
12+
CDSplit.cpp
1213
DataflowAnalysis.cpp
1314
DataflowInfoManager.cpp
1415
FrameAnalysis.cpp

bolt/lib/Passes/SplitFunctions.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ extern cl::OptionCategory BoltOptCategory;
6060
extern cl::opt<bool> SplitEH;
6161
extern cl::opt<unsigned> ExecutionCountThreshold;
6262
extern cl::opt<uint32_t> RandomSeed;
63+
extern cl::opt<bool> UseCDSplit;
6364

6465
static cl::opt<bool> AggressiveSplitting(
6566
"split-all-cold", cl::desc("outline as many cold basic blocks as possible"),
@@ -231,6 +232,17 @@ bool SplitFunctions::shouldOptimize(const BinaryFunction &BF) const {
231232
}
232233

233234
void SplitFunctions::runOnFunctions(BinaryContext &BC) {
235+
if (opts::UseCDSplit &&
236+
!(opts::SplitFunctions &&
237+
opts::SplitStrategy == SplitFunctionsStrategy::Profile2)) {
238+
errs() << "BOLT-ERROR: -use-cdsplit should be applied together with "
239+
"-split-functions using default -split-strategy=profile2. "
240+
"-split-functions 2-way splits functions before the function "
241+
"reordering pass, while -use-cdsplit 3-way splits functions "
242+
"after the function reordering pass. \n";
243+
exit(1);
244+
}
245+
234246
if (!opts::SplitFunctions)
235247
return;
236248

bolt/lib/Rewrite/BinaryPassManager.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "bolt/Passes/Aligner.h"
1212
#include "bolt/Passes/AllocCombiner.h"
1313
#include "bolt/Passes/AsmDump.h"
14+
#include "bolt/Passes/CDSplit.h"
1415
#include "bolt/Passes/CMOVConversion.h"
1516
#include "bolt/Passes/FixRISCVCallsPass.h"
1617
#include "bolt/Passes/FixRelaxationPass.h"
@@ -182,6 +183,10 @@ static cl::opt<bool>
182183
PrintSplit("print-split", cl::desc("print functions after code splitting"),
183184
cl::Hidden, cl::cat(BoltOptCategory));
184185

186+
static cl::opt<bool> PrintCDSplit("print-cdsplit",
187+
cl::desc("print functions after cdsplit"),
188+
cl::Hidden, cl::cat(BoltOptCategory));
189+
185190
static cl::opt<bool>
186191
PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"),
187192
cl::Hidden, cl::cat(BoltOptCategory));
@@ -430,6 +435,11 @@ void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
430435
Manager.registerPass(
431436
std::make_unique<ReorderFunctions>(PrintReorderedFunctions));
432437

438+
/// This pass three-way splits functions after function reordering.
439+
Manager.registerPass(std::make_unique<CDSplit>(PrintCDSplit));
440+
441+
Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup));
442+
433443
// Print final dyno stats right while CFG and instruction analysis are intact.
434444
Manager.registerPass(
435445
std::make_unique<DynoStatsPrintPass>(

bolt/lib/Utils/CommandLineOpts.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,12 @@ cl::opt<unsigned>
191191
cl::init(0), cl::ZeroOrMore, cl::cat(BoltCategory),
192192
cl::sub(cl::SubCommand::getAll()));
193193

194+
cl::opt<bool>
195+
UseCDSplit("use-cdsplit",
196+
cl::desc("split functions into 3 fragments using the CDSplit "
197+
"algorithm after function reordering pass"),
198+
cl::init(false), cl::cat(BoltOptCategory));
199+
194200
bool processAllFunctions() {
195201
if (opts::AggregateOnly)
196202
return false;

0 commit comments

Comments
 (0)