Skip to content

Commit e9e9c2d

Browse files
committed
[BOLT] Add structure of CDSplit to SplitFunctions
This commit establishes the general structure of the CDSplit strategy in SplitFunctions without incorporating the exact splitting logic. With -split-functions -split-strategy=cdsplit, the SplitFunctions pass runs twice: the first run happens before function reordering and splits functions into hot and cold fragments; the second run happens after function reordering and splits functions into hot, warm, and cold fragments based on the fixed function ordering. Currently, the second splitting pass hot-warm splits every function right after its entry block. Subsequent commits will introduce the precise splitting logic.
1 parent d333c0e commit e9e9c2d

File tree

5 files changed

+101
-3
lines changed

5 files changed

+101
-3
lines changed

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,9 @@ class BinaryContext {
611611
/// Indicates if the binary contains split functions.
612612
bool HasSplitFunctions{false};
613613

614+
/// Indicates if the function ordering of the binary is finalized.
615+
bool HasFinalizedFunctionOrder{false};
616+
614617
/// Is the binary always loaded at a fixed address. Shared objects and
615618
/// position-independent executables (PIEs) are examples of binaries that
616619
/// will have HasFixedLoadAddress set to false.

bolt/include/bolt/Passes/SplitFunctions.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ enum SplitFunctionsStrategy : char {
2323
/// Split each function into a hot and cold fragment using profiling
2424
/// information.
2525
Profile2 = 0,
26+
/// Split each function into a hot, warm, and cold fragment using
27+
/// profiling information.
28+
CDSplit,
2629
/// Split each function into a hot and cold fragment at a randomly chosen
2730
/// split point (ignoring any available profiling information).
2831
Random2,
@@ -41,6 +44,9 @@ class SplitStrategy {
4144
virtual ~SplitStrategy() = default;
4245
virtual bool canSplit(const BinaryFunction &BF) = 0;
4346
virtual bool keepEmpty() = 0;
47+
// When autoReversal() == true, check if the new main fragment after splitting
48+
// is of a smaller size; if not, revert splitting.
49+
virtual bool autoReversal() = 0;
4450
virtual void fragment(const BlockIt Start, const BlockIt End) = 0;
4551
};
4652

bolt/lib/Passes/ReorderFunctions.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,8 @@ void ReorderFunctions::runOnFunctions(BinaryContext &BC) {
427427

428428
reorder(std::move(Clusters), BFs);
429429

430+
BC.HasFinalizedFunctionOrder = true;
431+
430432
std::unique_ptr<std::ofstream> FuncsFile;
431433
if (!opts::GenerateFunctionOrderFile.empty()) {
432434
FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile,

bolt/lib/Passes/SplitFunctions.cpp

Lines changed: 83 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ static cl::opt<SplitFunctionsStrategy> SplitStrategy(
9292
cl::values(clEnumValN(SplitFunctionsStrategy::Profile2, "profile2",
9393
"split each function into a hot and cold fragment "
9494
"using profiling information")),
95+
cl::values(clEnumValN(SplitFunctionsStrategy::CDSplit, "cdsplit",
96+
"split each function into a hot, warm, and cold "
97+
"fragment using profiling information")),
9598
cl::values(clEnumValN(
9699
SplitFunctionsStrategy::Random2, "random2",
97100
"split each function into a hot and cold fragment at a randomly chosen "
@@ -106,6 +109,11 @@ static cl::opt<SplitFunctionsStrategy> SplitStrategy(
106109
"fragment contains exactly a single basic block")),
107110
cl::desc("strategy used to partition blocks into fragments"),
108111
cl::cat(BoltOptCategory));
112+
113+
bool threeWaySplit() {
114+
return opts::SplitFunctions &&
115+
opts::SplitStrategy == SplitFunctionsStrategy::CDSplit;
116+
}
109117
} // namespace opts
110118

111119
namespace {
@@ -126,7 +134,12 @@ struct SplitProfile2 final : public SplitStrategy {
126134
return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF);
127135
}
128136

129-
bool keepEmpty() override { return false; }
137+
bool keepEmpty() override {
138+
return opts::SplitStrategy != SplitFunctionsStrategy::CDSplit ? false
139+
: true;
140+
}
141+
142+
// Opt in to automatic reversal: the split is undone when it does not make
// the main fragment smaller.
bool autoReversal() override { return true; }
130143

131144
void fragment(const BlockIt Start, const BlockIt End) override {
132145
for (BinaryBasicBlock *const BB : llvm::make_range(Start, End)) {
@@ -136,6 +149,55 @@ struct SplitProfile2 final : public SplitStrategy {
136149
}
137150
};
138151

152+
struct SplitCacheDirected final : public SplitStrategy {
153+
BinaryContext &BC;
154+
using BasicBlockOrder = BinaryFunction::BasicBlockOrderType;
155+
156+
explicit SplitCacheDirected(BinaryContext &BC) : BC(BC) {}
157+
158+
bool canSplit(const BinaryFunction &BF) override {
159+
return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF);
160+
}
161+
162+
bool keepEmpty() override { return true; }
163+
164+
// This strategy does not require that the new hot fragment size strictly
165+
// decreases after splitting.
166+
bool autoReversal() override { return false; }
167+
168+
void fragment(const BlockIt Start, const BlockIt End) override {
169+
BasicBlockOrder BlockOrder(Start, End);
170+
BinaryFunction &BF = *BlockOrder.front()->getFunction();
171+
172+
size_t BestSplitIndex = findSplitIndex(BF, BlockOrder);
173+
174+
// Assign fragments based on the computed best split index.
175+
// All basic blocks with index up to the best split index become hot.
176+
// All remaining blocks are warm / cold depending on if count is
177+
// greater than 0 or not.
178+
FragmentNum Main(0);
179+
FragmentNum Warm(1);
180+
FragmentNum Cold(2);
181+
for (size_t Index = 0; Index < BlockOrder.size(); Index++) {
182+
BinaryBasicBlock *BB = BlockOrder[Index];
183+
if (Index <= BestSplitIndex)
184+
BB->setFragmentNum(Main);
185+
else
186+
BB->setFragmentNum(BB->getKnownExecutionCount() > 0 ? Warm : Cold);
187+
}
188+
}
189+
190+
private:
191+
/// Find the best index for splitting. The returned value is the index of the
192+
/// last hot basic block. Hence, "no splitting" is equivalent to returning the
193+
/// value which is one less than the size of the function.
194+
size_t findSplitIndex(const BinaryFunction &BF,
195+
const BasicBlockOrder &BlockOrder) {
196+
// Placeholder: hot-warm split after entry block.
197+
return 0;
198+
}
199+
};
200+
139201
struct SplitRandom2 final : public SplitStrategy {
140202
std::minstd_rand0 Gen;
141203

@@ -145,6 +207,8 @@ struct SplitRandom2 final : public SplitStrategy {
145207

146208
bool keepEmpty() override { return false; }
147209

210+
// Random two-way splits are reverted when the main fragment does not end up
// smaller after splitting.
bool autoReversal() override { return true; }
211+
148212
void fragment(const BlockIt Start, const BlockIt End) override {
149213
using DiffT = typename std::iterator_traits<BlockIt>::difference_type;
150214
const DiffT NumBlocks = End - Start;
@@ -172,6 +236,8 @@ struct SplitRandomN final : public SplitStrategy {
172236

173237
bool keepEmpty() override { return false; }
174238

239+
// Random N-way splits are reverted when the main fragment does not end up
// smaller after splitting.
bool autoReversal() override { return true; }
240+
175241
void fragment(const BlockIt Start, const BlockIt End) override {
176242
using DiffT = typename std::iterator_traits<BlockIt>::difference_type;
177243
const DiffT NumBlocks = End - Start;
@@ -223,6 +289,8 @@ struct SplitAll final : public SplitStrategy {
223289
return true;
224290
}
225291

292+
// Returning true asks the caller to revert the split when the main fragment
// does not end up smaller after splitting.
bool autoReversal() override { return true; }
293+
226294
void fragment(const BlockIt Start, const BlockIt End) override {
227295
unsigned Fragment = 0;
228296
for (BinaryBasicBlock *const BB : llvm::make_range(Start, End))
@@ -250,6 +318,16 @@ void SplitFunctions::runOnFunctions(BinaryContext &BC) {
250318
bool ForceSequential = false;
251319

252320
switch (opts::SplitStrategy) {
321+
case SplitFunctionsStrategy::CDSplit:
322+
// CDSplit runs two splitting passes: hot-cold splitting (SplitProfile2)
323+
// before function reordering and hot-warm-cold splitting
324+
// (SplitCacheDirected) after function reordering.
325+
if (BC.HasFinalizedFunctionOrder)
326+
Strategy = std::make_unique<SplitCacheDirected>(BC);
327+
else
328+
Strategy = std::make_unique<SplitProfile2>();
329+
opts::AggressiveSplitting = true;
330+
break;
253331
case SplitFunctionsStrategy::Profile2:
254332
Strategy = std::make_unique<SplitProfile2>();
255333
break;
@@ -409,8 +487,10 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
409487
LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
410488
<< " post-split is <0x" << Twine::utohexstr(HotSize)
411489
<< ", 0x" << Twine::utohexstr(ColdSize) << ">\n");
412-
if (alignTo(OriginalHotSize, opts::SplitAlignThreshold) <=
413-
alignTo(HotSize, opts::SplitAlignThreshold) + opts::SplitThreshold) {
490+
if (S.autoReversal() &&
491+
alignTo(OriginalHotSize, opts::SplitAlignThreshold) <=
492+
alignTo(HotSize, opts::SplitAlignThreshold) +
493+
opts::SplitThreshold) {
414494
if (opts::Verbosity >= 2) {
415495
outs() << "BOLT-INFO: Reversing splitting of function "
416496
<< formatv("{0}:\n {1:x}, {2:x} -> {3:x}\n", BF, HotSize,

bolt/lib/Rewrite/BinaryPassManager.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ extern cl::opt<bool> PrintDynoStats;
5252
extern cl::opt<bool> DumpDotAll;
5353
extern cl::opt<std::string> AsmDump;
5454
extern cl::opt<bolt::PLTCall::OptType> PLT;
55+
extern bool threeWaySplit();
5556

5657
static cl::opt<bool>
5758
DynoStatsAll("dyno-stats-all",
@@ -430,6 +431,12 @@ void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
430431
Manager.registerPass(
431432
std::make_unique<ReorderFunctions>(PrintReorderedFunctions));
432433

434+
if (opts::threeWaySplit()) {
435+
Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));
436+
Manager.registerPass(
437+
std::make_unique<FixupBranches>(PrintAfterBranchFixup));
438+
}
439+
433440
// Print final dyno stats right while CFG and instruction analysis are intact.
434441
Manager.registerPass(
435442
std::make_unique<DynoStatsPrintPass>(

0 commit comments

Comments
 (0)