|
//===- bolt/Passes/CDSplit.cpp - Pass for splitting function code 3-way ---===//
| 3 | +// |
| 4 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 5 | +// See https://llvm.org/LICENSE.txt for license information. |
| 6 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 7 | +// |
| 8 | +//===----------------------------------------------------------------------===// |
| 9 | +// |
| 10 | +// This file implements the CDSplit pass. |
| 11 | +// |
| 12 | +//===----------------------------------------------------------------------===// |
| 13 | + |
| 14 | +#include "bolt/Passes/CDSplit.h" |
| 15 | +#include "bolt/Core/ParallelUtilities.h" |
| 16 | +#include "bolt/Utils/CommandLineOpts.h" |
| 17 | +#include "llvm/ADT/SmallVector.h" |
| 18 | +#include "llvm/MC/MCInst.h" |
| 19 | +#include "llvm/Support/MathExtras.h" |
| 20 | + |
| 21 | +#define DEBUG_TYPE "bolt-opts" |
| 22 | + |
| 23 | +using namespace llvm; |
| 24 | +using namespace bolt; |
| 25 | + |
| 26 | +namespace opts { |
| 27 | + |
| 28 | +extern cl::OptionCategory BoltOptCategory; |
| 29 | + |
| 30 | +extern cl::opt<bool> UseCDSplit; |
| 31 | +extern cl::opt<bool> SplitEH; |
| 32 | +extern cl::opt<unsigned> ExecutionCountThreshold; |
| 33 | +} // namespace opts |
| 34 | + |
| 35 | +namespace llvm { |
| 36 | +namespace bolt { |
| 37 | + |
| 38 | +namespace { |
| 39 | +/// Return true if the function should be considered for building call graph. |
| 40 | +bool shouldConsider(const BinaryFunction &BF) { |
| 41 | + return BF.hasValidIndex() && BF.hasValidProfile() && !BF.empty(); |
| 42 | +} |
| 43 | +} // anonymous namespace |
| 44 | + |
| 45 | +bool CDSplit::shouldOptimize(const BinaryFunction &BF) const { |
| 46 | + // Do not split functions with a small execution count. |
| 47 | + if (BF.getKnownExecutionCount() < opts::ExecutionCountThreshold) |
| 48 | + return false; |
| 49 | + |
| 50 | + // Do not split functions with at least one block that has no known |
| 51 | + // execution count due to incomplete information. |
| 52 | + // Do not split functions with only zero-execution count blocks |
| 53 | + // as there is not enough variation in block count to justify splitting. |
| 54 | + if (!BF.hasFullProfile() || BF.allBlocksCold()) |
| 55 | + return false; |
| 56 | + |
| 57 | + return BinaryFunctionPass::shouldOptimize(BF); |
| 58 | +} |
| 59 | + |
| 60 | +/// Initialize algorithm's metadata. |
| 61 | +void CDSplit::initialize(BinaryContext &BC) { |
| 62 | + // Construct a list of functions that are considered for building call graph. |
| 63 | + // Only those in this list that evaluates true for shouldOptimize are |
| 64 | + // candidates for 3-way splitting. |
| 65 | + std::vector<BinaryFunction *> SortedFunctions = BC.getSortedFunctions(); |
| 66 | + FunctionsToConsider.reserve(SortedFunctions.size()); |
| 67 | + for (BinaryFunction *BF : SortedFunctions) { |
| 68 | + if (shouldConsider(*BF)) |
| 69 | + FunctionsToConsider.push_back(BF); |
| 70 | + } |
| 71 | +} |
| 72 | + |
| 73 | +/// Find the best index for splitting. The returned value is the index of the |
| 74 | +/// last hot basic block. Hence, "no splitting" is equivalent to returning the |
| 75 | +/// value which is one less than the size of the function. |
| 76 | +size_t CDSplit::findSplitIndex(const BinaryFunction &BF, |
| 77 | + const BasicBlockOrder &BlockOrder) { |
| 78 | + // Placeholder: hot-cold splitting. |
| 79 | + return BF.getLayout().getMainFragment().size() - 1; |
| 80 | +} |
| 81 | + |
| 82 | +/// Assign each basic block in the given function to either hot, cold, |
| 83 | +/// or warm fragment using the CDSplit algorithm. |
| 84 | +void CDSplit::assignFragmentThreeWay(const BinaryFunction &BF, |
| 85 | + const BasicBlockOrder &BlockOrder) { |
| 86 | + size_t BestSplitIndex = findSplitIndex(BF, BlockOrder); |
| 87 | + |
| 88 | + // Assign fragments based on the computed best split index. |
| 89 | + // All basic blocks with index up to the best split index become hot. |
| 90 | + // All remaining blocks are warm / cold depending on if count is |
| 91 | + // greater than 0 or not. |
| 92 | + FragmentNum Main(0); |
| 93 | + FragmentNum Warm(1); |
| 94 | + FragmentNum Cold(2); |
| 95 | + for (size_t Index = 0; Index < BlockOrder.size(); Index++) { |
| 96 | + BinaryBasicBlock *BB = BlockOrder[Index]; |
| 97 | + if (Index <= BestSplitIndex) |
| 98 | + BB->setFragmentNum(Main); |
| 99 | + else |
| 100 | + BB->setFragmentNum(BB->getKnownExecutionCount() > 0 ? Warm : Cold); |
| 101 | + } |
| 102 | +} |
| 103 | + |
| 104 | +void CDSplit::runOnFunction(BinaryFunction &BF) { |
| 105 | + assert(!BF.empty() && "splitting an empty function"); |
| 106 | + |
| 107 | + FunctionLayout &Layout = BF.getLayout(); |
| 108 | + BinaryContext &BC = BF.getBinaryContext(); |
| 109 | + |
| 110 | + BasicBlockOrder NewLayout(Layout.block_begin(), Layout.block_end()); |
| 111 | + // Never outline the first basic block. |
| 112 | + NewLayout.front()->setCanOutline(false); |
| 113 | + for (BinaryBasicBlock *BB : NewLayout) { |
| 114 | + if (!BB->canOutline()) |
| 115 | + continue; |
| 116 | + |
| 117 | + // Do not split extra entry points in aarch64. They can be referred by |
| 118 | + // using ADRs and when this happens, these blocks cannot be placed far |
| 119 | + // away due to the limited range in ADR instruction. |
| 120 | + if (BC.isAArch64() && BB->isEntryPoint()) { |
| 121 | + BB->setCanOutline(false); |
| 122 | + continue; |
| 123 | + } |
| 124 | + |
| 125 | + if (BF.hasEHRanges() && !opts::SplitEH) { |
| 126 | + // We cannot move landing pads (or rather entry points for landing pads). |
| 127 | + if (BB->isLandingPad()) { |
| 128 | + BB->setCanOutline(false); |
| 129 | + continue; |
| 130 | + } |
| 131 | + // We cannot move a block that can throw since exception-handling |
| 132 | + // runtime cannot deal with split functions. However, if we can guarantee |
| 133 | + // that the block never throws, it is safe to move the block to |
| 134 | + // decrease the size of the function. |
| 135 | + for (MCInst &Instr : *BB) { |
| 136 | + if (BC.MIB->isInvoke(Instr)) { |
| 137 | + BB->setCanOutline(false); |
| 138 | + break; |
| 139 | + } |
| 140 | + } |
| 141 | + } |
| 142 | + } |
| 143 | + |
| 144 | + // Assign each basic block in NewLayout to either hot, warm, or cold fragment. |
| 145 | + assignFragmentThreeWay(BF, NewLayout); |
| 146 | + |
| 147 | + // Make sure all non-outlineable blocks are in the main-fragment. |
| 148 | + for (BinaryBasicBlock *BB : NewLayout) { |
| 149 | + if (!BB->canOutline()) |
| 150 | + BB->setFragmentNum(FragmentNum::main()); |
| 151 | + } |
| 152 | + |
| 153 | + // In case any non-outlineable blocks previously in warm or cold is now set |
| 154 | + // to be in main by the preceding for loop, move them to the end of main. |
| 155 | + llvm::stable_sort(NewLayout, |
| 156 | + [&](const BinaryBasicBlock *L, const BinaryBasicBlock *R) { |
| 157 | + return L->getFragmentNum() < R->getFragmentNum(); |
| 158 | + }); |
| 159 | + |
| 160 | + BF.getLayout().update(NewLayout); |
| 161 | + |
| 162 | + // For shared objects, invoke instructions and corresponding landing pads |
| 163 | + // have to be placed in the same fragment. When we split them, create |
| 164 | + // trampoline landing pads that will redirect the execution to real LPs. |
| 165 | + SplitFunctions::TrampolineSetType Trampolines; |
| 166 | + if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit()) |
| 167 | + Trampolines = SplitFunctions::createEHTrampolines(BF); |
| 168 | + |
| 169 | + if (BC.isX86() && BF.isSplit()) { |
| 170 | + size_t HotSize; |
| 171 | + size_t ColdSize; |
| 172 | + std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF); |
| 173 | + SplitBytesHot += HotSize; |
| 174 | + SplitBytesCold += ColdSize; |
| 175 | + } |
| 176 | +} |
| 177 | + |
| 178 | +void CDSplit::runOnFunctions(BinaryContext &BC) { |
| 179 | + if (!opts::UseCDSplit) |
| 180 | + return; |
| 181 | + |
| 182 | + // Initialize global variables. |
| 183 | + initialize(BC); |
| 184 | + |
| 185 | + // Only functions satisfying shouldConsider and shouldOptimize are candidates |
| 186 | + // for splitting. |
| 187 | + ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { |
| 188 | + return !(shouldConsider(BF) && shouldOptimize(BF)); |
| 189 | + }; |
| 190 | + |
| 191 | + // Make function splitting decisions in parallel. |
| 192 | + ParallelUtilities::runOnEachFunction( |
| 193 | + BC, ParallelUtilities::SchedulingPolicy::SP_BB_LINEAR, |
| 194 | + [&](BinaryFunction &BF) { runOnFunction(BF); }, SkipFunc, "CDSplit", |
| 195 | + /*ForceSequential=*/false); |
| 196 | + |
| 197 | + if (SplitBytesHot + SplitBytesCold > 0) { |
| 198 | + outs() << "BOLT-INFO: cdsplit separates " << SplitBytesHot |
| 199 | + << " hot bytes from " << SplitBytesCold << " cold bytes " |
| 200 | + << format("(%.2lf%% of split functions is in the main fragment)\n", |
| 201 | + 100.0 * SplitBytesHot / (SplitBytesHot + SplitBytesCold)); |
| 202 | + |
| 203 | + } else |
| 204 | + outs() << "BOLT-INFO: cdsplit didn't split any functions\n"; |
| 205 | +} |
| 206 | + |
| 207 | +} // namespace bolt |
| 208 | +} // namespace llvm |
0 commit comments