Skip to content

Commit dd09a7d

Browse files
authored
[BOLT] Add split function support for the Linux kernel (#90541)
While rewriting the Linux kernel, we try to fit optimized functions into their original boundaries. When an optimized function grows beyond its original size, we skip it during the rewrite and end up with a suboptimal code layout. To overcome this issue, add support for the --split-functions option so that the hot part of a function can fit into its original space, while the cold part is placed in the reserved space in the binary.
1 parent df91cde commit dd09a7d

File tree

2 files changed

+48
-9
lines changed

2 files changed

+48
-9
lines changed

bolt/lib/Passes/SplitFunctions.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,12 @@ Error SplitFunctions::runOnFunctions(BinaryContext &BC) {
715715
if (!opts::SplitFunctions)
716716
return Error::success();
717717

718+
if (BC.IsLinuxKernel && BC.BOLTReserved.empty()) {
719+
BC.errs() << "BOLT-ERROR: split functions require reserved space in the "
720+
"Linux kernel binary\n";
721+
exit(1);
722+
}
723+
718724
// If split strategy is not CDSplit, then a second run of the pass is not
719725
// needed after function reordering.
720726
if (BC.HasFinalizedFunctionOrder &&
@@ -829,6 +835,13 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
829835
}
830836
}
831837
}
838+
839+
// Outlining blocks with dynamic branches is not supported yet.
840+
if (BC.IsLinuxKernel) {
841+
if (llvm::any_of(
842+
*BB, [&](MCInst &Inst) { return BC.MIB->isDynamicBranch(Inst); }))
843+
BB->setCanOutline(false);
844+
}
832845
}
833846

834847
BF.getLayout().updateLayoutIndices();

bolt/lib/Rewrite/LinuxKernelRewriter.cpp

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -783,11 +783,9 @@ Error LinuxKernelRewriter::rewriteORCTables() {
783783
};
784784

785785
// Emit new ORC entries for the emitted function.
786-
auto emitORC = [&](const BinaryFunction &BF) -> Error {
787-
assert(!BF.isSplit() && "Split functions not supported by ORC writer yet.");
788-
786+
auto emitORC = [&](const FunctionFragment &FF) -> Error {
789787
ORCState CurrentState = NullORC;
790-
for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
788+
for (BinaryBasicBlock *BB : FF) {
791789
for (MCInst &Inst : *BB) {
792790
ErrorOr<ORCState> ErrorOrState =
793791
BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC");
@@ -808,7 +806,36 @@ Error LinuxKernelRewriter::rewriteORCTables() {
808806
return Error::success();
809807
};
810808

809+
// Emit ORC entries for cold fragments. We assume that these fragments are
810+
// emitted contiguously in memory using reserved space in the kernel. This
811+
// assumption is validated in post-emit pass validateORCTables() where we
812+
// check that ORC entries are sorted by their addresses.
813+
auto emitColdORC = [&]() -> Error {
814+
for (BinaryFunction &BF :
815+
llvm::make_second_range(BC.getBinaryFunctions())) {
816+
if (!BC.shouldEmit(BF))
817+
continue;
818+
for (FunctionFragment &FF : BF.getLayout().getSplitFragments())
819+
if (Error E = emitORC(FF))
820+
return E;
821+
}
822+
823+
return Error::success();
824+
};
825+
826+
bool ShouldEmitCold = !BC.BOLTReserved.empty();
811827
for (ORCListEntry &Entry : ORCEntries) {
828+
if (ShouldEmitCold && Entry.IP > BC.BOLTReserved.start()) {
829+
if (Error E = emitColdORC())
830+
return E;
831+
832+
// Emit terminator entry at the end of the reserved region.
833+
if (Error E = emitORCEntry(BC.BOLTReserved.end(), NullORC))
834+
return E;
835+
836+
ShouldEmitCold = false;
837+
}
838+
812839
// Emit original entries for functions that we haven't modified.
813840
if (!Entry.BF || !BC.shouldEmit(*Entry.BF)) {
814841
// Emit terminator only if it marks the start of a function.
@@ -822,7 +849,7 @@ Error LinuxKernelRewriter::rewriteORCTables() {
822849
// Emit all ORC entries for a function referenced by an entry and skip over
823850
// the rest of entries for this function by resetting its ORC attribute.
824851
if (Entry.BF->hasORC()) {
825-
if (Error E = emitORC(*Entry.BF))
852+
if (Error E = emitORC(Entry.BF->getLayout().getMainFragment()))
826853
return E;
827854
Entry.BF->setHasORC(false);
828855
}
@@ -831,10 +858,9 @@ Error LinuxKernelRewriter::rewriteORCTables() {
831858
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted " << NumEmitted
832859
<< " ORC entries\n");
833860

834-
// Replicate terminator entry at the end of sections to match the original
835-
// table sizes.
836-
const BinaryFunction &LastBF = BC.getBinaryFunctions().rbegin()->second;
837-
const uint64_t LastIP = LastBF.getAddress() + LastBF.getMaxSize();
861+
// Populate ORC tables with a terminator entry with max address to match the
862+
// original table sizes.
863+
const uint64_t LastIP = std::numeric_limits<uint64_t>::max();
838864
while (UnwindWriter.bytesRemaining()) {
839865
if (Error E = emitORCEntry(LastIP, NullORC, nullptr, /*Force*/ true))
840866
return E;

0 commit comments

Comments
 (0)