Skip to content

Commit bbae02a

Browse files
Colibrow authored and MaskRay committed
Add new ELF linker options for order section layout
1 parent a5e969a commit bbae02a

File tree

9 files changed

+694
-0
lines changed

9 files changed

+694
-0
lines changed

lld/ELF/BPSectionOrderer.cpp

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
//===- BPSectionOrderer.cpp------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "BPSectionOrderer.h"
10+
#include "llvm/Support/xxhash.h"
11+
12+
using namespace llvm;
13+
using namespace lld::elf;
14+
15+
/// Appends content hashes for this section to \p hashes, then sorts and
/// de-duplicates the whole vector.
///
/// One hash is produced per byte offset of the section content: the xxHash64
/// of the window of up to `windowSize` bytes that starts at that offset.
/// Windows that start near the end of the content are shorter than
/// `windowSize`. \p sectionToIdx is not consulted by this implementation.
void BPSectionELF::getSectionHashes(
    llvm::SmallVectorImpl<uint64_t> &hashes,
    const llvm::DenseMap<const void *, uint64_t> &sectionToIdx) const {
  constexpr unsigned windowSize = 4;

  // Slide over the content one byte at a time, hashing the leading window of
  // whatever remains.
  for (auto rest = isec->content(); !rest.empty(); rest = rest.drop_front())
    hashes.push_back(xxHash64(rest.take_front(windowSize)));

  llvm::sort(hashes);
  auto firstDuplicate = std::unique(hashes.begin(), hashes.end());
  hashes.erase(firstDuplicate, hashes.end());
}
29+
30+
/// Gathers the input sections that back sized, defined symbols, hands them to
/// BPSectionBase::reorderSectionsByBalancedPartitioning, and converts the
/// result into a map from input section to integer priority.
///
/// Symbols from the global symbol table are visited first, followed by the
/// local symbols of every object file. A section referenced by several
/// symbols is only submitted once.
llvm::DenseMap<const lld::elf::InputSectionBase *, int>
lld::elf::runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
                                  bool forFunctionCompression,
                                  bool forDataCompression,
                                  bool compressionSortStartupFunctions,
                                  bool verbose) {
  SmallVector<std::unique_ptr<BPSectionBase>> sections;
  DenseSet<const InputSectionBase *> seenSections;

  // Wraps the section that defines `sym` as a BPSectionELF, unless the symbol
  // has no size, is not a Defined, has no InputSectionBase, or its section
  // has already been collected.
  auto collectSectionOf = [&](Symbol *sym) {
    if (sym->getSize() == 0)
      return;
    auto *def = dyn_cast<Defined>(sym);
    if (!def)
      return;
    auto *isec = dyn_cast_or_null<InputSectionBase>(def->section);
    if (!isec)
      return;
    if (seenSections.insert(isec).second)
      sections.emplace_back(std::make_unique<BPSectionELF>(isec));
  };

  for (Symbol *sym : ctx.symtab->getSymbols())
    collectSectionOf(sym);

  for (ELFFileBase *file : ctx.objectFiles)
    for (Symbol *sym : file->getLocalSymbols())
      collectSectionOf(sym);

  auto reorderedSections = BPSectionBase::reorderSectionsByBalancedPartitioning(
      profilePath, forFunctionCompression, forDataCompression,
      compressionSortStartupFunctions, verbose, sections);

  // Translate the generic (section wrapper, priority) pairs back into the
  // ELF-specific key and the int priority type expected by the caller.
  DenseMap<const InputSectionBase *, int> result;
  for (const auto [sec, priority] : reorderedSections) {
    const void *rawSection = cast<BPSectionELF>(sec)->getSection();
    result.try_emplace(static_cast<const InputSectionBase *>(rawSection),
                       static_cast<int>(priority));
  }
  return result;
}

lld/ELF/BPSectionOrderer.h

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
//===- BPSectionOrderer.h -------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
///
9+
/// This file uses Balanced Partitioning to order sections to improve startup
10+
/// time and compressed size.
11+
///
12+
//===----------------------------------------------------------------------===//
13+
14+
#ifndef LLD_ELF_BPSECTION_ORDERER_H
15+
#define LLD_ELF_BPSECTION_ORDERER_H
16+
17+
#include "InputFiles.h"
18+
#include "InputSection.h"
19+
#include "SymbolTable.h"
20+
#include "lld/Common/BPSectionOrdererBase.h"
21+
#include "llvm/ADT/DenseMap.h"
22+
#include "llvm/ADT/STLExtras.h"
23+
#include "llvm/ADT/StringRef.h"
24+
#include "llvm/BinaryFormat/ELF.h"
25+
26+
namespace lld::elf {
27+
28+
class InputSection;
29+
30+
class BPSymbolELF : public BPSymbol {
31+
const Symbol *sym;
32+
33+
public:
34+
explicit BPSymbolELF(const Symbol *s) : sym(s) {}
35+
36+
llvm::StringRef getName() const override { return sym->getName(); }
37+
38+
const Defined *asDefined() const { return llvm::dyn_cast<Defined>(sym); }
39+
40+
std::optional<uint64_t> getValue() const override {
41+
if (auto *d = asDefined())
42+
return d->value;
43+
return {};
44+
}
45+
46+
std::optional<uint64_t> getSize() const override {
47+
if (auto *d = asDefined())
48+
return d->size;
49+
return {};
50+
}
51+
52+
InputSectionBase *getInputSection() const {
53+
if (auto *d = asDefined())
54+
return llvm::dyn_cast_or_null<InputSectionBase>(d->section);
55+
return nullptr;
56+
}
57+
58+
const Symbol *getSymbol() const { return sym; }
59+
};
60+
61+
class BPSectionELF : public BPSectionBase {
62+
const InputSectionBase *isec;
63+
64+
public:
65+
explicit BPSectionELF(const InputSectionBase *sec) : isec(sec) {}
66+
67+
const void *getSection() const override { return isec; }
68+
69+
uint64_t getSize() const override { return isec->getSize(); }
70+
71+
bool isCodeSection() const override {
72+
return isec->flags & llvm::ELF::SHF_EXECINSTR;
73+
}
74+
75+
SmallVector<std::unique_ptr<BPSymbol>> getSymbols() const override {
76+
SmallVector<std::unique_ptr<BPSymbol>> symbols;
77+
for (Symbol *sym : isec->file->getSymbols())
78+
if (auto *d = dyn_cast<Defined>(sym))
79+
if (d->size > 0 && d->section == isec)
80+
symbols.emplace_back(std::make_unique<BPSymbolELF>(sym));
81+
82+
return symbols;
83+
}
84+
85+
std::optional<StringRef>
86+
getResolvedLinkageName(llvm::StringRef name) const override {
87+
return {};
88+
}
89+
90+
void getSectionHashes(llvm::SmallVectorImpl<uint64_t> &hashes,
91+
const llvm::DenseMap<const void *, uint64_t>
92+
&sectionToIdx) const override;
93+
94+
static bool classof(const BPSectionBase *s) { return true; }
95+
};
96+
97+
/// Run Balanced Partitioning to find the optimal function and data order to
98+
/// improve startup time and compressed size.
99+
///
100+
/// It is important that -ffunction-sections and -fdata-sections are used to
101+
/// ensure functions and data are in their own sections and thus can be
102+
/// reordered.
103+
llvm::DenseMap<const InputSectionBase *, int>
104+
runBalancedPartitioning(Ctx &ctx, llvm::StringRef profilePath,
105+
bool forFunctionCompression, bool forDataCompression,
106+
bool compressionSortStartupFunctions, bool verbose);
107+
} // namespace lld::elf
108+
109+
#endif

lld/ELF/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ add_lld_library(lldELF
3737
Arch/X86.cpp
3838
Arch/X86_64.cpp
3939
ARMErrataFix.cpp
40+
BPSectionOrderer.cpp
4041
CallGraphSort.cpp
4142
DWARF.cpp
4243
Driver.cpp

lld/ELF/Config.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,12 @@ struct Config {
264264
bool armBe8 = false;
265265
BsymbolicKind bsymbolic = BsymbolicKind::None;
266266
CGProfileSortKind callGraphProfileSort;
267+
llvm::StringRef irpgoProfilePath;
268+
bool bpStartupFunctionSort = false;
269+
bool bpCompressionSortStartupFunctions = false;
270+
bool bpFunctionOrderForCompression = false;
271+
bool bpDataOrderForCompression = false;
272+
bool bpVerboseSectionOrderer = false;
267273
bool checkSections;
268274
bool checkDynamicRelocs;
269275
std::optional<llvm::DebugCompressionType> compressDebugSections;

lld/ELF/Driver.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,6 +1259,55 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
12591259
ctx.arg.bsymbolic = BsymbolicKind::All;
12601260
}
12611261
ctx.arg.callGraphProfileSort = getCGProfileSortKind(ctx, args);
1262+
ctx.arg.irpgoProfilePath = args.getLastArgValue(OPT_irpgo_profile);
1263+
ctx.arg.bpCompressionSortStartupFunctions =
1264+
args.hasFlag(OPT_bp_compression_sort_startup_functions,
1265+
OPT_no_bp_compression_sort_startup_functions, false);
1266+
if (auto *arg = args.getLastArg(OPT_bp_startup_sort)) {
1267+
StringRef startupSortStr = arg->getValue();
1268+
if (startupSortStr == "function") {
1269+
ctx.arg.bpStartupFunctionSort = true;
1270+
} else if (startupSortStr != "none") {
1271+
ErrAlways(ctx) << "unknown value '" + startupSortStr + "' for " +
1272+
arg->getSpelling();
1273+
}
1274+
if (startupSortStr != "none")
1275+
if (args.hasArg(OPT_call_graph_ordering_file))
1276+
ErrAlways(ctx) << "--bp-startup-sort=function is incompatible with "
1277+
"--call-graph-ordering-file";
1278+
}
1279+
if (ctx.arg.irpgoProfilePath.empty()) {
1280+
if (ctx.arg.bpStartupFunctionSort)
1281+
ErrAlways(ctx) << "--bp-startup-sort=function must be used with "
1282+
"--irpgo-profile";
1283+
if (ctx.arg.bpCompressionSortStartupFunctions)
1284+
ErrAlways(ctx)
1285+
<< "--bp-compression-sort-startup-functions must be used with "
1286+
"--irpgo-profile";
1287+
}
1288+
1289+
if (auto *arg = args.getLastArg(OPT_bp_compression_sort)) {
1290+
StringRef compressionSortStr = arg->getValue();
1291+
if (compressionSortStr == "function") {
1292+
ctx.arg.bpFunctionOrderForCompression = true;
1293+
} else if (compressionSortStr == "data") {
1294+
ctx.arg.bpDataOrderForCompression = true;
1295+
} else if (compressionSortStr == "both") {
1296+
ctx.arg.bpFunctionOrderForCompression = true;
1297+
ctx.arg.bpDataOrderForCompression = true;
1298+
} else if (compressionSortStr != "none") {
1299+
ErrAlways(ctx) << "unknown value '" + compressionSortStr + "' for " +
1300+
arg->getSpelling();
1301+
}
1302+
if (ctx.arg.bpDataOrderForCompression ||
1303+
ctx.arg.bpFunctionOrderForCompression) {
1304+
if (args.getLastArg(OPT_call_graph_ordering_file) != nullptr) {
1305+
ErrAlways(ctx) << "--bp-compression-sort is incompatible with "
1306+
"--call-graph-ordering-file";
1307+
}
1308+
}
1309+
}
1310+
ctx.arg.bpVerboseSectionOrderer = args.hasArg(OPT_verbose_bp_section_orderer);
12621311
ctx.arg.checkSections =
12631312
args.hasFlag(OPT_check_sections, OPT_no_check_sections, true);
12641313
ctx.arg.chroot = args.getLastArgValue(OPT_chroot);

lld/ELF/Options.td

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,24 @@ def call_graph_profile_sort: JJ<"call-graph-profile-sort=">,
141141
def : FF<"no-call-graph-profile-sort">, Alias<call_graph_profile_sort>, AliasArgs<["none"]>,
142142
Flags<[HelpHidden]>;
143143

144+
defm irpgo_profile: Eq<"irpgo-profile",
145+
"Read the IRPGO profile for use with -bp-startup-sort and other profile-guided optimizations">;
146+
147+
def bp_startup_sort: JJ<"bp-startup-sort=">,
148+
MetaVarName<"[none,function]">,
149+
HelpText<"Order sections based on profile data to improve startup time">;
150+
151+
defm bp_compression_sort_startup_functions: BB<"bp-compression-sort-startup-functions",
152+
"Order startup functions by balanced partition to improve compressed size in addition to startup time",
153+
"Do not order startup function for compression">;
154+
155+
def bp_compression_sort: JJ<"bp-compression-sort=">,
156+
MetaVarName<"[none,function,data,both]">,
157+
HelpText<"Order sections by balanced partition to improve compressed size">;
158+
159+
def verbose_bp_section_orderer: FF<"verbose-bp-section-orderer">,
160+
HelpText<"Print information on how many sections were ordered by balanced partitioning and a measure of the expected number of page faults">;
161+
144162
// --chroot doesn't have a help text because it is an internal option.
145163
def chroot: Separate<["--"], "chroot">;
146164

lld/ELF/Writer.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "Writer.h"
1010
#include "AArch64ErrataFix.h"
1111
#include "ARMErrataFix.h"
12+
#include "BPSectionOrderer.h"
1213
#include "CallGraphSort.h"
1314
#include "Config.h"
1415
#include "InputFiles.h"
@@ -1082,6 +1083,16 @@ static void maybeShuffle(Ctx &ctx,
10821083
// that don't appear in the order file.
10831084
static DenseMap<const InputSectionBase *, int> buildSectionOrder(Ctx &ctx) {
10841085
DenseMap<const InputSectionBase *, int> sectionOrder;
1086+
if (ctx.arg.bpStartupFunctionSort || ctx.arg.bpFunctionOrderForCompression ||
1087+
ctx.arg.bpDataOrderForCompression) {
1088+
TimeTraceScope timeScope("Balanced Partitioning Section Orderer");
1089+
sectionOrder = runBalancedPartitioning(
1090+
ctx, ctx.arg.bpStartupFunctionSort ? ctx.arg.irpgoProfilePath : "",
1091+
ctx.arg.bpFunctionOrderForCompression,
1092+
ctx.arg.bpDataOrderForCompression,
1093+
ctx.arg.bpCompressionSortStartupFunctions,
1094+
ctx.arg.bpVerboseSectionOrderer);
1095+
}
10851096
if (!ctx.arg.callGraphProfile.empty())
10861097
sectionOrder = computeCallGraphProfileOrder(ctx);
10871098

0 commit comments

Comments
 (0)