Skip to content

Commit 975225c

Browse files
author
Alex B
committed
[lld-macho] Enhance safe ICF with thunk-based deduplication
Currently, our `safe` ICF mode only merges non-address-significant code, leaving duplicate address-significant functions in the output. This patch introduces `safe_thunks` ICF mode, which keeps a single master copy of each function and replaces address-significant duplicates with thunks that branch to the master copy.
1 parent 3b0a1ec commit 975225c

File tree

6 files changed

+335
-4
lines changed

6 files changed

+335
-4
lines changed

lld/MachO/Arch/ARM64.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ struct ARM64 : ARM64Common {
4141
Symbol *objcMsgSend) const override;
4242
void populateThunk(InputSection *thunk, Symbol *funcSym) override;
4343
void applyOptimizationHints(uint8_t *, const ObjFile &) const override;
44+
45+
virtual void initICFSafeThunkBody(InputSection *thunk,
46+
InputSection *branchTarget) const override;
4447
};
4548

4649
} // namespace
@@ -175,6 +178,23 @@ void ARM64::populateThunk(InputSection *thunk, Symbol *funcSym) {
175178
/*offset=*/0, /*addend=*/0,
176179
/*referent=*/funcSym);
177180
}
181+
// Body of an ICF safe thunk: a single unconditional branch to the target
// function.
//
// The opcode must be `b` (bits 31:26 == 0b000101, i.e. 0x14000000) and not
// `bl` (0x94000000): `bl` overwrites the link register, so the function the
// thunk jumps to would return into the thunk instead of to the real caller.
// The 26-bit immediate is left as zero here; the branch destination is
// expected to be supplied through the ARM64_RELOC_BRANCH26 relocation that is
// attached at offset 0 of the thunk.
static constexpr uint32_t icfSafeThunkCode[] = {
    0x14000000, // 00: b target
};
185+
186+
// Fills in 'thunk' so that it consists of the ICF safe thunk template plus a
// branch relocation that points at 'branchTarget'.
void ARM64::initICFSafeThunkBody(InputSection *thunk,
                                 InputSection *branchTarget) const {
  // The template bytes are not edited in place here; only a relocation is
  // attached below. That is why the section data can alias the constexpr
  // array directly instead of owning a private copy.
  const auto *templateBytes =
      reinterpret_cast<const uint8_t *>(icfSafeThunkCode);
  thunk->data = {templateBytes, sizeof(icfSafeThunkCode)};

  // Relocate the branch instruction at offset 0 against the target section.
  thunk->relocs.emplace_back(/*type=*/ARM64_RELOC_BRANCH26,
                             /*pcrel=*/true, /*length=*/2,
                             /*offset=*/0, /*addend=*/0,
                             /*referent=*/branchTarget);
}
178198

179199
ARM64::ARM64() : ARM64Common(LP64()) {
180200
cpuType = CPU_TYPE_ARM64;

lld/MachO/Config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ enum class ICFLevel {
6868
unknown,
6969
none,
7070
safe,
71+
safe_thunks,
7172
all,
7273
};
7374

lld/MachO/Driver.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -847,8 +847,15 @@ static ICFLevel getICFLevel(const ArgList &args) {
847847
auto icfLevel = StringSwitch<ICFLevel>(icfLevelStr)
848848
.Cases("none", "", ICFLevel::none)
849849
.Case("safe", ICFLevel::safe)
850+
.Case("safe_thunks", ICFLevel::safe_thunks)
850851
.Case("all", ICFLevel::all)
851852
.Default(ICFLevel::unknown);
853+
854+
// safe_thunks needs a target-specific thunk body. The default
// TargetInfo::initICFSafeThunkBody is llvm_unreachable, and the diagnostic
// below promises arm64-only support, so every other architecture (including
// x86_64h) must be rejected here rather than reaching that unreachable.
if (icfLevel == ICFLevel::safe_thunks && config->arch() != AK_arm64) {
  error("--icf=safe_thunks is only supported on arm64 targets");
}
858+
852859
if (icfLevel == ICFLevel::unknown) {
853860
warn(Twine("unknown --icf=OPTION `") + icfLevelStr +
854861
"', defaulting to `none'");
@@ -2104,7 +2111,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
21042111
// foldIdenticalLiterals before foldIdenticalSections.
21052112
foldIdenticalLiterals();
21062113
if (config->icfLevel != ICFLevel::none) {
2107-
if (config->icfLevel == ICFLevel::safe)
2114+
if (config->icfLevel == ICFLevel::safe ||
2115+
config->icfLevel == ICFLevel::safe_thunks)
21082116
markAddrSigSymbols();
21092117
foldIdenticalSections(/*onlyCfStrings=*/false);
21102118
} else if (config->dedupStrings) {

lld/MachO/ICF.cpp

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ class ICF {
4545
const ConcatInputSection *ib);
4646
bool equalsVariable(const ConcatInputSection *ia,
4747
const ConcatInputSection *ib);
48+
void applySafeThunksToRange(size_t begin, size_t end);
4849

4950
// ICF needs a copy of the inputs vector because its equivalence-class
5051
// segregation algorithm destroys the proper sequence.
@@ -251,6 +252,38 @@ void ICF::forEachClassRange(size_t begin, size_t end,
251252
}
252253
}
253254

255+
// Given a range of identical icfInputs's, replace address significant functions
256+
// with a thunk that is just a direct branch to the first function in the
257+
// series. This way we end up we keep only one main body of the function but we
258+
// still retain address uniqueness of rellevant functions by having them be a
259+
// direct branch thunk rather than contain a full copy of the actual function
260+
// body.
261+
void ICF::applySafeThunksToRange(size_t begin, size_t end) {
262+
// If we need to create a unique ICF thunk, use the first section as the
263+
// section that all thunks will branch to.
264+
ConcatInputSection *masterIsec = icfInputs[begin];
265+
266+
uint32_t keepUniqueCount = masterIsec->keepUnique ? 1 : 0;
267+
for (size_t i = begin + 1; i < end; ++i) {
268+
ConcatInputSection *isec = icfInputs[i];
269+
if (isec->keepUnique)
270+
++keepUniqueCount;
271+
272+
// We create thunks for the 2nd, 3rd, ... keepUnique sections. The first
273+
// keepUnique section we leave as is - as it will not end up sharing an
274+
// address with any other keepUnique section.
275+
if (keepUniqueCount >= 2 && isec->keepUnique) {
276+
ConcatInputSection *thunk =
277+
makeSyntheticInputSection(isec->getSegName(), isec->getName());
278+
target->initICFSafeThunkBody(thunk, masterIsec);
279+
280+
thunk->foldIdentical(isec);
281+
282+
addInputSection(thunk);
283+
}
284+
}
285+
}
286+
254287
// Split icfInputs into shards, then parallelize invocation of FUNC on subranges
255288
// with matching equivalence class
256289
void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) {
@@ -335,9 +368,20 @@ void ICF::run() {
335368
forEachClass([&](size_t begin, size_t end) {
336369
if (end - begin < 2)
337370
return;
371+
bool useSafeThunks = config->icfLevel == ICFLevel::safe_thunks;
372+
373+
// For ICF level safe_thunks, replace keepUnique function bodies with
374+
// thunks. For all other ICF levels, directly merge the functions.
375+
if (useSafeThunks)
376+
applySafeThunksToRange(begin, end);
377+
338378
ConcatInputSection *beginIsec = icfInputs[begin];
339-
for (size_t i = begin + 1; i < end; ++i)
379+
for (size_t i = begin + 1; i < end; ++i) {
380+
// When using safe_thunks, keepUnique inputs are already handled above
381+
if (useSafeThunks && icfInputs[i]->keepUnique)
382+
continue;
340383
beginIsec->foldIdentical(icfInputs[i]);
384+
}
341385
});
342386
}
343387

@@ -421,11 +465,22 @@ void macho::foldIdenticalSections(bool onlyCfStrings) {
421465
// can still fold it.
422466
bool hasFoldableFlags = (isSelRefsSection(isec) ||
423467
sectionType(isec->getFlags()) == MachO::S_REGULAR);
468+
469+
bool isCodeSec = isCodeSection(isec);
470+
471+
// When keepUnique is true, the section is not foldable, unless we are at
472+
// icf level safe_thunks, in which case we still want to fold code sections.
473+
// When using safe_thunks we'll apply the safe_thunks logic at merge time
474+
// based on the 'keepUnique' flag.
475+
bool noUniqueRequirement =
476+
!isec->keepUnique ||
477+
((config->icfLevel == ICFLevel::safe_thunks) && isCodeSec);
478+
424479
// FIXME: consider non-code __text sections as foldable?
425480
bool isFoldable = (!onlyCfStrings || isCfStringSection(isec)) &&
426-
(isCodeSection(isec) || isFoldableWithAddendsRemoved ||
481+
(isCodeSec || isFoldableWithAddendsRemoved ||
427482
isGccExceptTabSection(isec)) &&
428-
!isec->keepUnique && !isec->hasAltEntry &&
483+
noUniqueRequirement && !isec->hasAltEntry &&
429484
!isec->shouldOmitFromOutput() && hasFoldableFlags;
430485
if (isFoldable) {
431486
foldable.push_back(isec);

lld/MachO/Target.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,12 @@ class TargetInfo {
7474
uint64_t selrefVA,
7575
Symbol *objcMsgSend) const = 0;
7676

77+
// Initialize 'thunk' so that it is a direct jump to 'branchTarget'. This
// default implementation is unreachable; a target that supports ICF safe
// thunks is expected to override it.
78+
virtual void initICFSafeThunkBody(InputSection *thunk,
79+
InputSection *branchTarget) const {
80+
llvm_unreachable("target does not support ICF safe thunks");
81+
}
82+
7783
// Symbols may be referenced via either the GOT or the stubs section,
7884
// depending on the relocation type. prepareSymbolRelocation() will set up the
7985
// GOT/stubs entries, and resolveSymbolVA() will return the addresses of those

0 commit comments

Comments
 (0)