Skip to content

Commit e2f8e5c

Browse files
author
Alex B
committed
[lld-macho] Enhance safe ICF with thunk-based deduplication
Currently, our `safe` ICF mode only merges non-address-significant code, leaving duplicate address-significant functions in the output. This patch introduces `safe_thunks` ICF mode, which keeps a single master copy of each function and replaces address-significant duplicates with thunks that branch to the master copy.
1 parent 3b0a1ec commit e2f8e5c

File tree

6 files changed

+367
-4
lines changed

6 files changed

+367
-4
lines changed

lld/MachO/Arch/ARM64.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@ struct ARM64 : ARM64Common {
4141
Symbol *objcMsgSend) const override;
4242
void populateThunk(InputSection *thunk, Symbol *funcSym) override;
4343
void applyOptimizationHints(uint8_t *, const ObjFile &) const override;
44+
45+
// ICF safe_thunks hook: sets up `thunk` as a branch to `branchTarget`.
// Declared with `override` only, matching the other overrides in this struct.
void initICFSafeThunkBody(InputSection *thunk,
                          InputSection *branchTarget) const override;
47+
// ICF safe_thunks hook: size in bytes of one safe thunk. Declared with
// `override` only, matching the other overrides in this struct.
uint32_t getICFSafeThunkSize() const override;
4448
};
4549

4650
} // namespace
@@ -175,6 +179,25 @@ void ARM64::populateThunk(InputSection *thunk, Symbol *funcSym) {
175179
/*offset=*/0, /*addend=*/0,
176180
/*referent=*/funcSym);
177181
}
182+
// Just a single direct branch to the target function.
//
// This must be the plain `b` encoding (opcode bits [31:26] = 0b000101), not
// `bl` (0b100101, i.e. 0x94000000): a thunk has to leave the link register
// untouched so that the real function returns to the original caller rather
// than to whatever follows the thunk. The 26-bit immediate is left as zero
// here; the branch displacement comes from the ARM64_RELOC_BRANCH26
// relocation attached to the thunk.
static constexpr uint32_t icfSafeThunkCode[] = {
    0x14000000, // 00: b target
};
186+
187+
void ARM64::initICFSafeThunkBody(InputSection *thunk,
                                 InputSection *branchTarget) const {
  // The template bytes are never patched in place; only a relocation is
  // attached below. That makes it fine for the thunk to point directly at the
  // shared constexpr array instead of owning a copy.
  const auto *templateBytes =
      reinterpret_cast<const uint8_t *>(icfSafeThunkCode);
  thunk->data = {templateBytes, sizeof(icfSafeThunkCode)};

  // One pc-relative BRANCH26 relocation at offset 0, referring to the section
  // the thunk should jump to.
  thunk->relocs.emplace_back(/*type=*/ARM64_RELOC_BRANCH26,
                             /*pcrel=*/true, /*length=*/2,
                             /*offset=*/0, /*addend=*/0,
                             /*referent=*/branchTarget);
}
199+
200+
// Size in bytes of the ICF safe thunk template (icfSafeThunkCode).
uint32_t ARM64::getICFSafeThunkSize() const {
  constexpr uint32_t thunkBytes = sizeof(icfSafeThunkCode);
  return thunkBytes;
}
178201

179202
ARM64::ARM64() : ARM64Common(LP64()) {
180203
cpuType = CPU_TYPE_ARM64;

lld/MachO/Config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ enum class ICFLevel {
6868
unknown,
6969
none,
7070
safe,
71+
safe_thunks,
7172
all,
7273
};
7374

lld/MachO/Driver.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -847,8 +847,15 @@ static ICFLevel getICFLevel(const ArgList &args) {
847847
auto icfLevel = StringSwitch<ICFLevel>(icfLevelStr)
848848
.Cases("none", "", ICFLevel::none)
849849
.Case("safe", ICFLevel::safe)
850+
.Case("safe_thunks", ICFLevel::safe_thunks)
850851
.Case("all", ICFLevel::all)
851852
.Default(ICFLevel::unknown);
853+
854+
// safe_thunks relies on the target's ICF thunk hooks, and the diagnostic
// below promises arm64-only support, so every other architecture (x86_64h
// included) must be rejected here.
if (icfLevel == ICFLevel::safe_thunks && config->arch() != AK_arm64) {
  error("--icf=safe_thunks is only supported on arm64 targets");
}
858+
852859
if (icfLevel == ICFLevel::unknown) {
853860
warn(Twine("unknown --icf=OPTION `") + icfLevelStr +
854861
"', defaulting to `none'");
@@ -2104,7 +2111,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
21042111
// foldIdenticalLiterals before foldIdenticalSections.
21052112
foldIdenticalLiterals();
21062113
if (config->icfLevel != ICFLevel::none) {
2107-
if (config->icfLevel == ICFLevel::safe)
2114+
if (config->icfLevel == ICFLevel::safe ||
2115+
config->icfLevel == ICFLevel::safe_thunks)
21082116
markAddrSigSymbols();
21092117
foldIdenticalSections(/*onlyCfStrings=*/false);
21102118
} else if (config->dedupStrings) {

lld/MachO/ICF.cpp

Lines changed: 83 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ class ICF {
4545
const ConcatInputSection *ib);
4646
bool equalsVariable(const ConcatInputSection *ia,
4747
const ConcatInputSection *ib);
48+
void applySafeThunksToRange(size_t begin, size_t end);
4849

4950
// ICF needs a copy of the inputs vector because its equivalence-class
5051
// segregation algorithm destroys the proper sequence.
@@ -251,6 +252,63 @@ void ICF::forEachClassRange(size_t begin, size_t end,
251252
}
252253
}
253254

255+
// Given a range of identical icfInputs's, replace address significant functions
256+
// with a thunk that is just a direct branch to the first function in the
257+
// series. This way we end up we keep only one main body of the function but we
258+
// still retain address uniqueness of rellevant functions by having them be a
259+
// direct branch thunk rather than contain a full copy of the actual function
260+
// body.
261+
void ICF::applySafeThunksToRange(size_t begin, size_t end) {
262+
// If we need to create a unique ICF thunk, use the first section as the
263+
// section that all thunks will branch to.
264+
ConcatInputSection *masterIsec = icfInputs[begin];
265+
uint32_t thunkSize = target->getICFSafeThunkSize();
266+
static std::mutex thunkInsertionMutex;
267+
268+
uint32_t keepUniqueCount = masterIsec->keepUnique ? 1 : 0;
269+
for (size_t i = begin + 1; i < end; ++i) {
270+
ConcatInputSection *isec = icfInputs[i];
271+
if (isec->keepUnique)
272+
++keepUniqueCount;
273+
274+
// We create thunks for the 2nd, 3rd, ... keepUnique sections. The first
275+
// keepUnique section we leave as is - as it will not end up sharing an
276+
// address with any other keepUnique section.
277+
if (keepUniqueCount >= 2 && isec->keepUnique) {
278+
// If the target to be folded is smaller than the thunk size, then just
279+
// leave it as-is - creating the thunk would be a net loss.
280+
if (isec->data.size() <= thunkSize)
281+
return;
282+
283+
// applySafeThunksToRange is called from multiple threads, but
284+
// `makeSyntheticInputSection` and `addInputSection` are not thread safe.
285+
// So we need to guard them with a mutex.
286+
ConcatInputSection *thunk;
287+
{
288+
std::lock_guard<std::mutex> lock(thunkInsertionMutex);
289+
thunk = makeSyntheticInputSection(isec->getSegName(), isec->getName());
290+
addInputSection(thunk);
291+
}
292+
293+
target->initICFSafeThunkBody(thunk, masterIsec);
294+
thunk->foldIdentical(isec);
295+
296+
// Since we're folding the target function into a thunk, we need to adjust
297+
// the symbols that now got relocated from the target function to the
298+
// thunk.
299+
// Since the thunk is only one branch, we move all symbols to offset 0 and
300+
// make sure that the size of all non-zero-size symbols is equal to the
301+
// size of the branch.
302+
for (auto *sym : isec->symbols) {
303+
if (sym->value != 0)
304+
sym->value = 0;
305+
if (sym->size != 0)
306+
sym->size = thunkSize;
307+
}
308+
}
309+
}
310+
}
311+
254312
// Split icfInputs into shards, then parallelize invocation of FUNC on subranges
255313
// with matching equivalence class
256314
void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) {
@@ -335,9 +393,20 @@ void ICF::run() {
335393
forEachClass([&](size_t begin, size_t end) {
336394
if (end - begin < 2)
337395
return;
396+
bool useSafeThunks = config->icfLevel == ICFLevel::safe_thunks;
397+
398+
// For ICF level safe_thunks, replace keepUnique function bodies with
399+
thunks. For all other ICF levels, directly merge the functions.
400+
if (useSafeThunks)
401+
applySafeThunksToRange(begin, end);
402+
338403
ConcatInputSection *beginIsec = icfInputs[begin];
339-
for (size_t i = begin + 1; i < end; ++i)
404+
for (size_t i = begin + 1; i < end; ++i) {
405+
// When using safe_thunks, keepUnique inputs are already handled above
406+
if (useSafeThunks && icfInputs[i]->keepUnique)
407+
continue;
340408
beginIsec->foldIdentical(icfInputs[i]);
409+
}
341410
});
342411
}
343412

@@ -421,11 +490,22 @@ void macho::foldIdenticalSections(bool onlyCfStrings) {
421490
// can still fold it.
422491
bool hasFoldableFlags = (isSelRefsSection(isec) ||
423492
sectionType(isec->getFlags()) == MachO::S_REGULAR);
493+
494+
bool isCodeSec = isCodeSection(isec);
495+
496+
// When keepUnique is true, the section is not foldable. Unless we are at
497+
// icf level safe_thunks, in which case we still want to fold code sections.
498+
// When using safe_thunks we'll apply the safe_thunks logic at merge time
499+
// based on the 'keepUnique' flag.
500+
bool noUniqueRequirement =
501+
!isec->keepUnique ||
502+
((config->icfLevel == ICFLevel::safe_thunks) && isCodeSec);
503+
424504
// FIXME: consider non-code __text sections as foldable?
425505
bool isFoldable = (!onlyCfStrings || isCfStringSection(isec)) &&
426-
(isCodeSection(isec) || isFoldableWithAddendsRemoved ||
506+
(isCodeSec || isFoldableWithAddendsRemoved ||
427507
isGccExceptTabSection(isec)) &&
428-
!isec->keepUnique && !isec->hasAltEntry &&
508+
noUniqueRequirement && !isec->hasAltEntry &&
429509
!isec->shouldOmitFromOutput() && hasFoldableFlags;
430510
if (isFoldable) {
431511
foldable.push_back(isec);

lld/MachO/Target.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,16 @@ class TargetInfo {
7474
uint64_t selrefVA,
7575
Symbol *objcMsgSend) const = 0;
7676

77+
// Init 'thunk' so that it be a direct jump to 'branchTarget'.
78+
virtual void initICFSafeThunkBody(InputSection *thunk,
79+
InputSection *branchTarget) const {
80+
llvm_unreachable("target does not support ICF safe thunks");
81+
}
82+
83+
// Size of one ICF safe thunk for this target. This base implementation is
// unreachable; a target that supports ICF safe thunks has to provide its own
// override.
virtual uint32_t getICFSafeThunkSize() const {
  llvm_unreachable("target does not support ICF safe thunks");
}
86+
7787
// Symbols may be referenced via either the GOT or the stubs section,
7888
// depending on the relocation type. prepareSymbolRelocation() will set up the
7989
// GOT/stubs entries, and resolveSymbolVA() will return the addresses of those

0 commit comments

Comments
 (0)