Skip to content

Commit 4ac79a8

Browse files
authored
[lld-macho] Use Symbols as branch target for safe_thunks ICF (llvm#126835)
## Problem The `safe_thunks` ICF optimization in `lld-macho` was creating thunks that pointed to `InputSection`s instead of `Symbol`s. While, generally, branch relocations can point to symbols or input sections, in this case we need them to point to symbols as subsequently the branch extension algorithm expects branches to always point to `Symbol`'s. ## Solution This patch changes the ICF implementation so that safe thunks point to `Symbol`'s rather than `InputSection`s. ## Testing The existing `arm64-thunks.s` test is modified to include `--icf=safe_thunks` to explicitly verify the interaction between ICF and branch range extension thunks. Two functions were added that will be merged together via a thunk. Before this patch, this test would generate an assert - now this scenario is correctly handled.
1 parent c2e9677 commit 4ac79a8

File tree

4 files changed

+80
-25
lines changed

4 files changed

+80
-25
lines changed

lld/MachO/Arch/ARM64.cpp

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ struct ARM64 : ARM64Common {
4343
void applyOptimizationHints(uint8_t *, const ObjFile &) const override;
4444

4545
void initICFSafeThunkBody(InputSection *thunk,
46-
InputSection *branchTarget) const override;
47-
InputSection *getThunkBranchTarget(InputSection *thunk) const override;
46+
Symbol *targetSym) const override;
47+
Symbol *getThunkBranchTarget(InputSection *thunk) const override;
4848
uint32_t getICFSafeThunkSize() const override;
4949
};
5050

@@ -185,8 +185,7 @@ static constexpr uint32_t icfSafeThunkCode[] = {
185185
0x14000000, // 08: b target
186186
};
187187

188-
void ARM64::initICFSafeThunkBody(InputSection *thunk,
189-
InputSection *branchTarget) const {
188+
void ARM64::initICFSafeThunkBody(InputSection *thunk, Symbol *targetSym) const {
190189
// The base data here will not be itself modified, we'll just be adding a
191190
// reloc below. So we can directly use the constexpr above as the data.
192191
thunk->data = {reinterpret_cast<const uint8_t *>(icfSafeThunkCode),
@@ -195,17 +194,17 @@ void ARM64::initICFSafeThunkBody(InputSection *thunk,
195194
thunk->relocs.emplace_back(/*type=*/ARM64_RELOC_BRANCH26,
196195
/*pcrel=*/true, /*length=*/2,
197196
/*offset=*/0, /*addend=*/0,
198-
/*referent=*/branchTarget);
197+
/*referent=*/targetSym);
199198
}
200199

201-
InputSection *ARM64::getThunkBranchTarget(InputSection *thunk) const {
200+
Symbol *ARM64::getThunkBranchTarget(InputSection *thunk) const {
202201
assert(thunk->relocs.size() == 1 &&
203202
"expected a single reloc on ARM64 ICF thunk");
204203
auto &reloc = thunk->relocs[0];
205-
assert(isa<InputSection *>(reloc.referent) &&
206-
"ARM64 thunk reloc is expected to point to an InputSection");
204+
assert(isa<Symbol *>(reloc.referent) &&
205+
"ARM64 thunk reloc is expected to point to a Symbol");
207206

208-
return cast<InputSection *>(reloc.referent);
207+
return cast<Symbol *>(reloc.referent);
209208
}
210209

211210
uint32_t ARM64::getICFSafeThunkSize() const { return sizeof(icfSafeThunkCode); }

lld/MachO/ICF.cpp

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ using namespace lld;
2727
using namespace lld::macho;
2828

2929
static constexpr bool verboseDiagnostics = false;
30+
// This counter is used to generate unique thunk names.
31+
static uint64_t icfThunkCounter = 0;
3032

3133
class ICF {
3234
public:
@@ -263,6 +265,31 @@ void ICF::forEachClassRange(size_t begin, size_t end,
263265
}
264266
}
265267

268+
// Find or create a symbol at offset 0 in the given section
269+
static Symbol *getThunkTargetSymbol(ConcatInputSection *isec) {
270+
for (Symbol *sym : isec->symbols)
271+
if (auto *d = dyn_cast<Defined>(sym))
272+
if (d->value == 0)
273+
return sym;
274+
275+
std::string thunkName;
276+
if (isec->symbols.size() == 0)
277+
thunkName = isec->getName().str() + ".icf.0";
278+
else
279+
thunkName = isec->getName().str() + "icf.thunk.target" +
280+
std::to_string(icfThunkCounter++);
281+
282+
// If no symbol found at offset 0, create one
283+
auto *sym = make<Defined>(thunkName, /*file=*/nullptr, isec,
284+
/*value=*/0, /*size=*/isec->getSize(),
285+
/*isWeakDef=*/false, /*isExternal=*/false,
286+
/*isPrivateExtern=*/false, /*isThumb=*/false,
287+
/*isReferencedDynamically=*/false,
288+
/*noDeadStrip=*/false);
289+
isec->symbols.push_back(sym);
290+
return sym;
291+
}
292+
266293
// Given a range of identical icfInputs, replace address significant functions
267294
// with a thunk that is just a direct branch to the first function in the
268295
// series. This way we keep only one main body of the function but we still
@@ -280,6 +307,9 @@ void ICF::applySafeThunksToRange(size_t begin, size_t end) {
280307
// all thunks will branch to.
281308
ConcatInputSection *masterIsec = icfInputs[begin];
282309

310+
// Get the symbol that all thunks will branch to.
311+
Symbol *masterSym = getThunkTargetSymbol(masterIsec);
312+
283313
for (size_t i = begin + 1; i < end; ++i) {
284314
ConcatInputSection *isec = icfInputs[i];
285315
// When we're done processing keepUnique entries, we can stop. Sorting
@@ -291,7 +321,7 @@ void ICF::applySafeThunksToRange(size_t begin, size_t end) {
291321
makeSyntheticInputSection(isec->getSegName(), isec->getName());
292322
addInputSection(thunk);
293323

294-
target->initICFSafeThunkBody(thunk, masterIsec);
324+
target->initICFSafeThunkBody(thunk, masterSym);
295325
thunk->foldIdentical(isec, Symbol::ICFFoldKind::Thunk);
296326

297327
// Since we're folding the target function into a thunk, we need to adjust
@@ -495,18 +525,11 @@ Defined *macho::getBodyForThunkFoldedSym(Defined *foldedSym) {
495525
// the actual body of the function.
496526
InputSection *thunkBody = foldedSec->replacement;
497527

498-
// The actual (merged) body of the function that the thunk jumps to. This will
499-
// end up in the final binary.
500-
InputSection *functionBody = target->getThunkBranchTarget(thunkBody);
501-
502-
for (Symbol *sym : functionBody->symbols) {
503-
Defined *d = dyn_cast<Defined>(sym);
504-
// The symbol needs to be at the start of the InputSection
505-
if (d && d->value == 0)
506-
return d;
507-
}
528+
// The symbol of the merged body of the function that the thunk jumps to. This
529+
// will end up in the final binary.
530+
Symbol *targetSym = target->getThunkBranchTarget(thunkBody);
508531

509-
llvm_unreachable("could not find body symbol for ICF-generated thunk");
532+
return cast<Defined>(targetSym);
510533
}
511534
void macho::foldIdenticalSections(bool onlyCfStrings) {
512535
TimeTraceScope timeScope("Fold Identical Code Sections");
@@ -526,6 +549,8 @@ void macho::foldIdenticalSections(bool onlyCfStrings) {
526549
// ICF::segregate()
527550
std::vector<ConcatInputSection *> foldable;
528551
uint64_t icfUniqueID = inputSections.size();
552+
// Reset the thunk counter for each run of ICF.
553+
icfThunkCounter = 0;
529554
for (ConcatInputSection *isec : inputSections) {
530555
bool isFoldableWithAddendsRemoved = isCfStringSection(isec) ||
531556
isClassRefsSection(isec) ||

lld/MachO/Target.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,13 +76,13 @@ class TargetInfo {
7676

7777
// Init 'thunk' so that it be a direct jump to 'branchTarget'.
7878
virtual void initICFSafeThunkBody(InputSection *thunk,
79-
InputSection *branchTarget) const {
79+
Symbol *targetSym) const {
8080
llvm_unreachable("target does not support ICF safe thunks");
8181
}
8282

8383
// Given a thunk for which `initICFSafeThunkBody` was called, return the
8484
// branchTarget it was initialized with.
85-
virtual InputSection *getThunkBranchTarget(InputSection *thunk) const {
85+
virtual Symbol *getThunkBranchTarget(InputSection *thunk) const {
8686
llvm_unreachable("target does not support ICF safe thunks");
8787
}
8888

lld/test/MachO/arm64-thunks.s

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,16 @@
1414

1515
# RUN: rm -rf %t; mkdir %t
1616
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t/input.o
17-
# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -map %t/thunk.map -o %t/thunk %t/input.o
17+
## Use --icf=safe_thunks to test that branch extension algo is compatible
18+
## with safe_thunks ICF.
19+
# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -map %t/thunk.map -o %t/thunk %t/input.o --icf=safe_thunks
1820
# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/thunk | FileCheck %s
1921

2022
# RUN: FileCheck %s --input-file %t/thunk.map --check-prefix=MAP
2123

22-
# MAP: 0x{{[[:xdigit:]]+}} {{.*}} _b
24+
# MAP: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_low_addr
25+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _a
26+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _b
2327
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _c
2428
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _d.thunk.0
2529
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _e.thunk.0
@@ -35,10 +39,13 @@
3539
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _b.thunk.0
3640
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _h
3741
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _main
42+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_high_addr
3843
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _c.thunk.0
3944
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _d.thunk.1
4045
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _e.thunk.1
4146
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _f.thunk.1
47+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_low_addr.thunk.0
48+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} ltmp0.thunk.0
4249
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _z
4350

4451

@@ -200,8 +207,21 @@
200207
# CHECK: [[#%x, NAN_PAGE + NAN_OFFSET]] <__stubs>:
201208

202209
.subsections_via_symbols
210+
211+
.addrsig
212+
.addrsig_sym _fold_func_low_addr
213+
.addrsig_sym _fold_func_high_addr
214+
203215
.text
204216

217+
.globl _fold_func_low_addr
218+
.p2align 2
219+
_fold_func_low_addr:
220+
add x0, x0, x0
221+
add x1, x0, x1
222+
add x2, x0, x2
223+
ret
224+
205225
.globl _a
206226
.p2align 2
207227
_a:
@@ -329,9 +349,20 @@ _main:
329349
bl _f
330350
bl _g
331351
bl _h
352+
bl _fold_func_low_addr
353+
bl _fold_func_high_addr
332354
bl ___nan
333355
ret
334356

357+
.globl _fold_func_high_addr
358+
.p2align 2
359+
_fold_func_high_addr:
360+
add x0, x0, x0
361+
add x1, x0, x1
362+
add x2, x0, x2
363+
ret
364+
365+
335366
.section __TEXT,__cstring
336367
# The .space below has to be composed of non-zero characters. Otherwise, the
337368
# linker will create a symbol for every '0' in the section, leading to

0 commit comments

Comments
 (0)