Skip to content

Commit 389e0a8

Browse files
committed
[lld-macho] Support synthesizing __TEXT,__init_offsets
This section stores 32-bit `__TEXT` segment offsets of initializer functions, and is used instead of `__mod_init_func` when chained fixups are enabled. Storing the offsets lets us avoid emitting fixups for the initializers. Differential Revision: https://reviews.llvm.org/D132947
1 parent b58ed43 commit 389e0a8

File tree

14 files changed

+206
-16
lines changed

14 files changed

+206
-16
lines changed

lld/MachO/Config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ struct Configuration {
131131
bool emitBitcodeBundle = false;
132132
bool emitDataInCodeInfo = false;
133133
bool emitEncryptionInfo = false;
134+
bool emitInitOffsets = false;
134135
bool timeTraceEnabled = false;
135136
bool dataConst = false;
136137
bool dedupLiterals = true;

lld/MachO/Driver.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1103,6 +1103,11 @@ static void gatherInputSections() {
11031103
if (auto *isec = dyn_cast<ConcatInputSection>(subsection.isec)) {
11041104
if (isec->isCoalescedWeak())
11051105
continue;
1106+
if (config->emitInitOffsets &&
1107+
sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS) {
1108+
in.initOffsets->addInput(isec);
1109+
continue;
1110+
}
11061111
isec->outSecOff = inputOrder++;
11071112
if (!osec)
11081113
osec = ConcatOutputSection::getOrCreateForInput(isec);
@@ -1432,6 +1437,7 @@ bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
14321437
config->emitBitcodeBundle = args.hasArg(OPT_bitcode_bundle);
14331438
config->emitDataInCodeInfo =
14341439
args.hasFlag(OPT_data_in_code_info, OPT_no_data_in_code_info, true);
1440+
config->emitInitOffsets = args.hasArg(OPT_init_offsets);
14351441
config->icfLevel = getICFLevel(args);
14361442
config->dedupLiterals =
14371443
args.hasFlag(OPT_deduplicate_literals, OPT_icf_eq, false) ||

lld/MachO/InputSection.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,7 @@ constexpr const char functionStarts[] = "__func_starts";
314314
constexpr const char got[] = "__got";
315315
constexpr const char header[] = "__mach_header";
316316
constexpr const char indirectSymbolTable[] = "__ind_sym_tab";
317+
constexpr const char initOffsets[] = "__init_offsets";
317318
constexpr const char const_[] = "__const";
318319
constexpr const char lazySymbolPtr[] = "__la_symbol_ptr";
319320
constexpr const char lazyBinding[] = "__lazy_binding";

lld/MachO/MarkLive.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,11 +279,16 @@ void markLive() {
279279
// mod_init_funcs, mod_term_funcs sections
280280
if (sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS ||
281281
sectionType(isec->getFlags()) == S_MOD_TERM_FUNC_POINTERS) {
282+
assert(!config->emitInitOffsets ||
283+
sectionType(isec->getFlags()) != S_MOD_INIT_FUNC_POINTERS);
282284
marker->enqueue(isec, 0);
283285
continue;
284286
}
285287
}
286288

289+
for (ConcatInputSection *isec : in.initOffsets->inputs())
290+
marker->enqueue(isec, 0);
291+
287292
marker->markTransitively();
288293
}
289294

lld/MachO/Options.td

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1273,8 +1273,7 @@ def ignore_optimization_hints : Flag<["-"], "ignore_optimization_hints">,
12731273
HelpText<"Ignore Linker Optimization Hints">,
12741274
Group<grp_undocumented>;
12751275
def init_offsets : Flag<["-"], "init_offsets">,
1276-
HelpText<"This option is undocumented in ld64">,
1277-
Flags<[HelpHidden]>,
1276+
HelpText<"Store __TEXT segment offsets of static initializers">,
12781277
Group<grp_undocumented>;
12791278
def keep_dwarf_unwind : Flag<["-"], "keep_dwarf_unwind">,
12801279
HelpText<"This option is undocumented in ld64">,

lld/MachO/OutputSegment.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,11 @@ static int sectionOrder(OutputSection *osec) {
8484
// Sections are uniquely identified by their segment + section name.
8585
if (segname == segment_names::text) {
8686
return StringSwitch<int>(osec->name)
87-
.Case(section_names::header, -4)
88-
.Case(section_names::text, -3)
89-
.Case(section_names::stubs, -2)
90-
.Case(section_names::stubHelper, -1)
87+
.Case(section_names::header, -5)
88+
.Case(section_names::text, -4)
89+
.Case(section_names::stubs, -3)
90+
.Case(section_names::stubHelper, -2)
91+
.Case(section_names::initOffsets, -1)
9192
.Case(section_names::unwindInfo, std::numeric_limits<int>::max() - 1)
9293
.Case(section_names::ehFrame, std::numeric_limits<int>::max())
9394
.Default(osec->inputOrder);

lld/MachO/Symbols.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,14 @@ T *replaceSymbol(Symbol *s, ArgT &&...arg) {
346346
return sym;
347347
}
348348

349+
// Can a symbol's address only be resolved at runtime?
350+
inline bool needsBinding(const Symbol *sym) {
351+
if (isa<DylibSymbol>(sym))
352+
return true;
353+
if (const auto *defined = dyn_cast<Defined>(sym))
354+
return defined->isExternalWeakDef() || defined->interposable;
355+
return false;
356+
}
349357
} // namespace macho
350358

351359
std::string toString(const macho::Symbol &);

lld/MachO/SyntheticSections.cpp

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1816,6 +1816,74 @@ void ObjCImageInfoSection::writeTo(uint8_t *buf) const {
18161816
write32le(buf + 4, flags);
18171817
}
18181818

1819+
InitOffsetsSection::InitOffsetsSection()
1820+
: SyntheticSection(segment_names::text, section_names::initOffsets) {
1821+
flags = S_INIT_FUNC_OFFSETS;
1822+
}
1823+
1824+
uint64_t InitOffsetsSection::getSize() const {
1825+
size_t count = 0;
1826+
for (const ConcatInputSection *isec : sections)
1827+
count += isec->relocs.size();
1828+
return count * sizeof(uint32_t);
1829+
}
1830+
1831+
void InitOffsetsSection::writeTo(uint8_t *buf) const {
1832+
uint64_t textVA = 0;
1833+
for (const OutputSegment *oseg : outputSegments)
1834+
if (oseg->name == segment_names::text) {
1835+
textVA = oseg->addr;
1836+
break;
1837+
}
1838+
1839+
// FIXME: Add function specified by -init when that argument is implemented.
1840+
for (ConcatInputSection *isec : sections) {
1841+
for (const Reloc &rel : isec->relocs) {
1842+
const Symbol *referent = rel.referent.dyn_cast<Symbol *>();
1843+
assert(referent && "section relocation should have been rejected");
1844+
uint64_t offset = referent->getVA() - textVA;
1845+
// FIXME: Can we handle this gracefully?
1846+
if (offset > UINT32_MAX)
1847+
fatal(isec->getLocation(rel.offset) + ": offset to initializer " +
1848+
referent->getName() + " (" + utohexstr(offset) +
1849+
") does not fit in 32 bits");
1850+
1851+
// Entries need to be added in the order they appear in the section, but
1852+
// relocations aren't guaranteed to be sorted.
1853+
size_t index = rel.offset >> target->p2WordSize;
1854+
write32le(&buf[index * sizeof(uint32_t)], offset);
1855+
}
1856+
buf += isec->relocs.size() * sizeof(uint32_t);
1857+
}
1858+
}
1859+
1860+
// The inputs are __mod_init_func sections, which contain pointers to
1861+
// initializer functions, therefore all relocations should be of the UNSIGNED
1862+
// type. InitOffsetsSection stores offsets, so if the initializer's address is
1863+
// not known at link time, stub-indirection has to be used.
1864+
void InitOffsetsSection::setUp() {
1865+
for (const ConcatInputSection *isec : sections) {
1866+
for (const Reloc &rel : isec->relocs) {
1867+
RelocAttrs attrs = target->getRelocAttrs(rel.type);
1868+
if (!attrs.hasAttr(RelocAttrBits::UNSIGNED))
1869+
error(isec->getLocation(rel.offset) +
1870+
": unsupported relocation type: " + attrs.name);
1871+
if (rel.addend != 0)
1872+
error(isec->getLocation(rel.offset) +
1873+
": relocation addend is not representable in __init_offsets");
1874+
if (rel.referent.is<InputSection *>())
1875+
error(isec->getLocation(rel.offset) +
1876+
": unexpected section relocation");
1877+
1878+
Symbol *sym = rel.referent.dyn_cast<Symbol *>();
1879+
if (auto *undefined = dyn_cast<Undefined>(sym))
1880+
treatUndefinedSymbol(*undefined, isec, rel.offset);
1881+
if (needsBinding(sym))
1882+
in.stubs->addEntry(sym);
1883+
}
1884+
}
1885+
}
1886+
18191887
void macho::createSyntheticSymbols() {
18201888
auto addHeaderSymbol = [](const char *name) {
18211889
symtab->addSynthetic(name, in.header->isec, /*value=*/0,

lld/MachO/SyntheticSections.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,32 @@ class ObjCImageInfoSection final : public SyntheticSection {
647647
std::vector<const InputFile *> files; // files with image info
648648
};
649649

650+
// This section stores 32-bit __TEXT segment offsets of initializer functions.
651+
//
652+
// The compiler stores pointers to initializers in __mod_init_func. These need
653+
// to be fixed up at load time, which takes time and dirties memory. By
654+
// synthesizing InitOffsetsSection from them, this data can live in the
655+
// read-only __TEXT segment instead. This section is used by default when
656+
// chained fixups are enabled.
657+
//
658+
// There is no similar counterpart to __mod_term_func, as that section is
659+
// deprecated, and static destructors are instead handled by registering them
660+
// via __cxa_atexit from an autogenerated initializer function (see D121736).
661+
class InitOffsetsSection final : public SyntheticSection {
662+
public:
663+
InitOffsetsSection();
664+
bool isNeeded() const override { return !sections.empty(); }
665+
uint64_t getSize() const override;
666+
void writeTo(uint8_t *buf) const override;
667+
void setUp();
668+
669+
void addInput(ConcatInputSection *isec) { sections.push_back(isec); }
670+
const std::vector<ConcatInputSection *> &inputs() const { return sections; }
671+
672+
private:
673+
std::vector<ConcatInputSection *> sections;
674+
};
675+
650676
struct InStruct {
651677
const uint8_t *bufferStart = nullptr;
652678
MachHeaderSection *header = nullptr;
@@ -668,6 +694,7 @@ struct InStruct {
668694
UnwindInfoSection *unwindInfo = nullptr;
669695
ObjCImageInfoSection *objCImageInfo = nullptr;
670696
ConcatInputSection *imageLoaderCache = nullptr;
697+
InitOffsetsSection *initOffsets = nullptr;
671698
};
672699

673700
extern InStruct in;

lld/MachO/Writer.cpp

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -575,15 +575,6 @@ void Writer::treatSpecialUndefineds() {
575575
}
576576
}
577577

578-
// Can a symbol's address can only be resolved at runtime?
579-
static bool needsBinding(const Symbol *sym) {
580-
if (isa<DylibSymbol>(sym))
581-
return true;
582-
if (const auto *defined = dyn_cast<Defined>(sym))
583-
return defined->isExternalWeakDef() || defined->interposable;
584-
return false;
585-
}
586-
587578
static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec,
588579
const lld::macho::Reloc &r) {
589580
assert(sym->isLive());
@@ -1141,6 +1132,8 @@ template <class LP> void Writer::run() {
11411132
if (in.objcStubs->isNeeded())
11421133
in.objcStubs->setUp();
11431134
scanRelocations();
1135+
if (in.initOffsets->isNeeded())
1136+
in.initOffsets->setUp();
11441137

11451138
// Do not proceed if there was an undefined symbol.
11461139
reportPendingUndefinedSymbols();
@@ -1204,6 +1197,7 @@ void macho::createSyntheticSections() {
12041197
in.objcStubs = make<ObjCStubsSection>();
12051198
in.unwindInfo = makeUnwindInfoSection();
12061199
in.objCImageInfo = make<ObjCImageInfoSection>();
1200+
in.initOffsets = make<InitOffsetsSection>();
12071201

12081202
// This section contains space for just a single word, and will be used by
12091203
// dyld to cache an address to the image loader it uses.

lld/test/MachO/init-offsets.s

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# REQUIRES: x86
2+
# RUN: rm -rf %t; split-file %s %t
3+
4+
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/first.s -o %t/first.o
5+
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/second.s -o %t/second.o
6+
7+
# RUN: %lld -lSystem -init_offsets -undefined dynamic_lookup %t/first.o %t/second.o -o %t/out
8+
# RUN: llvm-otool -lv %t/out | FileCheck --check-prefix=FLAGS --implicit-check-not=__mod_init_func %s
9+
# RUN: llvm-otool -l %t/out > %t/dump.txt
10+
# RUN: llvm-objdump --macho --print-imm-hex --section=__TEXT,__stubs %t/out >> %t/dump.txt
11+
# RUN: llvm-objdump --macho --syms %t/out >> %t/dump.txt
12+
# RUN: llvm-objcopy --dump-section=__TEXT,__init_offsets=%t/section.bin %t/out
13+
# RUN: echo "__TEXT,__init_offsets contents:" >> %t/dump.txt
14+
# RUN: od -An -txI %t/section.bin >> %t/dump.txt
15+
# RUN: FileCheck --check-prefix=CONTENT %s < %t/dump.txt
16+
17+
## This test checks that:
18+
## - __mod_init_func is replaced by __init_offsets.
19+
## - __init_offsets has type S_INIT_FUNC_OFFSETS.
20+
## - initializers show up in the order their parent objects are specified on the
21+
## command line, and in the order they show up within __mod_init_func.
22+
## - for undefined and dylib symbols, stubs are created, and the offsets point to those.
23+
## - offsets are relative to __TEXT's address rather than being absolute virtual addresses.
24+
25+
# FLAGS: sectname __init_offsets
26+
# FLAGS-NEXT: segname __TEXT
27+
# FLAGS-NEXT: addr
28+
# FLAGS-NEXT: size 0x0000000000000010
29+
# FLAGS-NEXT: offset
30+
# FLAGS-NEXT: align
31+
# FLAGS-NEXT: reloff 0
32+
# FLAGS-NEXT: nreloc 0
33+
# FLAGS-NEXT: type S_INIT_FUNC_OFFSETS
34+
35+
# CONTENT: segname __TEXT
36+
# CONTENT-NEXT: 0x[[#%x, TEXT:]]
37+
38+
# CONTENT: Contents of (__TEXT,__stubs) section
39+
# CONTENT-NEXT: [[#%x, ISNAN:]]: {{.*}} ## literal pool symbol address: ___isnan
40+
# CONTENT-NEXT: [[#%x, UNDEF:]]: {{.*}} ## literal pool symbol address: _undefined
41+
42+
# CONTENT: SYMBOL TABLE:
43+
# CONTENT: [[#%x, FIRST:]] g F __TEXT,__text _first_init
44+
# CONTENT: [[#%x, SECOND:]] g F __TEXT,__text _second_init
45+
46+
# CONTENT: __TEXT,__init_offsets contents:
47+
# CONTENT: [[#%.8x, FIRST - TEXT]] [[#%.8x, ISNAN - TEXT]] [[#%.8x, UNDEF - TEXT]] [[#%.8x, SECOND - TEXT]]
48+
49+
#--- first.s
50+
.globl _first_init, ___isnan, _main
51+
.text
52+
_first_init:
53+
ret
54+
_main:
55+
ret
56+
57+
.section __DATA,__mod_init_func,mod_init_funcs
58+
.quad _first_init
59+
.quad ___isnan
60+
61+
.subsections_via_symbols
62+
63+
#--- second.s
64+
.globl _second_init, _undefined
65+
.text
66+
_second_init:
67+
ret
68+
69+
.section __DATA,__mod_init_func,mod_init_funcs
70+
.quad _undefined
71+
.quad _second_init
72+
73+
.subsections_via_symbols

llvm/include/llvm/BinaryFormat/MachO.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,8 +175,11 @@ enum SectionType : uint32_t {
175175
/// S_THREAD_LOCAL_INIT_FUNCTION_POINTERS - Section with thread local
176176
/// variable initialization pointers to functions.
177177
S_THREAD_LOCAL_INIT_FUNCTION_POINTERS = 0x15u,
178+
/// S_INIT_FUNC_OFFSETS - Section with 32-bit offsets to initializer
179+
/// functions.
180+
S_INIT_FUNC_OFFSETS = 0x16u,
178181

179-
LAST_KNOWN_SECTION_TYPE = S_THREAD_LOCAL_INIT_FUNCTION_POINTERS
182+
LAST_KNOWN_SECTION_TYPE = S_INIT_FUNC_OFFSETS
180183
};
181184

182185
enum : uint32_t {

llvm/lib/MC/MCSectionMachO.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ static constexpr struct {
6262
StringLiteral("S_THREAD_LOCAL_VARIABLE_POINTERS")}, // 0x14
6363
{StringLiteral("thread_local_init_function_pointers"),
6464
StringLiteral("S_THREAD_LOCAL_INIT_FUNCTION_POINTERS")}, // 0x15
65+
{StringLiteral("") /* linker-synthesized */,
66+
StringLiteral("S_INIT_FUNC_OFFSETS")}, // 0x16
6567
};
6668

6769
/// SectionAttrDescriptors - This is an array of descriptors for section

llvm/tools/llvm-objdump/MachODump.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8955,6 +8955,8 @@ static void PrintSection(const char *sectname, const char *segname,
89558955
outs() << " S_THREAD_LOCAL_VARIABLE_POINTERS\n";
89568956
else if (section_type == MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS)
89578957
outs() << " S_THREAD_LOCAL_INIT_FUNCTION_POINTERS\n";
8958+
else if (section_type == MachO::S_INIT_FUNC_OFFSETS)
8959+
outs() << " S_INIT_FUNC_OFFSETS\n";
89588960
else
89598961
outs() << format("0x%08" PRIx32, section_type) << "\n";
89608962
outs() << "attributes";

0 commit comments

Comments
 (0)