Skip to content

Commit 817d98d

Browse files
committed
[lld-macho][nfc] Refactor in preparation for 32-bit support
The main challenge was handling the different on-disk structures (e.g. `mach_header` vs `mach_header_64`). I tried to strike a balance between sprinkling `target->wordSize == 8` checks everywhere (branchy = slow, and ugly) and templatizing everything (causes code bloat, also ugly). I think I struck a decent balance by judicious use of type erasure.

Note that LLD-ELF has a similar architecture, though it seems to use more templating.

Linking chromium_framework takes about the same time before and after this change:

        N    Min    Max    Median    Avg       Stddev
    x   20   4.52   4.67   4.595     4.5945    0.044423204
    +   20   4.5    4.71   4.575     4.582     0.056344803
    No difference proven at 95.0% confidence

Reviewed By: #lld-macho, oontvoo

Differential Revision: https://reviews.llvm.org/D99633
1 parent 8156d89 commit 817d98d

16 files changed

+285
-140
lines changed

lld/MachO/Arch/ARM64.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ namespace {
2828
struct ARM64 : TargetInfo {
2929
ARM64();
3030

31-
int64_t getEmbeddedAddend(MemoryBufferRef, const section_64 &,
31+
int64_t getEmbeddedAddend(MemoryBufferRef, uint64_t offset,
3232
const relocation_info) const override;
3333
void relocateOne(uint8_t *loc, const Reloc &, uint64_t va,
3434
uint64_t pc) const override;
@@ -77,7 +77,7 @@ const RelocAttrs &ARM64::getRelocAttrs(uint8_t type) const {
7777
return relocAttrsArray[type];
7878
}
7979

80-
int64_t ARM64::getEmbeddedAddend(MemoryBufferRef mb, const section_64 &sec,
80+
int64_t ARM64::getEmbeddedAddend(MemoryBufferRef mb, uint64_t offset,
8181
const relocation_info rel) const {
8282
if (rel.r_type != ARM64_RELOC_UNSIGNED &&
8383
rel.r_type != ARM64_RELOC_SUBTRACTOR) {
@@ -88,7 +88,7 @@ int64_t ARM64::getEmbeddedAddend(MemoryBufferRef mb, const section_64 &sec,
8888
}
8989

9090
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
91-
const uint8_t *loc = buf + sec.offset + rel.r_address;
91+
const uint8_t *loc = buf + offset + rel.r_address;
9292
switch (rel.r_length) {
9393
case 2:
9494
return static_cast<int32_t>(read32le(loc));
@@ -221,7 +221,8 @@ void ARM64::writeStub(uint8_t *buf8, const Symbol &sym) const {
221221
auto *buf32 = reinterpret_cast<uint32_t *>(buf8);
222222
uint64_t pcPageBits =
223223
pageBits(in.stubs->addr + sym.stubsIndex * sizeof(stubCode));
224-
uint64_t lazyPointerVA = in.lazyPointers->addr + sym.stubsIndex * WordSize;
224+
uint64_t lazyPointerVA =
225+
in.lazyPointers->addr + sym.stubsIndex * LP64::wordSize;
225226
buf32[0] = encodePage21({&sym, "stub"}, stubCode[0],
226227
pageBits(lazyPointerVA) - pcPageBits);
227228
buf32[1] = encodePageOff12(stubCode[1], lazyPointerVA);
@@ -249,7 +250,7 @@ void ARM64::writeStubHelperHeader(uint8_t *buf8) const {
249250
buf32[1] = encodePageOff12(stubHelperHeaderCode[1], loaderVA);
250251
buf32[2] = stubHelperHeaderCode[2];
251252
uint64_t binderVA =
252-
in.got->addr + in.stubHelper->stubBinder->gotIndex * WordSize;
253+
in.got->addr + in.stubHelper->stubBinder->gotIndex * LP64::wordSize;
253254
buf32[3] = encodePage21(d, stubHelperHeaderCode[3],
254255
pageBits(binderVA) - pcPageBits(3));
255256
buf32[4] = encodePageOff12(stubHelperHeaderCode[4], binderVA);
@@ -291,7 +292,7 @@ void ARM64::relaxGotLoad(uint8_t *loc, uint8_t type) const {
291292
write32le(loc, instruction);
292293
}
293294

294-
ARM64::ARM64() {
295+
ARM64::ARM64() : TargetInfo(LP64()) {
295296
cpuType = CPU_TYPE_ARM64;
296297
cpuSubtype = CPU_SUBTYPE_ARM64_ALL;
297298

lld/MachO/Arch/X86_64.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ namespace {
2525
struct X86_64 : TargetInfo {
2626
X86_64();
2727

28-
int64_t getEmbeddedAddend(MemoryBufferRef, const section_64 &,
28+
int64_t getEmbeddedAddend(MemoryBufferRef, uint64_t offset,
2929
const relocation_info) const override;
3030
void relocateOne(uint8_t *loc, const Reloc &, uint64_t va,
3131
uint64_t relocVA) const override;
@@ -77,10 +77,10 @@ static int pcrelOffset(uint8_t type) {
7777
}
7878
}
7979

80-
int64_t X86_64::getEmbeddedAddend(MemoryBufferRef mb, const section_64 &sec,
80+
int64_t X86_64::getEmbeddedAddend(MemoryBufferRef mb, uint64_t offset,
8181
relocation_info rel) const {
8282
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
83-
const uint8_t *loc = buf + sec.offset + rel.r_address;
83+
const uint8_t *loc = buf + offset + rel.r_address;
8484

8585
switch (rel.r_length) {
8686
case 2:
@@ -142,7 +142,7 @@ void X86_64::writeStub(uint8_t *buf, const Symbol &sym) const {
142142
memcpy(buf, stub, 2); // just copy the two nonzero bytes
143143
uint64_t stubAddr = in.stubs->addr + sym.stubsIndex * sizeof(stub);
144144
writeRipRelative({&sym, "stub"}, buf, stubAddr, sizeof(stub),
145-
in.lazyPointers->addr + sym.stubsIndex * WordSize);
145+
in.lazyPointers->addr + sym.stubsIndex * LP64::wordSize);
146146
}
147147

148148
static constexpr uint8_t stubHelperHeader[] = {
@@ -159,7 +159,7 @@ void X86_64::writeStubHelperHeader(uint8_t *buf) const {
159159
in.imageLoaderCache->getVA());
160160
writeRipRelative(d, buf, in.stubHelper->addr, 0xf,
161161
in.got->addr +
162-
in.stubHelper->stubBinder->gotIndex * WordSize);
162+
in.stubHelper->stubBinder->gotIndex * LP64::wordSize);
163163
}
164164

165165
static constexpr uint8_t stubHelperEntry[] = {
@@ -182,7 +182,7 @@ void X86_64::relaxGotLoad(uint8_t *loc, uint8_t type) const {
182182
loc[-2] = 0x8d;
183183
}
184184

185-
X86_64::X86_64() {
185+
X86_64::X86_64() : TargetInfo(LP64()) {
186186
cpuType = CPU_TYPE_X86_64;
187187
cpuSubtype = CPU_SUBTYPE_X86_64_ALL;
188188

lld/MachO/Driver.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1104,7 +1104,11 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
11041104
"\n>>> referenced from option -exported_symbol(s_list)");
11051105
}
11061106

1107-
createSyntheticSections();
1107+
if (target->wordSize == 8)
1108+
createSyntheticSections<LP64>();
1109+
else
1110+
createSyntheticSections<ILP32>();
1111+
11081112
createSyntheticSymbols();
11091113

11101114
for (const Arg *arg : args.filtered(OPT_sectcreate)) {
@@ -1127,7 +1131,10 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
11271131
}
11281132

11291133
// Write to an output file.
1130-
writeResult();
1134+
if (target->wordSize == 8)
1135+
writeResult<LP64>();
1136+
else
1137+
writeResult<ILP32>();
11311138

11321139
depTracker->write(getLLDVersion(), inputFiles, config->outputFile);
11331140
}

lld/MachO/DriverUtils.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "Driver.h"
1111
#include "InputFiles.h"
1212
#include "ObjC.h"
13+
#include "Target.h"
1314

1415
#include "lld/Common/Args.h"
1516
#include "lld/Common/ErrorHandler.h"

lld/MachO/InputFiles.cpp

Lines changed: 62 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -153,11 +153,12 @@ Optional<MemoryBufferRef> macho::readFile(StringRef path) {
153153
InputFile::InputFile(Kind kind, const InterfaceFile &interface)
154154
: id(idCount++), fileKind(kind), name(saver.save(interface.getPath())) {}
155155

156-
void ObjFile::parseSections(ArrayRef<section_64> sections) {
156+
template <class Section>
157+
void ObjFile::parseSections(ArrayRef<Section> sections) {
157158
subsections.reserve(sections.size());
158159
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
159160

160-
for (const section_64 &sec : sections) {
161+
for (const Section &sec : sections) {
161162
InputSection *isec = make<InputSection>();
162163
isec->file = this;
163164
isec->name =
@@ -204,7 +205,8 @@ static InputSection *findContainingSubsection(SubsectionMapping &map,
204205
return it->isec;
205206
}
206207

207-
static bool validateRelocationInfo(InputFile *file, const section_64 &sec,
208+
template <class Section>
209+
static bool validateRelocationInfo(InputFile *file, const Section &sec,
208210
relocation_info rel) {
209211
const RelocAttrs &relocAttrs = target->getRelocAttrs(rel.r_type);
210212
bool valid = true;
@@ -235,7 +237,9 @@ static bool validateRelocationInfo(InputFile *file, const section_64 &sec,
235237
return valid;
236238
}
237239

238-
void ObjFile::parseRelocations(const section_64 &sec,
240+
template <class Section>
241+
void ObjFile::parseRelocations(ArrayRef<Section> sectionHeaders,
242+
const Section &sec,
239243
SubsectionMapping &subsecMap) {
240244
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
241245
ArrayRef<relocation_info> relInfos(
@@ -279,7 +283,7 @@ void ObjFile::parseRelocations(const section_64 &sec,
279283
if (relInfo.r_address & R_SCATTERED)
280284
fatal("TODO: Scattered relocations not supported");
281285

282-
int64_t embeddedAddend = target->getEmbeddedAddend(mb, sec, relInfo);
286+
int64_t embeddedAddend = target->getEmbeddedAddend(mb, sec.offset, relInfo);
283287
assert(!(embeddedAddend && pairedAddend));
284288
int64_t totalAddend = pairedAddend + embeddedAddend;
285289
Reloc r;
@@ -293,7 +297,7 @@ void ObjFile::parseRelocations(const section_64 &sec,
293297
} else {
294298
SubsectionMapping &referentSubsecMap =
295299
subsections[relInfo.r_symbolnum - 1];
296-
const section_64 &referentSec = sectionHeaders[relInfo.r_symbolnum - 1];
300+
const Section &referentSec = sectionHeaders[relInfo.r_symbolnum - 1];
297301
uint64_t referentOffset;
298302
if (relInfo.r_pcrel) {
299303
// The implicit addend for pcrel section relocations is the pcrel offset
@@ -330,9 +334,10 @@ void ObjFile::parseRelocations(const section_64 &sec,
330334
}
331335
}
332336

333-
static macho::Symbol *createDefined(const structs::nlist_64 &sym,
334-
StringRef name, InputSection *isec,
335-
uint64_t value, uint64_t size) {
337+
template <class NList>
338+
static macho::Symbol *createDefined(const NList &sym, StringRef name,
339+
InputSection *isec, uint64_t value,
340+
uint64_t size) {
336341
// Symbol scope is determined by sym.n_type & (N_EXT | N_PEXT):
337342
// N_EXT: Global symbols
338343
// N_EXT | N_PEXT: Linkage unit (think: dylib) scoped
@@ -378,8 +383,9 @@ static bool hasCompatVersion(const InputFile *input,
378383

379384
// Absolute symbols are defined symbols that do not have an associated
380385
// InputSection. They cannot be weak.
381-
static macho::Symbol *createAbsolute(const structs::nlist_64 &sym,
382-
InputFile *file, StringRef name) {
386+
template <class NList>
387+
static macho::Symbol *createAbsolute(const NList &sym, InputFile *file,
388+
StringRef name) {
383389
if (sym.n_type & (N_EXT | N_PEXT)) {
384390
assert((sym.n_type & N_EXT) && "invalid input");
385391
return symtab->addDefined(name, file, nullptr, sym.n_value, /*size=*/0,
@@ -390,7 +396,8 @@ static macho::Symbol *createAbsolute(const structs::nlist_64 &sym,
390396
/*isExternal=*/false, /*isPrivateExtern=*/false);
391397
}
392398

393-
macho::Symbol *ObjFile::parseNonSectionSymbol(const structs::nlist_64 &sym,
399+
template <class NList>
400+
macho::Symbol *ObjFile::parseNonSectionSymbol(const NList &sym,
394401
StringRef name) {
395402
uint8_t type = sym.n_type & N_TYPE;
396403
switch (type) {
@@ -414,14 +421,18 @@ macho::Symbol *ObjFile::parseNonSectionSymbol(const structs::nlist_64 &sym,
414421
}
415422
}
416423

417-
void ObjFile::parseSymbols(ArrayRef<structs::nlist_64> nList,
424+
template <class LP>
425+
void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
426+
ArrayRef<typename LP::nlist> nList,
418427
const char *strtab, bool subsectionsViaSymbols) {
428+
using Section = typename LP::section;
429+
using NList = typename LP::nlist;
430+
419431
// Precompute the boundaries of symbols within a section.
420432
// If subsectionsViaSymbols is True then the corresponding subsections will be
421433
// created, otherwise these boundaries are used for the calculation of symbols
422434
// sizes only.
423-
424-
for (const structs::nlist_64 &sym : nList) {
435+
for (const NList &sym : nList) {
425436
if ((sym.n_type & N_TYPE) == N_SECT && !(sym.n_desc & N_ALT_ENTRY) &&
426437
!subsections[sym.n_sect - 1].empty()) {
427438
SubsectionMapping &subsectionMapping = subsections[sym.n_sect - 1];
@@ -462,15 +473,15 @@ void ObjFile::parseSymbols(ArrayRef<structs::nlist_64> nList,
462473

463474
symbols.resize(nList.size());
464475
for (size_t i = 0, n = nList.size(); i < n; ++i) {
465-
const structs::nlist_64 &sym = nList[i];
476+
const NList &sym = nList[i];
466477
StringRef name = strtab + sym.n_strx;
467478

468479
if ((sym.n_type & N_TYPE) != N_SECT) {
469480
symbols[i] = parseNonSectionSymbol(sym, name);
470481
continue;
471482
}
472483

473-
const section_64 &sec = sectionHeaders[sym.n_sect - 1];
484+
const Section &sec = sectionHeaders[sym.n_sect - 1];
474485
SubsectionMapping &subsecMap = subsections[sym.n_sect - 1];
475486

476487
// parseSections() may have chosen not to parse this section.
@@ -521,9 +532,20 @@ OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,
521532
ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName)
522533
: InputFile(ObjKind, mb), modTime(modTime) {
523534
this->archiveName = std::string(archiveName);
535+
if (target->wordSize == 8)
536+
parse<LP64>();
537+
else
538+
parse<ILP32>();
539+
}
540+
541+
template <class LP> void ObjFile::parse() {
542+
using Header = typename LP::mach_header;
543+
using SegmentCommand = typename LP::segment_command;
544+
using Section = typename LP::section;
545+
using NList = typename LP::nlist;
524546

525547
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
526-
auto *hdr = reinterpret_cast<const mach_header_64 *>(mb.getBufferStart());
548+
auto *hdr = reinterpret_cast<const Header *>(mb.getBufferStart());
527549

528550
Architecture arch = getArchitectureFromCpuType(hdr->cputype, hdr->cpusubtype);
529551
if (arch != config->target.Arch) {
@@ -546,28 +568,29 @@ ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName)
546568
parseLCLinkerOption(this, c->count, data);
547569
}
548570

549-
if (const load_command *cmd = findCommand(hdr, LC_SEGMENT_64)) {
550-
auto *c = reinterpret_cast<const segment_command_64 *>(cmd);
551-
sectionHeaders = ArrayRef<section_64>{
552-
reinterpret_cast<const section_64 *>(c + 1), c->nsects};
571+
ArrayRef<Section> sectionHeaders;
572+
if (const load_command *cmd = findCommand(hdr, LP::segmentLCType)) {
573+
auto *c = reinterpret_cast<const SegmentCommand *>(cmd);
574+
sectionHeaders =
575+
ArrayRef<Section>{reinterpret_cast<const Section *>(c + 1), c->nsects};
553576
parseSections(sectionHeaders);
554577
}
555578

556579
// TODO: Error on missing LC_SYMTAB?
557580
if (const load_command *cmd = findCommand(hdr, LC_SYMTAB)) {
558581
auto *c = reinterpret_cast<const symtab_command *>(cmd);
559-
ArrayRef<structs::nlist_64> nList(
560-
reinterpret_cast<const structs::nlist_64 *>(buf + c->symoff), c->nsyms);
582+
ArrayRef<NList> nList(reinterpret_cast<const NList *>(buf + c->symoff),
583+
c->nsyms);
561584
const char *strtab = reinterpret_cast<const char *>(buf) + c->stroff;
562585
bool subsectionsViaSymbols = hdr->flags & MH_SUBSECTIONS_VIA_SYMBOLS;
563-
parseSymbols(nList, strtab, subsectionsViaSymbols);
586+
parseSymbols<LP>(sectionHeaders, nList, strtab, subsectionsViaSymbols);
564587
}
565588

566589
// The relocations may refer to the symbols, so we parse them after we have
567590
// parsed all the symbols.
568591
for (size_t i = 0, n = subsections.size(); i < n; ++i)
569592
if (!subsections[i].empty())
570-
parseRelocations(sectionHeaders[i], subsections[i]);
593+
parseRelocations(sectionHeaders, sectionHeaders[i], subsections[i]);
571594

572595
parseDebugInfo();
573596
}
@@ -678,8 +701,16 @@ DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
678701
if (umbrella == nullptr)
679702
umbrella = this;
680703

704+
if (target->wordSize == 8)
705+
parse<LP64>(umbrella);
706+
else
707+
parse<ILP32>(umbrella);
708+
}
709+
710+
template <class LP> void DylibFile::parse(DylibFile *umbrella) {
711+
using Header = typename LP::mach_header;
681712
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
682-
auto *hdr = reinterpret_cast<const mach_header_64 *>(mb.getBufferStart());
713+
auto *hdr = reinterpret_cast<const Header *>(mb.getBufferStart());
683714

684715
// Initialize dylibName.
685716
if (const load_command *cmd = findCommand(hdr, LC_ID_DYLIB)) {
@@ -716,8 +747,7 @@ DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
716747
return;
717748
}
718749

719-
const uint8_t *p =
720-
reinterpret_cast<const uint8_t *>(hdr) + sizeof(mach_header_64);
750+
const uint8_t *p = reinterpret_cast<const uint8_t *>(hdr) + sizeof(Header);
721751
for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
722752
auto *cmd = reinterpret_cast<const load_command *>(p);
723753
p += cmd->cmdsize;
@@ -888,3 +918,6 @@ BitcodeFile::BitcodeFile(MemoryBufferRef mbref)
888918
for (const lto::InputFile::Symbol &objSym : obj->symbols())
889919
symbols.push_back(createBitcodeSymbol(objSym, *this));
890920
}
921+
922+
template void ObjFile::parse<LP64>();
923+
template void DylibFile::parse<LP64>(DylibFile *umbrella);

0 commit comments

Comments
 (0)