Skip to content

Commit f1e4e2f

Browse files
author
Alexander Shaposhnikov
committed
[lld][MachO] Refactor handling of subsections
This diff is a preparation for fixing FunStabs (incorrect size calculation). std::map<uint32_t, InputSection*> (SubsectionMap) is replaced with a sorted vector + binary search. If .subsections_via_symbols is set, this vector will contain the list of subsections; otherwise, the offsets will be used for calculating the symbol sizes. Test plan: make check-all. Differential revision: https://reviews.llvm.org/D98837
1 parent 4af4828 commit f1e4e2f

File tree

4 files changed

+78
-70
lines changed

4 files changed

+78
-70
lines changed

lld/MachO/Driver.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,12 +1119,9 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
11191119
TimeTraceScope timeScope("Gathering input sections");
11201120
// Gather all InputSections into one vector.
11211121
for (const InputFile *file : inputFiles) {
1122-
for (const SubsectionMap &map : file->subsections) {
1123-
for (const auto &p : map) {
1124-
InputSection *isec = p.second;
1125-
inputSections.push_back(isec);
1126-
}
1127-
}
1122+
for (const SubsectionMapping &map : file->subsections)
1123+
for (const SubsectionEntry &subsectionEntry : map)
1124+
inputSections.push_back(subsectionEntry.isec);
11281125
}
11291126
}
11301127

lld/MachO/InputFiles.cpp

Lines changed: 65 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -194,11 +194,14 @@ void ObjFile::parseSections(ArrayRef<section_64> sections) {
194194
// any subsection splitting has occurred). It will be updated to represent the
195195
// same location as an offset relative to the start of the containing
196196
// subsection.
197-
static InputSection *findContainingSubsection(SubsectionMap &map,
198-
uint32_t *offset) {
199-
auto it = std::prev(map.upper_bound(*offset));
200-
*offset -= it->first;
201-
return it->second;
197+
static InputSection *findContainingSubsection(SubsectionMapping &map,
198+
uint64_t *offset) {
199+
auto it = std::prev(llvm::upper_bound(
200+
map, *offset, [](uint64_t value, SubsectionEntry subsectionEntry) {
201+
return value < subsectionEntry.offset;
202+
}));
203+
*offset -= it->offset;
204+
return it->isec;
202205
}
203206

204207
static bool validateRelocationInfo(InputFile *file, const section_64 &sec,
@@ -233,7 +236,7 @@ static bool validateRelocationInfo(InputFile *file, const section_64 &sec,
233236
}
234237

235238
void ObjFile::parseRelocations(const section_64 &sec,
236-
SubsectionMap &subsecMap) {
239+
SubsectionMapping &subsecMap) {
237240
auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
238241
ArrayRef<relocation_info> relInfos(
239242
reinterpret_cast<const relocation_info *>(buf + sec.reloff), sec.nreloc);
@@ -288,9 +291,10 @@ void ObjFile::parseRelocations(const section_64 &sec,
288291
r.referent = symbols[relInfo.r_symbolnum];
289292
r.addend = totalAddend;
290293
} else {
291-
SubsectionMap &referentSubsecMap = subsections[relInfo.r_symbolnum - 1];
294+
SubsectionMapping &referentSubsecMap =
295+
subsections[relInfo.r_symbolnum - 1];
292296
const section_64 &referentSec = sectionHeaders[relInfo.r_symbolnum - 1];
293-
uint32_t referentOffset;
297+
uint64_t referentOffset;
294298
if (relInfo.r_pcrel) {
295299
// The implicit addend for pcrel section relocations is the pcrel offset
296300
// in terms of the addresses in the input file. Here we adjust it so
@@ -328,7 +332,7 @@ void ObjFile::parseRelocations(const section_64 &sec,
328332

329333
static macho::Symbol *createDefined(const structs::nlist_64 &sym,
330334
StringRef name, InputSection *isec,
331-
uint32_t value) {
335+
uint64_t value) {
332336
// Symbol scope is determined by sym.n_type & (N_EXT | N_PEXT):
333337
// N_EXT: Global symbols
334338
// N_EXT | N_PEXT: Linkage unit (think: dylib) scoped
@@ -410,11 +414,51 @@ macho::Symbol *ObjFile::parseNonSectionSymbol(const structs::nlist_64 &sym,
410414

411415
void ObjFile::parseSymbols(ArrayRef<structs::nlist_64> nList,
412416
const char *strtab, bool subsectionsViaSymbols) {
413-
// resize(), not reserve(), because we are going to create N_ALT_ENTRY symbols
414-
// out-of-sequence.
415-
symbols.resize(nList.size());
416-
std::vector<size_t> altEntrySymIdxs;
417+
// Precompute the boundaries of symbols within a section.
418+
// If subsectionsViaSymbols is True then the corresponding subsections will be
419+
// created, otherwise these boundaries are used for the calculation of symbols
420+
// sizes only.
421+
422+
for (const structs::nlist_64 &sym : nList) {
423+
if ((sym.n_type & N_TYPE) == N_SECT && !(sym.n_desc & N_ALT_ENTRY) &&
424+
!subsections[sym.n_sect - 1].empty()) {
425+
SubsectionMapping &subsectionMapping = subsections[sym.n_sect - 1];
426+
subsectionMapping.push_back(
427+
{sym.n_value - sectionHeaders[sym.n_sect - 1].addr,
428+
subsectionMapping.front().isec});
429+
}
430+
}
417431

432+
for (SubsectionMapping &subsectionMap : subsections) {
433+
if (subsectionMap.empty())
434+
continue;
435+
llvm::sort(subsectionMap,
436+
[](const SubsectionEntry &lhs, const SubsectionEntry &rhs) {
437+
return lhs.offset < rhs.offset;
438+
});
439+
subsectionMap.erase(
440+
std::unique(subsectionMap.begin(), subsectionMap.end(),
441+
[](const SubsectionEntry &lhs, const SubsectionEntry &rhs) {
442+
return lhs.offset == rhs.offset;
443+
}),
444+
subsectionMap.end());
445+
if (!subsectionsViaSymbols)
446+
continue;
447+
for (size_t i = 0; i < subsectionMap.size(); ++i) {
448+
uint32_t offset = subsectionMap[i].offset;
449+
InputSection *&isec = subsectionMap[i].isec;
450+
uint32_t end = i + 1 < subsectionMap.size() ? subsectionMap[i + 1].offset
451+
: isec->data.size();
452+
isec = make<InputSection>(*isec);
453+
isec->data = isec->data.slice(offset, end - offset);
454+
// TODO: ld64 appears to preserve the original alignment as well as each
455+
// subsection's offset from the last aligned address. We should consider
456+
// emulating that behavior.
457+
isec->align = MinAlign(isec->align, offset);
458+
}
459+
}
460+
461+
symbols.resize(nList.size());
418462
for (size_t i = 0, n = nList.size(); i < n; ++i) {
419463
const structs::nlist_64 &sym = nList[i];
420464
StringRef name = strtab + sym.n_strx;
@@ -425,7 +469,7 @@ void ObjFile::parseSymbols(ArrayRef<structs::nlist_64> nList,
425469
}
426470

427471
const section_64 &sec = sectionHeaders[sym.n_sect - 1];
428-
SubsectionMap &subsecMap = subsections[sym.n_sect - 1];
472+
SubsectionMapping &subsecMap = subsections[sym.n_sect - 1];
429473

430474
// parseSections() may have chosen not to parse this section.
431475
if (subsecMap.empty())
@@ -437,55 +481,18 @@ void ObjFile::parseSymbols(ArrayRef<structs::nlist_64> nList,
437481
// use the same subsection. Otherwise, we must split the sections along
438482
// symbol boundaries.
439483
if (!subsectionsViaSymbols) {
440-
symbols[i] = createDefined(sym, name, subsecMap[0], offset);
484+
symbols[i] = createDefined(sym, name, subsecMap.front().isec, offset);
441485
continue;
442486
}
443487

444-
// nList entries aren't necessarily arranged in address order. Therefore,
445-
// we can't create alt-entry symbols at this point because a later symbol
446-
// may split its section, which may affect which subsection the alt-entry
447-
// symbol is assigned to. So we need to handle them in a second pass below.
448-
if (sym.n_desc & N_ALT_ENTRY) {
449-
altEntrySymIdxs.push_back(i);
450-
continue;
451-
}
452-
453-
// Find the subsection corresponding to the greatest section offset that is
454-
// <= that of the current symbol. The subsection that we find either needs
455-
// to be used directly or split in two.
456-
uint32_t firstSize = offset;
457-
InputSection *firstIsec = findContainingSubsection(subsecMap, &firstSize);
458-
459-
if (firstSize == 0) {
460-
// Alias of an existing symbol, or the first symbol in the section. These
461-
// are handled by reusing the existing section.
462-
symbols[i] = createDefined(sym, name, firstIsec, 0);
463-
continue;
464-
}
465-
466-
// We saw a symbol definition at a new offset. Split the section into two
467-
// subsections. The new symbol uses the second subsection.
468-
auto *secondIsec = make<InputSection>(*firstIsec);
469-
secondIsec->data = firstIsec->data.slice(firstSize);
470-
firstIsec->data = firstIsec->data.slice(0, firstSize);
471-
// TODO: ld64 appears to preserve the original alignment as well as each
472-
// subsection's offset from the last aligned address. We should consider
473-
// emulating that behavior.
474-
secondIsec->align = MinAlign(firstIsec->align, offset);
475-
476-
subsecMap[offset] = secondIsec;
477-
// By construction, the symbol will be at offset zero in the new section.
478-
symbols[i] = createDefined(sym, name, secondIsec, 0);
488+
InputSection *subsec = findContainingSubsection(subsecMap, &offset);
489+
symbols[i] = createDefined(sym, name, subsec, offset);
479490
}
480491

481-
for (size_t idx : altEntrySymIdxs) {
482-
const structs::nlist_64 &sym = nList[idx];
483-
StringRef name = strtab + sym.n_strx;
484-
SubsectionMap &subsecMap = subsections[sym.n_sect - 1];
485-
uint32_t off = sym.n_value - sectionHeaders[sym.n_sect - 1].addr;
486-
InputSection *subsec = findContainingSubsection(subsecMap, &off);
487-
symbols[idx] = createDefined(sym, name, subsec, off);
488-
}
492+
if (!subsectionsViaSymbols)
493+
for (SubsectionMapping &subsectionMap : subsections)
494+
if (!subsectionMap.empty())
495+
subsectionMap = {subsectionMap.front()};
489496
}
490497

491498
OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName,

lld/MachO/InputFiles.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,13 @@ enum class RefState : uint8_t;
4747
extern std::unique_ptr<llvm::TarWriter> tar;
4848

4949
// If .subsections_via_symbols is set, each InputSection will be split along
50-
// symbol boundaries. The keys of a SubsectionMap represent the offsets of
51-
// each subsection from the start of the original pre-split InputSection.
52-
using SubsectionMap = std::map<uint32_t, InputSection *>;
50+
// symbol boundaries. The field offset represents the offset of the subsection
51+
// from the start of the original pre-split InputSection.
52+
struct SubsectionEntry {
53+
uint64_t offset;
54+
InputSection *isec;
55+
};
56+
using SubsectionMapping = std::vector<SubsectionEntry>;
5357

5458
class InputFile {
5559
public:
@@ -68,7 +72,7 @@ class InputFile {
6872
MemoryBufferRef mb;
6973

7074
std::vector<Symbol *> symbols;
71-
std::vector<SubsectionMap> subsections;
75+
std::vector<SubsectionMapping> subsections;
7276
// Provides an easy way to sort InputFiles deterministically.
7377
const int id;
7478

@@ -105,7 +109,7 @@ class ObjFile : public InputFile {
105109
void parseSymbols(ArrayRef<lld::structs::nlist_64> nList, const char *strtab,
106110
bool subsectionsViaSymbols);
107111
Symbol *parseNonSectionSymbol(const structs::nlist_64 &sym, StringRef name);
108-
void parseRelocations(const llvm::MachO::section_64 &, SubsectionMap &);
112+
void parseRelocations(const llvm::MachO::section_64 &, SubsectionMapping &);
109113
void parseDebugInfo();
110114
};
111115

lld/MachO/Relocations.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ struct Reloc {
5656
uint8_t length = 0;
5757
// The offset from the start of the subsection that this relocation belongs
5858
// to.
59-
uint32_t offset = 0;
59+
uint64_t offset = 0;
6060
// Adding this offset to the address of the referent symbol or subsection
6161
// gives the destination that this relocation refers to.
6262
int64_t addend = 0;

0 commit comments

Comments
 (0)