Skip to content

Commit 2a3a79c

Browse files
authored
[lld-macho][NFC] Preserve original symbol isec, unwindEntry and size (#88357)
Currently, when moving symbols from one `InputSection` to another (as in ICF), we directly update the symbol's `isec`, `unwindEntry` and `size`. By doing this we lose the original information, which will be needed in a future change. Because moving a symbol always sets the symbol's `wasCoalesced` and `isec->replacement`, we can use these to conditionally derive the information we need at access time.
1 parent a71c1b3 commit 2a3a79c

14 files changed

+134
-120
lines changed

lld/MachO/ICF.cpp

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -133,13 +133,13 @@ bool ICF::equalsConstant(const ConcatInputSection *ia,
133133
assert(isa<Defined>(sa));
134134
const auto *da = cast<Defined>(sa);
135135
const auto *db = cast<Defined>(sb);
136-
if (!da->isec || !db->isec) {
136+
if (!da->isec() || !db->isec()) {
137137
assert(da->isAbsolute() && db->isAbsolute());
138138
return da->value + ra.addend == db->value + rb.addend;
139139
}
140-
isecA = da->isec;
140+
isecA = da->isec();
141141
valueA = da->value;
142-
isecB = db->isec;
142+
isecB = db->isec();
143143
valueB = db->value;
144144
} else {
145145
isecA = ra.referent.get<InputSection *>();
@@ -191,10 +191,10 @@ bool ICF::equalsVariable(const ConcatInputSection *ia,
191191
const auto *db = cast<Defined>(rb.referent.get<Symbol *>());
192192
if (da->isAbsolute())
193193
return true;
194-
isecA = dyn_cast<ConcatInputSection>(da->isec);
194+
isecA = dyn_cast<ConcatInputSection>(da->isec());
195195
if (!isecA)
196196
return true; // literal sections were checked in equalsConstant.
197-
isecB = cast<ConcatInputSection>(db->isec);
197+
isecB = cast<ConcatInputSection>(db->isec());
198198
} else {
199199
const auto *sa = ra.referent.get<InputSection *>();
200200
const auto *sb = rb.referent.get<InputSection *>();
@@ -212,7 +212,7 @@ bool ICF::equalsVariable(const ConcatInputSection *ia,
212212
// info matches. For simplicity, we only handle the case where there are only
213213
// symbols at offset zero within the section (which is typically the case with
214214
// .subsections_via_symbols.)
215-
auto hasUnwind = [](Defined *d) { return d->unwindEntry != nullptr; };
215+
auto hasUnwind = [](Defined *d) { return d->unwindEntry() != nullptr; };
216216
const auto *itA = llvm::find_if(ia->symbols, hasUnwind);
217217
const auto *itB = llvm::find_if(ib->symbols, hasUnwind);
218218
if (itA == ia->symbols.end())
@@ -221,8 +221,8 @@ bool ICF::equalsVariable(const ConcatInputSection *ia,
221221
return false;
222222
const Defined *da = *itA;
223223
const Defined *db = *itB;
224-
if (da->unwindEntry->icfEqClass[icfPass % 2] !=
225-
db->unwindEntry->icfEqClass[icfPass % 2] ||
224+
if (da->unwindEntry()->icfEqClass[icfPass % 2] !=
225+
db->unwindEntry()->icfEqClass[icfPass % 2] ||
226226
da->value != 0 || db->value != 0)
227227
return false;
228228
auto isZero = [](Defined *d) { return d->value == 0; };
@@ -289,13 +289,13 @@ void ICF::run() {
289289
for (const Reloc &r : isec->relocs) {
290290
if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
291291
if (auto *defined = dyn_cast<Defined>(sym)) {
292-
if (defined->isec) {
292+
if (defined->isec()) {
293293
if (auto *referentIsec =
294-
dyn_cast<ConcatInputSection>(defined->isec))
294+
dyn_cast<ConcatInputSection>(defined->isec()))
295295
hash += defined->value + referentIsec->icfEqClass[icfPass % 2];
296296
else
297-
hash += defined->isec->kind() +
298-
defined->isec->getOffset(defined->value);
297+
hash += defined->isec()->kind() +
298+
defined->isec()->getOffset(defined->value);
299299
} else {
300300
hash += defined->value;
301301
}
@@ -368,8 +368,8 @@ void ICF::segregate(size_t begin, size_t end, EqualsFn equals) {
368368

369369
void macho::markSymAsAddrSig(Symbol *s) {
370370
if (auto *d = dyn_cast_or_null<Defined>(s))
371-
if (d->isec)
372-
d->isec->keepUnique = true;
371+
if (d->isec())
372+
d->isec()->keepUnique = true;
373373
}
374374

375375
void macho::markAddrSigSymbols() {
@@ -430,8 +430,8 @@ void macho::foldIdenticalSections(bool onlyCfStrings) {
430430
if (isFoldable) {
431431
foldable.push_back(isec);
432432
for (Defined *d : isec->symbols)
433-
if (d->unwindEntry)
434-
foldable.push_back(d->unwindEntry);
433+
if (d->unwindEntry())
434+
foldable.push_back(d->unwindEntry());
435435

436436
// Some sections have embedded addends that foil ICF's hashing / equality
437437
// checks. (We can ignore embedded addends when doing ICF because the same

lld/MachO/InputFiles.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1170,7 +1170,7 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
11701170
continue;
11711171
}
11721172
add += sym->value;
1173-
referentIsec = cast<ConcatInputSection>(sym->isec);
1173+
referentIsec = cast<ConcatInputSection>(sym->isec());
11741174
} else {
11751175
referentIsec =
11761176
cast<ConcatInputSection>(r.referent.dyn_cast<InputSection *>());
@@ -1191,7 +1191,7 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
11911191
++it;
11921192
continue;
11931193
}
1194-
d->unwindEntry = isec;
1194+
d->originalUnwindEntry = isec;
11951195
// Now that the symbol points to the unwind entry, we can remove the reloc
11961196
// that points from the unwind entry back to the symbol.
11971197
//
@@ -1348,7 +1348,7 @@ targetSymFromCanonicalSubtractor(const InputSection *isec,
13481348
}
13491349
if (Invert)
13501350
std::swap(pcSym, target);
1351-
if (pcSym->isec == isec) {
1351+
if (pcSym->isec() == isec) {
13521352
if (pcSym->value - (Invert ? -1 : 1) * minuend.addend != subtrahend.offset)
13531353
fatal("invalid FDE relocation in __eh_frame");
13541354
} else {
@@ -1420,7 +1420,7 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
14201420
// We already have an explicit relocation for the CIE offset.
14211421
cieIsec =
14221422
targetSymFromCanonicalSubtractor</*Invert=*/true>(isec, cieOffRelocIt)
1423-
->isec;
1423+
->isec();
14241424
dataOff += sizeof(uint32_t);
14251425
} else {
14261426
// If we haven't found a relocation, then the CIE offset is most likely
@@ -1480,15 +1480,15 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
14801480
// to register the unwind entry under same symbol.
14811481
// This is not particularly efficient, but we should run into this case
14821482
// infrequently (only when handling the output of `ld -r`).
1483-
if (funcSym->isec)
1484-
funcSym = findSymbolAtOffset(cast<ConcatInputSection>(funcSym->isec),
1483+
if (funcSym->isec())
1484+
funcSym = findSymbolAtOffset(cast<ConcatInputSection>(funcSym->isec()),
14851485
funcSym->value);
14861486
} else {
14871487
funcSym = findSymbolAtAddress(sections, funcAddr);
14881488
ehRelocator.makePcRel(funcAddrOff, funcSym, target->p2WordSize);
14891489
}
14901490
// The symbol has been coalesced, or already has a compact unwind entry.
1491-
if (!funcSym || funcSym->getFile() != this || funcSym->unwindEntry) {
1491+
if (!funcSym || funcSym->getFile() != this || funcSym->unwindEntry()) {
14921492
// We must prune unused FDEs for correctness, so we cannot rely on
14931493
// -dead_strip being enabled.
14941494
isec->live = false;
@@ -1497,7 +1497,8 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
14971497

14981498
InputSection *lsdaIsec = nullptr;
14991499
if (lsdaAddrRelocIt != isec->relocs.end()) {
1500-
lsdaIsec = targetSymFromCanonicalSubtractor(isec, lsdaAddrRelocIt)->isec;
1500+
lsdaIsec =
1501+
targetSymFromCanonicalSubtractor(isec, lsdaAddrRelocIt)->isec();
15011502
} else if (lsdaAddrOpt) {
15021503
uint64_t lsdaAddr = *lsdaAddrOpt;
15031504
Section *sec = findContainingSection(sections, &lsdaAddr);
@@ -1507,7 +1508,7 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
15071508
}
15081509

15091510
fdes[isec] = {funcLength, cie.personalitySymbol, lsdaIsec};
1510-
funcSym->unwindEntry = isec;
1511+
funcSym->originalUnwindEntry = isec;
15111512
ehRelocator.commit();
15121513
}
15131514

lld/MachO/InputSection.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -194,10 +194,8 @@ void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {
194194
copy->live = false;
195195
copy->wasCoalesced = true;
196196
copy->replacement = this;
197-
for (auto &copySym : copy->symbols) {
197+
for (auto &copySym : copy->symbols)
198198
copySym->wasIdenticalCodeFolded = true;
199-
copySym->size = 0;
200-
}
201199

202200
symbols.insert(symbols.end(), copy->symbols.begin(), copy->symbols.end());
203201
copy->symbols.clear();
@@ -207,7 +205,7 @@ void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {
207205
return;
208206
for (auto it = symbols.begin() + 1; it != symbols.end(); ++it) {
209207
assert((*it)->value == 0);
210-
(*it)->unwindEntry = nullptr;
208+
(*it)->originalUnwindEntry = nullptr;
211209
}
212210
}
213211

lld/MachO/MapFile.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,8 @@ static MapInfo gatherMapInfo() {
7777
// Only emit the prevailing definition of a symbol. Also, don't emit
7878
// the symbol if it is part of a cstring section (we use the literal
7979
// value instead, similar to ld64)
80-
if (d->isec && d->getFile() == file &&
81-
!isa<CStringInputSection>(d->isec)) {
80+
if (d->isec() && d->getFile() == file &&
81+
!isa<CStringInputSection>(d->isec())) {
8282
isReferencedFile = true;
8383
if (!d->isLive())
8484
info.deadSymbols.push_back(d);
@@ -155,6 +155,12 @@ static void printNonLazyPointerSection(raw_fd_ostream &os,
155155
target->wordSize, sym->getName().str().data());
156156
}
157157

158+
static uint64_t getSymSizeForMap(Defined *sym) {
159+
if (sym->wasIdenticalCodeFolded)
160+
return 0;
161+
return sym->size;
162+
}
163+
158164
void macho::writeMapFile() {
159165
if (config->mapFile.empty())
160166
return;
@@ -201,9 +207,10 @@ void macho::writeMapFile() {
201207
auto printIsecArrSyms = [&](const std::vector<ConcatInputSection *> &arr) {
202208
for (const ConcatInputSection *isec : arr) {
203209
for (Defined *sym : isec->symbols) {
204-
if (!(isPrivateLabel(sym->getName()) && sym->size == 0))
210+
if (!(isPrivateLabel(sym->getName()) && getSymSizeForMap(sym) == 0))
205211
os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
206-
sym->size, readerToFileOrdinal[sym->getFile()],
212+
getSymSizeForMap(sym),
213+
readerToFileOrdinal[sym->getFile()],
207214
sym->getName().str().data());
208215
}
209216
}
@@ -255,7 +262,7 @@ void macho::writeMapFile() {
255262
os << "# \tSize \tFile Name\n";
256263
for (Defined *sym : info.deadSymbols) {
257264
assert(!sym->isLive());
258-
os << format("<<dead>>\t0x%08llX\t[%3u] %s\n", sym->size,
265+
os << format("<<dead>>\t0x%08llX\t[%3u] %s\n", getSymSizeForMap(sym),
259266
readerToFileOrdinal[sym->getFile()],
260267
sym->getName().str().data());
261268
}

lld/MachO/MarkLive.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -110,10 +110,10 @@ void MarkLiveImpl<RecordWhyLive>::addSym(
110110
if (!config->whyLive.empty() && config->whyLive.match(s->getName()))
111111
printWhyLive(s, prev);
112112
if (auto *d = dyn_cast<Defined>(s)) {
113-
if (d->isec)
114-
enqueue(d->isec, d->value, prev);
115-
if (d->unwindEntry)
116-
enqueue(d->unwindEntry, 0, prev);
113+
if (d->isec())
114+
enqueue(d->isec(), d->value, prev);
115+
if (d->unwindEntry())
116+
enqueue(d->unwindEntry(), 0, prev);
117117
}
118118
}
119119

@@ -179,7 +179,7 @@ void MarkLiveImpl<RecordWhyLive>::markTransitively() {
179179
if (s->isLive()) {
180180
InputSection *referentIsec = nullptr;
181181
if (auto *d = dyn_cast<Defined>(s))
182-
referentIsec = d->isec;
182+
referentIsec = d->isec();
183183
enqueue(isec, 0, makeEntry(referentIsec, nullptr));
184184
}
185185
} else {

lld/MachO/ObjC.cpp

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -191,8 +191,8 @@ static StringRef getReferentString(const Reloc &r) {
191191
if (auto *isec = r.referent.dyn_cast<InputSection *>())
192192
return cast<CStringInputSection>(isec)->getStringRefAtOffset(r.addend);
193193
auto *sym = cast<Defined>(r.referent.get<Symbol *>());
194-
return cast<CStringInputSection>(sym->isec)->getStringRefAtOffset(sym->value +
195-
r.addend);
194+
return cast<CStringInputSection>(sym->isec())
195+
->getStringRefAtOffset(sym->value + r.addend);
196196
}
197197

198198
void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec,
@@ -306,15 +306,15 @@ void ObjcCategoryChecker::parseClass(const Defined *classSym) {
306306
return nullptr;
307307
};
308308

309-
const auto *classIsec = cast<ConcatInputSection>(classSym->isec);
309+
const auto *classIsec = cast<ConcatInputSection>(classSym->isec());
310310

311311
// Parse instance methods.
312312
if (const auto *instanceMethodsIsec = getMethodsIsec(classIsec))
313313
parseMethods(instanceMethodsIsec, classSym, classIsec, MCK_Class,
314314
MK_Instance);
315315

316316
// Class methods are contained in the metaclass.
317-
if (const auto *r = classSym->isec->getRelocAt(classLayout.metaClassOffset))
317+
if (const auto *r = classSym->isec()->getRelocAt(classLayout.metaClassOffset))
318318
if (const auto *classMethodsIsec = getMethodsIsec(
319319
cast<ConcatInputSection>(r->getReferentInputSection())))
320320
parseMethods(classMethodsIsec, classSym, classIsec, MCK_Class, MK_Static);
@@ -561,9 +561,9 @@ void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
561561
if (!sym)
562562
return;
563563

564-
if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec))
564+
if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec()))
565565
eraseISec(cisec);
566-
else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec)) {
566+
else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec())) {
567567
uint32_t totalOffset = sym->value + reloc->addend;
568568
StringPiece &piece = csisec->getStringPiece(totalOffset);
569569
piece.live = false;
@@ -588,7 +588,7 @@ void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
588588
assert(catNameSym && "Category does not have a valid name Symbol");
589589

590590
collectSectionWriteInfoFromIsec<CStringSection>(
591-
catNameSym->isec, infoCategoryWriter.catNameInfo);
591+
catNameSym->isec(), infoCategoryWriter.catNameInfo);
592592
}
593593

594594
// Collect writer info from all the category lists (we're assuming they all
@@ -599,7 +599,7 @@ void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
599599
if (Defined *ptrList =
600600
tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, off)) {
601601
collectSectionWriteInfoFromIsec<ConcatOutputSection>(
602-
ptrList->isec, infoCategoryWriter.catPtrListInfo);
602+
ptrList->isec(), infoCategoryWriter.catPtrListInfo);
603603
// we've successfully collected data, so we can break
604604
break;
605605
}
@@ -627,7 +627,7 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
627627
// platform pointer size, but to simplify implementation we always just read
628628
// the lower 32b which should be good enough.
629629
uint32_t protocolCount = *reinterpret_cast<const uint32_t *>(
630-
ptrListSym->isec->data.data() + listHeaderLayout.structSizeOffset);
630+
ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
631631

632632
ptrList.structCount += protocolCount;
633633
ptrList.structSize = target->wordSize;
@@ -636,15 +636,15 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
636636
(protocolCount * target->wordSize) +
637637
/*header(count)*/ protocolListHeaderLayout.totalSize +
638638
/*extra null value*/ target->wordSize;
639-
assert(expectedListSize == ptrListSym->isec->data.size() &&
639+
assert(expectedListSize == ptrListSym->isec()->data.size() &&
640640
"Protocol list does not match expected size");
641641

642642
// Suppress unsuded var warning
643643
(void)expectedListSize;
644644

645645
uint32_t off = protocolListHeaderLayout.totalSize;
646646
for (uint32_t inx = 0; inx < protocolCount; ++inx) {
647-
const Reloc *reloc = ptrListSym->isec->getRelocAt(off);
647+
const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
648648
assert(reloc && "No reloc found at protocol list offset");
649649

650650
auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
@@ -653,7 +653,7 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
653653
ptrList.allPtrs.push_back(listSym);
654654
off += target->wordSize;
655655
}
656-
assert((ptrListSym->isec->getRelocAt(off) == nullptr) &&
656+
assert((ptrListSym->isec()->getRelocAt(off) == nullptr) &&
657657
"expected null terminating protocol");
658658
assert(off + /*extra null value*/ target->wordSize == expectedListSize &&
659659
"Protocol list end offset does not match expected size");
@@ -678,9 +678,9 @@ void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
678678
assert(ptrListSym && "Reloc does not have a valid Defined");
679679

680680
uint32_t thisStructSize = *reinterpret_cast<const uint32_t *>(
681-
ptrListSym->isec->data.data() + listHeaderLayout.structSizeOffset);
681+
ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
682682
uint32_t thisStructCount = *reinterpret_cast<const uint32_t *>(
683-
ptrListSym->isec->data.data() + listHeaderLayout.structCountOffset);
683+
ptrListSym->isec()->data.data() + listHeaderLayout.structCountOffset);
684684
assert(thisStructSize == ptrList.pointersPerStruct * target->wordSize);
685685

686686
assert(!ptrList.structSize || (thisStructSize == ptrList.structSize));
@@ -690,12 +690,12 @@ void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
690690

691691
uint32_t expectedListSize =
692692
listHeaderLayout.totalSize + (thisStructSize * thisStructCount);
693-
assert(expectedListSize == ptrListSym->isec->data.size() &&
693+
assert(expectedListSize == ptrListSym->isec()->data.size() &&
694694
"Pointer list does not match expected size");
695695

696696
for (uint32_t off = listHeaderLayout.totalSize; off < expectedListSize;
697697
off += target->wordSize) {
698-
const Reloc *reloc = ptrListSym->isec->getRelocAt(off);
698+
const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
699699
assert(reloc && "No reloc found at pointer list offset");
700700

701701
auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
@@ -1054,7 +1054,7 @@ void ObjcCategoryMerger::createSymbolReference(Defined *refFrom,
10541054
r.offset = offset;
10551055
r.addend = 0;
10561056
r.referent = const_cast<Symbol *>(refTo);
1057-
refFrom->isec->relocs.push_back(r);
1057+
refFrom->isec()->relocs.push_back(r);
10581058
}
10591059

10601060
void ObjcCategoryMerger::collectAndValidateCategoriesData() {
@@ -1076,7 +1076,7 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
10761076
if (!categorySym->getName().starts_with(objc::symbol_names::category))
10771077
continue;
10781078

1079-
auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec);
1079+
auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec());
10801080
assert(catBodyIsec &&
10811081
"Category data section is not an ConcatInputSection");
10821082

0 commit comments

Comments
 (0)