-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[lld-macho][NFC] Preserve original symbol isec, unwindEntry and size #88357
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-lld @llvm/pr-subscribers-lld-macho
Author: None (alx32)
Changes: Currently, when moving symbols from one `InputSection` to another (like in ICF) we directly update the symbol's `isec`, `unwindEntry` and `size`. […]
Patch is 38.01 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/88357.diff
14 Files Affected:
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp
index 0278bf7c6751a2..fc786b571dc64f 100644
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -133,13 +133,13 @@ bool ICF::equalsConstant(const ConcatInputSection *ia,
assert(isa<Defined>(sa));
const auto *da = cast<Defined>(sa);
const auto *db = cast<Defined>(sb);
- if (!da->isec || !db->isec) {
+ if (!da->isec() || !db->isec()) {
assert(da->isAbsolute() && db->isAbsolute());
return da->value + ra.addend == db->value + rb.addend;
}
- isecA = da->isec;
+ isecA = da->isec();
valueA = da->value;
- isecB = db->isec;
+ isecB = db->isec();
valueB = db->value;
} else {
isecA = ra.referent.get<InputSection *>();
@@ -191,10 +191,10 @@ bool ICF::equalsVariable(const ConcatInputSection *ia,
const auto *db = cast<Defined>(rb.referent.get<Symbol *>());
if (da->isAbsolute())
return true;
- isecA = dyn_cast<ConcatInputSection>(da->isec);
+ isecA = dyn_cast<ConcatInputSection>(da->isec());
if (!isecA)
return true; // literal sections were checked in equalsConstant.
- isecB = cast<ConcatInputSection>(db->isec);
+ isecB = cast<ConcatInputSection>(db->isec());
} else {
const auto *sa = ra.referent.get<InputSection *>();
const auto *sb = rb.referent.get<InputSection *>();
@@ -212,7 +212,7 @@ bool ICF::equalsVariable(const ConcatInputSection *ia,
// info matches. For simplicity, we only handle the case where there are only
// symbols at offset zero within the section (which is typically the case with
// .subsections_via_symbols.)
- auto hasUnwind = [](Defined *d) { return d->unwindEntry != nullptr; };
+ auto hasUnwind = [](Defined *d) { return d->unwindEntry() != nullptr; };
const auto *itA = llvm::find_if(ia->symbols, hasUnwind);
const auto *itB = llvm::find_if(ib->symbols, hasUnwind);
if (itA == ia->symbols.end())
@@ -221,8 +221,8 @@ bool ICF::equalsVariable(const ConcatInputSection *ia,
return false;
const Defined *da = *itA;
const Defined *db = *itB;
- if (da->unwindEntry->icfEqClass[icfPass % 2] !=
- db->unwindEntry->icfEqClass[icfPass % 2] ||
+ if (da->unwindEntry()->icfEqClass[icfPass % 2] !=
+ db->unwindEntry()->icfEqClass[icfPass % 2] ||
da->value != 0 || db->value != 0)
return false;
auto isZero = [](Defined *d) { return d->value == 0; };
@@ -289,13 +289,13 @@ void ICF::run() {
for (const Reloc &r : isec->relocs) {
if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
if (auto *defined = dyn_cast<Defined>(sym)) {
- if (defined->isec) {
+ if (defined->isec()) {
if (auto *referentIsec =
- dyn_cast<ConcatInputSection>(defined->isec))
+ dyn_cast<ConcatInputSection>(defined->isec()))
hash += defined->value + referentIsec->icfEqClass[icfPass % 2];
else
- hash += defined->isec->kind() +
- defined->isec->getOffset(defined->value);
+ hash += defined->isec()->kind() +
+ defined->isec()->getOffset(defined->value);
} else {
hash += defined->value;
}
@@ -368,8 +368,8 @@ void ICF::segregate(size_t begin, size_t end, EqualsFn equals) {
void macho::markSymAsAddrSig(Symbol *s) {
if (auto *d = dyn_cast_or_null<Defined>(s))
- if (d->isec)
- d->isec->keepUnique = true;
+ if (d->isec())
+ d->isec()->keepUnique = true;
}
void macho::markAddrSigSymbols() {
@@ -430,8 +430,8 @@ void macho::foldIdenticalSections(bool onlyCfStrings) {
if (isFoldable) {
foldable.push_back(isec);
for (Defined *d : isec->symbols)
- if (d->unwindEntry)
- foldable.push_back(d->unwindEntry);
+ if (d->unwindEntry())
+ foldable.push_back(d->unwindEntry());
// Some sections have embedded addends that foil ICF's hashing / equality
// checks. (We can ignore embedded addends when doing ICF because the same
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index b36d390cc16ade..8d66b37534f47e 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -1170,7 +1170,7 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
continue;
}
add += sym->value;
- referentIsec = cast<ConcatInputSection>(sym->isec);
+ referentIsec = cast<ConcatInputSection>(sym->isec());
} else {
referentIsec =
cast<ConcatInputSection>(r.referent.dyn_cast<InputSection *>());
@@ -1191,7 +1191,7 @@ void ObjFile::registerCompactUnwind(Section &compactUnwindSection) {
++it;
continue;
}
- d->unwindEntry = isec;
+ d->originalUnwindEntry = isec;
// Now that the symbol points to the unwind entry, we can remove the reloc
// that points from the unwind entry back to the symbol.
//
@@ -1348,7 +1348,7 @@ targetSymFromCanonicalSubtractor(const InputSection *isec,
}
if (Invert)
std::swap(pcSym, target);
- if (pcSym->isec == isec) {
+ if (pcSym->isec() == isec) {
if (pcSym->value - (Invert ? -1 : 1) * minuend.addend != subtrahend.offset)
fatal("invalid FDE relocation in __eh_frame");
} else {
@@ -1420,7 +1420,7 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
// We already have an explicit relocation for the CIE offset.
cieIsec =
targetSymFromCanonicalSubtractor</*Invert=*/true>(isec, cieOffRelocIt)
- ->isec;
+ ->isec();
dataOff += sizeof(uint32_t);
} else {
// If we haven't found a relocation, then the CIE offset is most likely
@@ -1480,15 +1480,15 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
// to register the unwind entry under same symbol.
// This is not particularly efficient, but we should run into this case
// infrequently (only when handling the output of `ld -r`).
- if (funcSym->isec)
- funcSym = findSymbolAtOffset(cast<ConcatInputSection>(funcSym->isec),
+ if (funcSym->isec())
+ funcSym = findSymbolAtOffset(cast<ConcatInputSection>(funcSym->isec()),
funcSym->value);
} else {
funcSym = findSymbolAtAddress(sections, funcAddr);
ehRelocator.makePcRel(funcAddrOff, funcSym, target->p2WordSize);
}
// The symbol has been coalesced, or already has a compact unwind entry.
- if (!funcSym || funcSym->getFile() != this || funcSym->unwindEntry) {
+ if (!funcSym || funcSym->getFile() != this || funcSym->unwindEntry()) {
// We must prune unused FDEs for correctness, so we cannot rely on
// -dead_strip being enabled.
isec->live = false;
@@ -1497,7 +1497,8 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
InputSection *lsdaIsec = nullptr;
if (lsdaAddrRelocIt != isec->relocs.end()) {
- lsdaIsec = targetSymFromCanonicalSubtractor(isec, lsdaAddrRelocIt)->isec;
+ lsdaIsec =
+ targetSymFromCanonicalSubtractor(isec, lsdaAddrRelocIt)->isec();
} else if (lsdaAddrOpt) {
uint64_t lsdaAddr = *lsdaAddrOpt;
Section *sec = findContainingSection(sections, &lsdaAddr);
@@ -1507,7 +1508,7 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) {
}
fdes[isec] = {funcLength, cie.personalitySymbol, lsdaIsec};
- funcSym->unwindEntry = isec;
+ funcSym->originalUnwindEntry = isec;
ehRelocator.commit();
}
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 5c1e07cd21b1fb..904701731684b3 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -194,10 +194,8 @@ void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {
copy->live = false;
copy->wasCoalesced = true;
copy->replacement = this;
- for (auto &copySym : copy->symbols) {
+ for (auto &copySym : copy->symbols)
copySym->wasIdenticalCodeFolded = true;
- copySym->size = 0;
- }
symbols.insert(symbols.end(), copy->symbols.begin(), copy->symbols.end());
copy->symbols.clear();
@@ -207,7 +205,7 @@ void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {
return;
for (auto it = symbols.begin() + 1; it != symbols.end(); ++it) {
assert((*it)->value == 0);
- (*it)->unwindEntry = nullptr;
+ (*it)->originalUnwindEntry = nullptr;
}
}
diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index 2a31a5c09cdd22..5bcaeca48da2a2 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -77,8 +77,8 @@ static MapInfo gatherMapInfo() {
// Only emit the prevailing definition of a symbol. Also, don't emit
// the symbol if it is part of a cstring section (we use the literal
// value instead, similar to ld64)
- if (d->isec && d->getFile() == file &&
- !isa<CStringInputSection>(d->isec)) {
+ if (d->isec() && d->getFile() == file &&
+ !isa<CStringInputSection>(d->isec())) {
isReferencedFile = true;
if (!d->isLive())
info.deadSymbols.push_back(d);
@@ -155,6 +155,12 @@ static void printNonLazyPointerSection(raw_fd_ostream &os,
target->wordSize, sym->getName().str().data());
}
+static uint64_t getSymSizeForMap(Defined *sym) {
+ if (sym->wasIdenticalCodeFolded)
+ return 0;
+ return sym->size;
+}
+
void macho::writeMapFile() {
if (config->mapFile.empty())
return;
@@ -201,9 +207,10 @@ void macho::writeMapFile() {
auto printIsecArrSyms = [&](const std::vector<ConcatInputSection *> &arr) {
for (const ConcatInputSection *isec : arr) {
for (Defined *sym : isec->symbols) {
- if (!(isPrivateLabel(sym->getName()) && sym->size == 0))
+ if (!(isPrivateLabel(sym->getName()) && getSymSizeForMap(sym) == 0))
os << format("0x%08llX\t0x%08llX\t[%3u] %s\n", sym->getVA(),
- sym->size, readerToFileOrdinal[sym->getFile()],
+ getSymSizeForMap(sym),
+ readerToFileOrdinal[sym->getFile()],
sym->getName().str().data());
}
}
@@ -255,7 +262,7 @@ void macho::writeMapFile() {
os << "# \tSize \tFile Name\n";
for (Defined *sym : info.deadSymbols) {
assert(!sym->isLive());
- os << format("<<dead>>\t0x%08llX\t[%3u] %s\n", sym->size,
+ os << format("<<dead>>\t0x%08llX\t[%3u] %s\n", getSymSizeForMap(sym),
readerToFileOrdinal[sym->getFile()],
sym->getName().str().data());
}
diff --git a/lld/MachO/MarkLive.cpp b/lld/MachO/MarkLive.cpp
index a37213d5613afb..c26c3aa321197e 100644
--- a/lld/MachO/MarkLive.cpp
+++ b/lld/MachO/MarkLive.cpp
@@ -110,10 +110,10 @@ void MarkLiveImpl<RecordWhyLive>::addSym(
if (!config->whyLive.empty() && config->whyLive.match(s->getName()))
printWhyLive(s, prev);
if (auto *d = dyn_cast<Defined>(s)) {
- if (d->isec)
- enqueue(d->isec, d->value, prev);
- if (d->unwindEntry)
- enqueue(d->unwindEntry, 0, prev);
+ if (d->isec())
+ enqueue(d->isec(), d->value, prev);
+ if (d->unwindEntry())
+ enqueue(d->unwindEntry(), 0, prev);
}
}
@@ -179,7 +179,7 @@ void MarkLiveImpl<RecordWhyLive>::markTransitively() {
if (s->isLive()) {
InputSection *referentIsec = nullptr;
if (auto *d = dyn_cast<Defined>(s))
- referentIsec = d->isec;
+ referentIsec = d->isec();
enqueue(isec, 0, makeEntry(referentIsec, nullptr));
}
} else {
diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 5902b82d30f556..6d5d35809ba5a1 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -191,8 +191,8 @@ static StringRef getReferentString(const Reloc &r) {
if (auto *isec = r.referent.dyn_cast<InputSection *>())
return cast<CStringInputSection>(isec)->getStringRefAtOffset(r.addend);
auto *sym = cast<Defined>(r.referent.get<Symbol *>());
- return cast<CStringInputSection>(sym->isec)->getStringRefAtOffset(sym->value +
- r.addend);
+ return cast<CStringInputSection>(sym->isec())
+ ->getStringRefAtOffset(sym->value + r.addend);
}
void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec,
@@ -306,7 +306,7 @@ void ObjcCategoryChecker::parseClass(const Defined *classSym) {
return nullptr;
};
- const auto *classIsec = cast<ConcatInputSection>(classSym->isec);
+ const auto *classIsec = cast<ConcatInputSection>(classSym->isec());
// Parse instance methods.
if (const auto *instanceMethodsIsec = getMethodsIsec(classIsec))
@@ -314,7 +314,7 @@ void ObjcCategoryChecker::parseClass(const Defined *classSym) {
MK_Instance);
// Class methods are contained in the metaclass.
- if (const auto *r = classSym->isec->getRelocAt(classLayout.metaClassOffset))
+ if (const auto *r = classSym->isec()->getRelocAt(classLayout.metaClassOffset))
if (const auto *classMethodsIsec = getMethodsIsec(
cast<ConcatInputSection>(r->getReferentInputSection())))
parseMethods(classMethodsIsec, classSym, classIsec, MCK_Class, MK_Static);
@@ -561,9 +561,9 @@ void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
if (!sym)
return;
- if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec))
+ if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec()))
eraseISec(cisec);
- else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec)) {
+ else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec())) {
uint32_t totalOffset = sym->value + reloc->addend;
StringPiece &piece = csisec->getStringPiece(totalOffset);
piece.live = false;
@@ -588,7 +588,7 @@ void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
assert(catNameSym && "Category does not have a valid name Symbol");
collectSectionWriteInfoFromIsec<CStringSection>(
- catNameSym->isec, infoCategoryWriter.catNameInfo);
+ catNameSym->isec(), infoCategoryWriter.catNameInfo);
}
// Collect writer info from all the category lists (we're assuming they all
@@ -599,7 +599,7 @@ void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
if (Defined *ptrList =
tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, off)) {
collectSectionWriteInfoFromIsec<ConcatOutputSection>(
- ptrList->isec, infoCategoryWriter.catPtrListInfo);
+ ptrList->isec(), infoCategoryWriter.catPtrListInfo);
// we've successfully collected data, so we can break
break;
}
@@ -627,7 +627,7 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
// platform pointer size, but to simplify implementation we always just read
// the lower 32b which should be good enough.
uint32_t protocolCount = *reinterpret_cast<const uint32_t *>(
- ptrListSym->isec->data.data() + listHeaderLayout.structSizeOffset);
+ ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
ptrList.structCount += protocolCount;
ptrList.structSize = target->wordSize;
@@ -636,7 +636,7 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
(protocolCount * target->wordSize) +
/*header(count)*/ protocolListHeaderLayout.totalSize +
/*extra null value*/ target->wordSize;
- assert(expectedListSize == ptrListSym->isec->data.size() &&
+ assert(expectedListSize == ptrListSym->isec()->data.size() &&
"Protocol list does not match expected size");
// Suppress unsuded var warning
@@ -644,7 +644,7 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
uint32_t off = protocolListHeaderLayout.totalSize;
for (uint32_t inx = 0; inx < protocolCount; ++inx) {
- const Reloc *reloc = ptrListSym->isec->getRelocAt(off);
+ const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
assert(reloc && "No reloc found at protocol list offset");
auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
@@ -653,7 +653,7 @@ void ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
ptrList.allPtrs.push_back(listSym);
off += target->wordSize;
}
- assert((ptrListSym->isec->getRelocAt(off) == nullptr) &&
+ assert((ptrListSym->isec()->getRelocAt(off) == nullptr) &&
"expected null terminating protocol");
assert(off + /*extra null value*/ target->wordSize == expectedListSize &&
"Protocol list end offset does not match expected size");
@@ -678,9 +678,9 @@ void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
assert(ptrListSym && "Reloc does not have a valid Defined");
uint32_t thisStructSize = *reinterpret_cast<const uint32_t *>(
- ptrListSym->isec->data.data() + listHeaderLayout.structSizeOffset);
+ ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
uint32_t thisStructCount = *reinterpret_cast<const uint32_t *>(
- ptrListSym->isec->data.data() + listHeaderLayout.structCountOffset);
+ ptrListSym->isec()->data.data() + listHeaderLayout.structCountOffset);
assert(thisStructSize == ptrList.pointersPerStruct * target->wordSize);
assert(!ptrList.structSize || (thisStructSize == ptrList.structSize));
@@ -690,12 +690,12 @@ void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
uint32_t expectedListSize =
listHeaderLayout.totalSize + (thisStructSize * thisStructCount);
- assert(expectedListSize == ptrListSym->isec->data.size() &&
+ assert(expectedListSize == ptrListSym->isec()->data.size() &&
"Pointer list does not match expected size");
for (uint32_t off = listHeaderLayout.totalSize; off < expectedListSize;
off += target->wordSize) {
- const Reloc *reloc = ptrListSym->isec->getRelocAt(off);
+ const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
assert(reloc && "No reloc found at pointer list offset");
auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
@@ -1054,7 +1054,7 @@ void ObjcCategoryMerger::createSymbolReference(Defined *refFrom,
r.offset = offset;
r.addend = 0;
r.referent = const_cast<Symbol *>(refTo);
- refFrom->isec->relocs.push_back(r);
+ refFrom->isec()->relocs.push_back(r);
}
void ObjcCategoryMerger::collectAndValidateCategoriesData() {
@@ -1076,7 +1076,7 @@ void ObjcCategoryMerger::collectAndValidateCategoriesData() {
if (!categorySym->getName().starts_with(objc::symbol_names::category))
continue;
- auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec);
+ auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec());
assert(catBodyIsec &&
"Category data section is not an ConcatInputSection");
diff --git a/lld/MachO/Relocations.cpp b/lld/MachO/Relocations.cpp
index 4e840c6912cc57..afe7f454e6a230 100644
--- a/lld/MachO/Relocations.cpp
+++ b/lld/MachO/Relocations.cpp
@@ -24,7 +24,7 @@ static_assert(sizeof(void *) != 8 || sizeof(Reloc) == 24,
InputSection *Reloc::getReferentInputSection() const {
if (const auto *sym = referent.dyn_cast<Symbol *>()) {
if (const auto *d = dyn_cast<Defined>(sym))
- return d->isec;
+ return d->isec();
return nullptr;
} else {
return referent.get<InputSection *>();
diff --git a/lld/MachO/SectionPriorities.cpp b/lld/MachO/SectionPriorities.cpp
index 976ea03980e963..907aee29d2386f 100644
--- a/lld/MachO/SectionPriorities.cpp
+++ b/lld/MachO/SectionPriorities.cpp
@@ -236,7 +236,7 @@ DenseMap<const InputSection *, size_t> CallGraphSort::run() {
// section.
for (Symbol *sym : isec->getFile()->symbols) {
if (auto *d = dyn_cast_or_null<Defined>(sym)) {
- if (d->isec == isec)
+ if (d->isec() == isec)
os << sym->getName() << "\n";
}
}
@@ -258,7 +258,7 @@ macho::PriorityBuilder::getSymbolPriority(const Defined *sym) {
if (it == priorities.end())
return std::nullopt;
const SymbolPriorityEntry &entry = it->second;
- const InputFile *f = sym->isec->...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm!
d391136
to
f618ad4
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM!
Currently, when moving symbols from one `InputSection` to another (like in ICF), we directly update the symbol's `isec`, `unwindEntry` and `size`. By doing this we lose the original information. This information will be needed in a future change. Since when moving symbols we always set the symbol's `wasCoalesced` and `isec->replacement`, we can just use this info to conditionally get the information we need at access time.