-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LLD][COFF] Store reference to SymbolTable instead of COFFLinkerContext in InputFile (NFC) #119296
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
In preparation for #119294. |
@llvm/pr-subscribers-lld-coff @llvm/pr-subscribers-lld Author: Jacek Caban (cjacek) ChangesThis change prepares for the introduction of separate hybrid namespaces. Hybrid images will require two Patch is 36.47 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/119296.diff 9 Files Affected:
diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index 23fab0e66bb67f..6490adcc81d66e 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -56,7 +56,7 @@ SectionChunk::SectionChunk(ObjFile *f, const coff_section *h, Kind k)
// files will be built with -ffunction-sections or /Gy, so most things worth
// stripping will be in a comdat.
if (file)
- live = !file->ctx.config.doGC || !isCOMDAT();
+ live = !file->symtab.ctx.config.doGC || !isCOMDAT();
else
live = true;
}
@@ -129,7 +129,7 @@ void SectionChunk::applyRelX64(uint8_t *off, uint16_t type, OutputSection *os,
case IMAGE_REL_AMD64_REL32_4: add32(off, s - p - 8); break;
case IMAGE_REL_AMD64_REL32_5: add32(off, s - p - 9); break;
case IMAGE_REL_AMD64_SECTION:
- applySecIdx(off, os, file->ctx.outputSections.size());
+ applySecIdx(off, os, file->symtab.ctx.outputSections.size());
break;
case IMAGE_REL_AMD64_SECREL: applySecRel(this, off, os, s); break;
default:
@@ -149,7 +149,7 @@ void SectionChunk::applyRelX86(uint8_t *off, uint16_t type, OutputSection *os,
case IMAGE_REL_I386_DIR32NB: add32(off, s); break;
case IMAGE_REL_I386_REL32: add32(off, s - p - 4); break;
case IMAGE_REL_I386_SECTION:
- applySecIdx(off, os, file->ctx.outputSections.size());
+ applySecIdx(off, os, file->symtab.ctx.outputSections.size());
break;
case IMAGE_REL_I386_SECREL: applySecRel(this, off, os, s); break;
default:
@@ -225,7 +225,7 @@ void SectionChunk::applyRelARM(uint8_t *off, uint16_t type, OutputSection *os,
case IMAGE_REL_ARM_BRANCH24T: applyBranch24T(off, sx - p - 4); break;
case IMAGE_REL_ARM_BLX23T: applyBranch24T(off, sx - p - 4); break;
case IMAGE_REL_ARM_SECTION:
- applySecIdx(off, os, file->ctx.outputSections.size());
+ applySecIdx(off, os, file->symtab.ctx.outputSections.size());
break;
case IMAGE_REL_ARM_SECREL: applySecRel(this, off, os, s); break;
case IMAGE_REL_ARM_REL32: add32(off, sx - p - 4); break;
@@ -346,7 +346,7 @@ void SectionChunk::applyRelARM64(uint8_t *off, uint16_t type, OutputSection *os,
case IMAGE_REL_ARM64_SECREL_HIGH12A: applySecRelHigh12A(this, off, os, s); break;
case IMAGE_REL_ARM64_SECREL_LOW12L: applySecRelLdr(this, off, os, s); break;
case IMAGE_REL_ARM64_SECTION:
- applySecIdx(off, os, file->ctx.outputSections.size());
+ applySecIdx(off, os, file->symtab.ctx.outputSections.size());
break;
case IMAGE_REL_ARM64_REL32: add32(off, s - p - 4); break;
default:
@@ -427,7 +427,8 @@ void SectionChunk::applyRelocation(uint8_t *off,
// section is needed to compute SECREL and SECTION relocations used in debug
// info.
Chunk *c = sym ? sym->getChunk() : nullptr;
- OutputSection *os = c ? file->ctx.getOutputSection(c) : nullptr;
+ COFFLinkerContext &ctx = file->symtab.ctx;
+ OutputSection *os = c ? ctx.getOutputSection(c) : nullptr;
// Skip the relocation if it refers to a discarded section, and diagnose it
// as an error if appropriate. If a symbol was discarded early, it may be
@@ -435,7 +436,7 @@ void SectionChunk::applyRelocation(uint8_t *off,
// it was an absolute or synthetic symbol.
if (!sym ||
(!os && !isa<DefinedAbsolute>(sym) && !isa<DefinedSynthetic>(sym))) {
- maybeReportRelocationToDiscarded(this, sym, rel, file->ctx.config.mingw);
+ maybeReportRelocationToDiscarded(this, sym, rel, ctx.config.mingw);
return;
}
@@ -443,7 +444,7 @@ void SectionChunk::applyRelocation(uint8_t *off,
// Compute the RVA of the relocation for relative relocations.
uint64_t p = rva + rel.VirtualAddress;
- uint64_t imageBase = file->ctx.config.imageBase;
+ uint64_t imageBase = ctx.config.imageBase;
switch (getArch()) {
case Triple::x86_64:
applyRelX64(off, rel.Type, os, s, p, imageBase);
@@ -669,7 +670,7 @@ void SectionChunk::getRuntimePseudoRelocs(
toString(file));
continue;
}
- int addressSizeInBits = file->ctx.config.is64() ? 64 : 32;
+ int addressSizeInBits = file->symtab.ctx.config.is64() ? 64 : 32;
if (sizeInBits < addressSizeInBits) {
warn("runtime pseudo relocation in " + toString(file) + " against " +
"symbol " + target->getName() + " is too narrow (only " +
@@ -1098,7 +1099,7 @@ void CHPERedirectionChunk::writeTo(uint8_t *buf) const {
}
ImportThunkChunkARM64EC::ImportThunkChunkARM64EC(ImportFile *file)
- : ImportThunkChunk(file->ctx, file->impSym), file(file) {}
+ : ImportThunkChunk(file->symtab.ctx, file->impSym), file(file) {}
size_t ImportThunkChunkARM64EC::getSize() const {
if (!extended)
@@ -1122,7 +1123,7 @@ void ImportThunkChunkARM64EC::writeTo(uint8_t *buf) const {
applyArm64Addr(buf + 8, exitThunkRVA, rva + 8, 12);
applyArm64Imm(buf + 12, exitThunkRVA & 0xfff, 0);
- Defined *helper = cast<Defined>(file->ctx.config.arm64ECIcallHelper);
+ Defined *helper = cast<Defined>(file->symtab.ctx.config.arm64ECIcallHelper);
if (extended) {
// Replace last instruction with an inline range extension thunk.
memcpy(buf + 16, arm64Thunk, sizeof(arm64Thunk));
@@ -1136,7 +1137,7 @@ void ImportThunkChunkARM64EC::writeTo(uint8_t *buf) const {
bool ImportThunkChunkARM64EC::verifyRanges() {
if (extended)
return true;
- auto helper = cast<Defined>(file->ctx.config.arm64ECIcallHelper);
+ auto helper = cast<Defined>(file->symtab.ctx.config.arm64ECIcallHelper);
return isInt<28>(helper->getRVA() - rva - 16);
}
diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp
index 0f6a40a41ca00f..3d6ed5a9ddeae9 100644
--- a/lld/COFF/DLL.cpp
+++ b/lld/COFF/DLL.cpp
@@ -160,13 +160,14 @@ class AuxImportChunk : public NonSectionChunk {
void writeTo(uint8_t *buf) const override {
uint64_t impchkVA = 0;
if (file->impchkThunk)
- impchkVA = file->impchkThunk->getRVA() + file->ctx.config.imageBase;
+ impchkVA =
+ file->impchkThunk->getRVA() + file->symtab.ctx.config.imageBase;
write64le(buf, impchkVA);
}
void getBaserels(std::vector<Baserel> *res) override {
if (file->impchkThunk)
- res->emplace_back(rva, file->ctx.config.machine);
+ res->emplace_back(rva, file->symtab.ctx.config.machine);
}
private:
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 714de67e88b065..b47b3ffec0a908 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -241,7 +241,7 @@ void LinkerDriver::addBuffer(std::unique_ptr<MemoryBuffer> mb,
break;
case file_magic::pecoff_executable:
if (ctx.config.mingw) {
- ctx.symtab.addFile(make<DLLFile>(ctx, mbref));
+ ctx.symtab.addFile(make<DLLFile>(ctx.symtab, mbref));
break;
}
if (filename.ends_with_insensitive(".dll")) {
diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp
index f32bc5bbbc35f1..42c1a9aa90a0f8 100644
--- a/lld/COFF/InputFiles.cpp
+++ b/lld/COFF/InputFiles.cpp
@@ -78,7 +78,7 @@ const COFFSyncStream &coff::operator<<(const COFFSyncStream &s,
/// Checks that Source is compatible with being a weak alias to Target.
/// If Source is Undefined and has no weak alias set, makes it a weak
/// alias to Target.
-static void checkAndSetWeakAlias(COFFLinkerContext &ctx, InputFile *f,
+static void checkAndSetWeakAlias(SymbolTable &symtab, InputFile *f,
Symbol *source, Symbol *target,
bool isAntiDep) {
if (auto *u = dyn_cast<Undefined>(source)) {
@@ -92,9 +92,9 @@ static void checkAndSetWeakAlias(COFFLinkerContext &ctx, InputFile *f,
// of another symbol emitted near the weak symbol.
// Just use the definition from the first object file that defined
// this weak symbol.
- if (ctx.config.allowDuplicateWeak)
+ if (symtab.ctx.config.allowDuplicateWeak)
return;
- ctx.symtab.reportDuplicate(source, f);
+ symtab.reportDuplicate(source, f);
}
}
u->setWeakAlias(target, isAntiDep);
@@ -106,9 +106,10 @@ static bool ignoredSymbolName(StringRef name) {
}
ArchiveFile::ArchiveFile(COFFLinkerContext &ctx, MemoryBufferRef m)
- : InputFile(ctx, ArchiveKind, m) {}
+ : InputFile(ctx.symtab, ArchiveKind, m) {}
void ArchiveFile::parse() {
+ COFFLinkerContext &ctx = symtab.ctx;
// Parse a MemoryBufferRef as an archive file.
file = CHECK(Archive::create(mb), this);
@@ -134,14 +135,14 @@ void ArchiveFile::parse() {
// Returns a buffer pointing to a member file containing a given symbol.
void ArchiveFile::addMember(const Archive::Symbol &sym) {
const Archive::Child &c =
- CHECK(sym.getMember(),
- "could not get the member for symbol " + toCOFFString(ctx, sym));
+ CHECK(sym.getMember(), "could not get the member for symbol " +
+ toCOFFString(symtab.ctx, sym));
// Return an empty buffer if we have already returned the same buffer.
if (!seen.insert(c.getChildOffset()).second)
return;
- ctx.driver.enqueueArchiveMember(c, sym, getName());
+ symtab.ctx.driver.enqueueArchiveMember(c, sym, getName());
}
std::vector<MemoryBufferRef>
@@ -161,6 +162,9 @@ lld::coff::getArchiveMembers(COFFLinkerContext &ctx, Archive *file) {
return v;
}
+ObjFile::ObjFile(COFFLinkerContext &ctx, MemoryBufferRef m, bool lazy)
+ : InputFile(ctx.symtab, ObjectKind, m, lazy) {}
+
void ObjFile::parseLazy() {
// Native object file.
std::unique_ptr<Binary> coffObjPtr = CHECK(createBinary(mb), this);
@@ -174,7 +178,7 @@ void ObjFile::parseLazy() {
StringRef name = check(coffObj->getSymbolName(coffSym));
if (coffSym.isAbsolute() && ignoredSymbolName(name))
continue;
- ctx.symtab.addLazyObject(this, name);
+ symtab.addLazyObject(this, name);
i += coffSym.getNumberOfAuxSymbols();
}
}
@@ -188,7 +192,8 @@ struct ECMapEntry {
void ObjFile::initializeECThunks() {
for (SectionChunk *chunk : hybmpChunks) {
if (chunk->getContents().size() % sizeof(ECMapEntry)) {
- Err(ctx) << "Invalid .hybmp chunk size " << chunk->getContents().size();
+ Err(symtab.ctx) << "Invalid .hybmp chunk size "
+ << chunk->getContents().size();
continue;
}
@@ -199,15 +204,15 @@ void ObjFile::initializeECThunks() {
auto entry = reinterpret_cast<const ECMapEntry *>(iter);
switch (entry->type) {
case Arm64ECThunkType::Entry:
- ctx.symtab.addEntryThunk(getSymbol(entry->src), getSymbol(entry->dst));
+ symtab.addEntryThunk(getSymbol(entry->src), getSymbol(entry->dst));
break;
case Arm64ECThunkType::Exit:
- ctx.symtab.addExitThunk(getSymbol(entry->src), getSymbol(entry->dst));
+ symtab.addExitThunk(getSymbol(entry->src), getSymbol(entry->dst));
break;
case Arm64ECThunkType::GuestExit:
break;
default:
- Warn(ctx) << "Ignoring unknown EC thunk type " << entry->type;
+ Warn(symtab.ctx) << "Ignoring unknown EC thunk type " << entry->type;
}
}
}
@@ -221,7 +226,7 @@ void ObjFile::parse() {
bin.release();
coffObj.reset(obj);
} else {
- Fatal(ctx) << toString(this) << " is not a COFF file";
+ Fatal(symtab.ctx) << toString(this) << " is not a COFF file";
}
// Read section and symbol tables.
@@ -235,7 +240,7 @@ void ObjFile::parse() {
const coff_section *ObjFile::getSection(uint32_t i) {
auto sec = coffObj->getSection(i);
if (!sec)
- Fatal(ctx) << "getSection failed: #" << i << ": " << sec.takeError();
+ Fatal(symtab.ctx) << "getSection failed: #" << i << ": " << sec.takeError();
return *sec;
}
@@ -268,8 +273,8 @@ SectionChunk *ObjFile::readSection(uint32_t sectionNumber,
if (Expected<StringRef> e = coffObj->getSectionName(sec))
name = *e;
else
- Fatal(ctx) << "getSectionName failed: #" << sectionNumber << ": "
- << e.takeError();
+ Fatal(symtab.ctx) << "getSectionName failed: #" << sectionNumber << ": "
+ << e.takeError();
if (name == ".drectve") {
ArrayRef<uint8_t> data;
@@ -299,7 +304,7 @@ SectionChunk *ObjFile::readSection(uint32_t sectionNumber,
// and then write it to a separate .pdb file.
// Ignore DWARF debug info unless requested to be included.
- if (!ctx.config.includeDwarfChunks && name.starts_with(".debug_"))
+ if (!symtab.ctx.config.includeDwarfChunks && name.starts_with(".debug_"))
return nullptr;
if (sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
@@ -328,12 +333,12 @@ SectionChunk *ObjFile::readSection(uint32_t sectionNumber,
sxDataChunks.push_back(c);
else if (isArm64EC(getMachineType()) && name == ".hybmp$x")
hybmpChunks.push_back(c);
- else if (ctx.config.tailMerge && sec->NumberOfRelocations == 0 &&
+ else if (symtab.ctx.config.tailMerge && sec->NumberOfRelocations == 0 &&
name == ".rdata" && leaderName.starts_with("??_C@"))
// COFF sections that look like string literal sections (i.e. no
// relocations, in .rdata, leader symbol name matches the MSVC name mangling
// for string literals) are subject to string tail merging.
- MergeChunk::addSection(ctx, c);
+ MergeChunk::addSection(symtab.ctx, c);
else if (name == ".rsrc" || name.starts_with(".rsrc$"))
resourceChunks.push_back(c);
else
@@ -364,9 +369,10 @@ void ObjFile::readAssociativeDefinition(COFFSymbolRef sym,
const coff_section *parentSec = getSection(parentIndex);
if (Expected<StringRef> e = coffObj->getSectionName(parentSec))
parentName = *e;
- Err(ctx) << toString(this) << ": associative comdat " << name << " (sec "
- << sectionNumber << ") has invalid reference to section "
- << parentName << " (sec " << parentIndex << ")";
+ Err(symtab.ctx) << toString(this) << ": associative comdat " << name
+ << " (sec " << sectionNumber
+ << ") has invalid reference to section " << parentName
+ << " (sec " << parentIndex << ")";
};
if (parent == pendingComdat) {
@@ -427,16 +433,16 @@ Symbol *ObjFile::createRegular(COFFSymbolRef sym) {
if (sym.isExternal()) {
StringRef name = check(coffObj->getSymbolName(sym));
if (sc)
- return ctx.symtab.addRegular(this, name, sym.getGeneric(), sc,
- sym.getValue());
+ return symtab.addRegular(this, name, sym.getGeneric(), sc,
+ sym.getValue());
// For MinGW symbols named .weak.* that point to a discarded section,
// don't create an Undefined symbol. If nothing ever refers to the symbol,
// everything should be fine. If something actually refers to the symbol
// (e.g. the undefined weak alias), linking will fail due to undefined
// references at the end.
- if (ctx.config.mingw && name.starts_with(".weak."))
+ if (symtab.ctx.config.mingw && name.starts_with(".weak."))
return nullptr;
- return ctx.symtab.addUndefined(name, this, false);
+ return symtab.addUndefined(name, this, false);
}
if (sc)
return make<DefinedRegular>(this, /*Name*/ "", /*IsCOMDAT*/ false,
@@ -456,6 +462,7 @@ void ObjFile::initializeSymbols() {
DenseMap<StringRef, uint32_t> prevailingSectionMap;
std::vector<const coff_aux_section_definition *> comdatDefs(
coffObj->getNumberOfSections() + 1);
+ COFFLinkerContext &ctx = symtab.ctx;
for (uint32_t i = 0; i < numSymbols; ++i) {
COFFSymbolRef coffSym = check(coffObj->getSymbol(i));
@@ -529,7 +536,7 @@ void ObjFile::initializeSymbols() {
for (auto &kv : weakAliases) {
Symbol *sym = kv.first;
const coff_aux_weak_external *aux = kv.second;
- checkAndSetWeakAlias(ctx, this, sym, symbols[aux->TagIndex],
+ checkAndSetWeakAlias(symtab, this, sym, symbols[aux->TagIndex],
aux->Characteristics ==
IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY);
}
@@ -540,17 +547,17 @@ void ObjFile::initializeSymbols() {
Symbol *ObjFile::createUndefined(COFFSymbolRef sym, bool overrideLazy) {
StringRef name = check(coffObj->getSymbolName(sym));
- Symbol *s = ctx.symtab.addUndefined(name, this, overrideLazy);
+ Symbol *s = symtab.addUndefined(name, this, overrideLazy);
// Add an anti-dependency alias for undefined AMD64 symbols on the ARM64EC
// target.
- if (isArm64EC(ctx.config.machine) && getMachineType() == AMD64) {
+ if (isArm64EC(symtab.ctx.config.machine) && getMachineType() == AMD64) {
auto u = dyn_cast<Undefined>(s);
if (u && !u->weakAlias) {
if (std::optional<std::string> mangledName =
getArm64ECMangledFunctionName(name)) {
- Symbol *m = ctx.symtab.addUndefined(saver().save(*mangledName), this,
- /*overrideLazy=*/false);
+ Symbol *m = symtab.addUndefined(saver().save(*mangledName), this,
+ /*overrideLazy=*/false);
u->setWeakAlias(m, /*antiDep=*/true);
}
}
@@ -584,6 +591,7 @@ void ObjFile::handleComdatSelection(
SectionChunk *leaderChunk = leader->getChunk();
COMDATType leaderSelection = leaderChunk->selection;
+ COFFLinkerContext &ctx = symtab.ctx;
assert(leader->data && "Comdat leader without SectionChunk?");
if (isa<BitcodeFile>(leader->file)) {
@@ -624,13 +632,13 @@ void ObjFile::handleComdatSelection(
Log(ctx) << "conflicting comdat type for " << leader << ": "
<< (int)leaderSelection << " in " << leader->getFile() << " and "
<< (int)selection << " in " << this;
- ctx.symtab.reportDuplicate(leader, this);
+ symtab.reportDuplicate(leader, this);
return;
}
switch (selection) {
case IMAGE_COMDAT_SELECT_NODUPLICATES:
- ctx.symtab.reportDuplicate(leader, this);
+ symtab.reportDuplicate(leader, this);
break;
case IMAGE_COMDAT_SELECT_ANY:
@@ -640,14 +648,14 @@ void ObjFile::handleComdatSelection(
case IMAGE_COMDAT_SELECT_SAME_SIZE:
if (leaderChunk->getSize() != getSection(sym)->SizeOfRawData) {
if (!ctx.config.mingw) {
- ctx.symtab.reportDuplicate(leader, this);
+ symtab.reportDuplicate(leader, this);
} else {
const coff_aux_section_definition *leaderDef = nullptr;
if (leaderChunk->file)
leaderDef = findSectionDef(leaderChunk->file->getCOFFObj(),
leaderChunk->getSectionNumber());
if (!leaderDef || leaderDef->Length != def->Length)
- ctx.symtab.reportDuplicate(leader, this);
+ symtab.reportDuplicate(leader, this);
}
}
break;
@@ -658,7 +666,7 @@ void ObjFile::handleComdatSelection(
// if the two comdat sections have e.g. different alignment.
// Match that.
if (leaderChunk->getContents() != newChunk.getContents())
- ctx.symtab.reportDuplicate(leader, this, &newChunk, sym.getValue());
+ symtab.reportDuplicate(leader, this, &newChunk, sym.getValue());
break;
}
@@ -701,10 +709,11 @@ std::optional<Symbol *> ObjFile::createDefined(
if (sym.isCommon()) {
auto *c = make<CommonChunk>(sym);
chunks.push_back(c);
- return ctx.symtab.addCommon(this, getName(), sym.getValue(),
- sym.getGeneric(), c);
+ return symtab.addCommon(this, getName(), sym.getValue(), sym.getGeneric(),
+ c);
}
+ COFFLinkerContext &ctx = symtab.ctx;
if (sym.isAbsolute()) {
StringRef name = getName();
@@ -715,7 +724,7 @@ std::optional<Symbol *> ObjFile::createDefined(
return nullptr;
if (sym.isExternal())
- return ctx.symtab.addAbsolute(name, sym);
+ return symtab.addAbsolute(name, sym);
return make<DefinedAbsolute>(ctx, name, sym);
}
@@ -750,7 +759,7 @@ std::optional<Symbol *> ObjFile::createDefined(
if (sym.isExternal()) {
std::tie(leader, prevailing) =
- ctx.symtab.addComdat(this, getName(), sym.getGeneric());
+ symtab.addComdat(this, getName(), sym.getGeneric());
} else {
leader = make<DefinedRegular>(this, /*Name*/ "", /*IsCOMDAT*/ false,
/*IsExternal*/ false, sym.getGeneric());
@@ -865,6 +874,7 @@ void ObjFile::initializeFlags() {
// DebugTypes.h). Both cases only happen with cl.exe: clang-cl produces regular
// output even with /Yc and /Yu and with /Zi.
void ObjFile::initializeDependencies() {
+ COFFLinkerContex...
[truncated]
|
Copying a comment from #119294 for a better context: With hybrid images and two namespaces, My initial draft followed this approach (encapsulated in an I considered storing both The required changes to use |
So, while the changes to support ARM64EC so far have been quite gradual and non-intrusive, I think this bit changes the fundamental workings of the linker enough that I want most key stakeholders to be aware of it before proceeding. So CC @rnk @aganea @alvinhochun @zmodem @tru (am I missing any other regular LLD/COFF contributors?) To summarize things very briefly; ARM64EC is the special ABI, using the x86_64 ABI style, with code compiled for aarch64 but fitting into an x86_64 ABI world, where some code is emulated and some is AOT compiled aarch64 code. (That's the easy way to view it; in practice, an ARM64EC DLL can mix between regular x86_64 code and ARM64EC code on a per-function basis.) This is pretty much implemented and functional now, as far as I know. ARM64X is the case when you'd essentially have a fat binary, mixing ARM64EC and regular native ARM64 binaries into one single DLL/EXE. Contrary to e.g. MachO fat binaries, this isn't two separate binaries that just are glued together, but it is essentially one single binary, that can e.g. share rdata sections, and share functions that happen to be identical between the two modes. This means that instead of having 100% overhead of such a dual-mode binary, Microsoft says that the actual overhead in their setups is around 30%. So instead of having a "fat binary", they have a "hybrid binary". See http://www.emulators.com/docs/abc_arm64ec_explained.htm for more references on this. In essence, in order to be able to link such a hybrid binary, we need to have two symbol tables in flight at the same time, for each mode. This is probably a notable fundamental change to how LLD/COFF works. But if we want to handle ARM64X, I think it's unavoidable. So this isn't really phrased as a "do you think we should do it?" question, but just a headsup that this is what is happening here and that we'll probably proceed with (after regular proper code review of course), unless there are strong objections against it of course. (I haven't yet had time to actually start reviewing this set of changes, but will try to get started on it soon.) |
Thanks for the link to that writeup, it was helpful, and thanks to Darek, the author, for writing it. I'm still trying to understand the implications of ARM64X are for the traditional compile & link model. My initial understanding was that ARM64X was a standard fat binary system: It's a way to combine two standalone x64 and ARM64EC PE files together into one, maybe with some transparent way to share .rdata, but what you have said here has made me question everything. I'm getting the sense now that this is roughly speaking what a build should look like:
With that model, the original review comments start to make some sense. Speaking to @cjacek 's comments about |
Minor nitpick, but ARM64X is a combination of ARM64EC and regular ARM64, not x64. ARM64EC itself is a freefloating mix of actual ARM64EC and regular x64 object files (and identifies as x64, in the COFF machine field). Linking ARM64EC mostly works as a regular, non-fat link, with one definition of everything, but with lots of thunks inbetween everything, essentially allowing you to jump freely back and forth between AOT compiled ARM64EC and x64 at any function boundary.
Almost, I think it's more like this:
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This bit looks ok. I feel there's some level of inconsistency left around what is passed to the InputFile
subclass constructors though.
@@ -403,8 +404,8 @@ class BitcodeFile : public InputFile { | |||
// .dll file. MinGW only. | |||
class DLLFile : public InputFile { | |||
public: | |||
explicit DLLFile(COFFLinkerContext &ctx, MemoryBufferRef m) | |||
: InputFile(ctx, DLLKind, m) {} | |||
explicit DLLFile(SymbolTable &symtab, MemoryBufferRef m) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here in DLLFile
, we change the constructor to take a SymbolTable&
instead, while for ObjFile
above, we keep taking a COFFLinkerContext&
parameter, and resolve ctx.symtab
within the ObjFile
constructor; 1) why the inconsistency, and 2) shouldn't we stop doing ctx.symtab
here? Or is that a leftover that gets taken care of in one of the upcoming patches?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, after reading the fourth commit, I see - the default is to keep passing ctx
as we don't really know which symbol table it belong to yet, while the cases here where we already pass a known SymbolTable
is more of a special case, where we seem to know for sure beforehand which symbol table it belongs to?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll change it to COFFLinkerContext
. Importing DLLs is more complicated on ARM64X because those DLLs can themselves be ARM64X files. If that's the case, we need to detect it and create two DLLFile
instances: one for the original memory buffer using the native namespace and another for the hybrid view obtained from getHybridObjectView()
. With this mechanism, the caller will already know the exact namespace for the created file instance, so they might as well pass it. That said, we can probably address this later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, I remember now why I didn't change it: the constructor would need to be moved out of the header because COFFLinkerContext
is an incomplete type in InputFiles.h
. Since it will ultimately use the symtab
argument (as mentioned above), I updated the type in this commit already. Should I change it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't really have a strong opinion on it; moving the constructor to the .cpp file here adds a bit more churn if it can be moved back to the header later, so it's probably fine to keep it like this for now - I mostly wanted to raise the issue and hear the reasoning around it.
…xt in InputFile (NFC) This change prepares for the introduction of separate hybrid namespaces. Hybrid images will require two SymbolTable instances, making it necessary to associate InputFile objects with the relevant one.
`symtab.ctx.symtab` is just `symtab`. Looks like #119296 added this using a global find-and-replace. This was the only instance of `symtab.ctx.symtab` in lld/. No behavior change.
This change prepares for the introduction of separate hybrid namespaces. Hybrid images will require two
SymbolTable
instances, making it necessary to associateInputFile
objects with the relevant one.