-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LLD][COFF] Add support for ARM64EC entry thunks. #88132
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
CC @bylaws |
@llvm/pr-subscribers-llvm-binary-utilities @llvm/pr-subscribers-platform-windows Author: Jacek Caban (cjacek) ChangesFor x86_64 callable functions, ARM64EC requires an entry thunk generated by the compiler. The linker interprets .hybmp sections to associate function chunks with their entry points and writes an offset to thunks preceding function section contents. Additionally, ICF needs to be aware of entry thunks to not consider chunks to be equal when they have different entry thunks, and GC needs to mark entry thunks together with function chunks. I used a new SectionChunkEC class instead of storing entry thunks in SectionChunk, following the guideline to keep SectionChunk as compact as possible. This way, there is no memory usage increase on non-EC targets. Patch is 25.33 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/88132.diff 13 Files Affected:
diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index 004d71097387a2..9ee0c29a4cd3cf 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -31,8 +31,8 @@ using llvm::support::ulittle32_t;
namespace lld::coff {
-SectionChunk::SectionChunk(ObjFile *f, const coff_section *h)
- : Chunk(SectionKind), file(f), header(h), repl(this) {
+SectionChunk::SectionChunk(ObjFile *f, const coff_section *h, Kind k)
+ : Chunk(k), file(f), header(h), repl(this) {
// Initialize relocs.
if (file)
setRelocs(file->getCOFFObj()->getRelocations(header));
@@ -410,6 +410,10 @@ void SectionChunk::writeTo(uint8_t *buf) const {
applyRelocation(buf + rel.VirtualAddress, rel);
}
+
+ // Write the offset to EC entry thunk preceding section contents.
+ if (Defined *entryThunk = getEntryThunk())
+ write32le(buf - sizeof(uint32_t), entryThunk->getRVA() - rva + 1);
}
void SectionChunk::applyRelocation(uint8_t *off,
diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h
index bb919037ecc263..df311524a8d185 100644
--- a/lld/COFF/Chunks.h
+++ b/lld/COFF/Chunks.h
@@ -55,7 +55,12 @@ enum : unsigned { Log2MaxSectionAlignment = 13 };
// doesn't even have actual data (if common or bss).
class Chunk {
public:
- enum Kind : uint8_t { SectionKind, OtherKind, ImportThunkKind };
+ enum Kind : uint8_t {
+ SectionKind,
+ SectionECKind,
+ OtherKind,
+ ImportThunkKind
+ };
Kind kind() const { return chunkKind; }
// Returns the size of this chunk (even if this is a common or BSS.)
@@ -120,6 +125,10 @@ class Chunk {
llvm::Triple::ArchType getArch() const;
std::optional<chpe_range_type> getArm64ECRangeType() const;
+ // ARM64EC entry thunk associated with the chunk.
+ Defined *getEntryThunk() const;
+ void setEntryThunk(Defined *entryThunk);
+
protected:
Chunk(Kind k = OtherKind) : chunkKind(k), hasData(true), p2Align(0) {}
@@ -176,7 +185,7 @@ class NonSectionChunk : public Chunk {
// bytes, so this is used only for logging or debugging.
virtual StringRef getDebugName() const { return ""; }
- static bool classof(const Chunk *c) { return c->kind() != SectionKind; }
+ static bool classof(const Chunk *c) { return c->kind() >= OtherKind; }
protected:
NonSectionChunk(Kind k = OtherKind) : Chunk(k) {}
@@ -210,7 +219,7 @@ class RuntimePseudoReloc {
};
// A chunk corresponding a section of an input file.
-class SectionChunk final : public Chunk {
+class SectionChunk : public Chunk {
// Identical COMDAT Folding feature accesses section internal data.
friend class ICF;
@@ -231,8 +240,8 @@ class SectionChunk final : public Chunk {
Symbol *operator*() const { return file->getSymbol(I->SymbolTableIndex); }
};
- SectionChunk(ObjFile *file, const coff_section *header);
- static bool classof(const Chunk *c) { return c->kind() == SectionKind; }
+ SectionChunk(ObjFile *file, const coff_section *header, Kind k = SectionKind);
+ static bool classof(const Chunk *c) { return c->kind() <= SectionECKind; }
size_t getSize() const { return header->SizeOfRawData; }
ArrayRef<uint8_t> getContents() const;
void writeTo(uint8_t *buf) const;
@@ -393,6 +402,16 @@ class SectionChunk final : public Chunk {
uint32_t sectionNameSize = 0;
};
+// A section chunk corresponding a section of an EC input file.
+class SectionChunkEC final : public SectionChunk {
+public:
+ static bool classof(const Chunk *c) { return c->kind() == SectionECKind; }
+
+ SectionChunkEC(ObjFile *file, const coff_section *header)
+ : SectionChunk(file, header, SectionECKind) {}
+ Defined *entryThunk = nullptr;
+};
+
// Inline methods to implement faux-virtual dispatch for SectionChunk.
inline size_t Chunk::getSize() const {
@@ -775,6 +794,17 @@ inline bool Chunk::isHotPatchable() const {
return false;
}
+inline Defined *Chunk::getEntryThunk() const {
+ if (auto *c = dyn_cast<const SectionChunkEC>(this))
+ return c->entryThunk;
+ return nullptr;
+}
+
+inline void Chunk::setEntryThunk(Defined *entryThunk) {
+ if (auto c = dyn_cast<SectionChunkEC>(this))
+ c->entryThunk = entryThunk;
+}
+
void applyMOV32T(uint8_t *off, uint32_t v);
void applyBranch24T(uint8_t *off, int32_t v);
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index b0365b5b94417a..cd143479a11e23 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -2604,6 +2604,8 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
if (auto *arg = args.getLastArg(OPT_print_symbol_order))
config->printSymbolOrder = arg->getValue();
+ ctx.symtab.initializeEntryThunks();
+
// Identify unreferenced COMDAT sections.
if (config->doGC) {
if (config->mingw) {
diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp
index 013ffcfb3d5d1a..8c30c9f6d96475 100644
--- a/lld/COFF/ICF.cpp
+++ b/lld/COFF/ICF.cpp
@@ -184,9 +184,7 @@ bool ICF::equalsConstant(const SectionChunk *a, const SectionChunk *b) {
// Compare "moving" part of two sections, namely relocation targets.
bool ICF::equalsVariable(const SectionChunk *a, const SectionChunk *b) {
// Compare relocations.
- auto eq = [&](const coff_relocation &r1, const coff_relocation &r2) {
- Symbol *b1 = a->file->getSymbol(r1.SymbolTableIndex);
- Symbol *b2 = b->file->getSymbol(r2.SymbolTableIndex);
+ auto eqSym = [&](Symbol *b1, Symbol *b2) {
if (b1 == b2)
return true;
if (auto *d1 = dyn_cast<DefinedRegular>(b1))
@@ -194,6 +192,17 @@ bool ICF::equalsVariable(const SectionChunk *a, const SectionChunk *b) {
return d1->getChunk()->eqClass[cnt % 2] == d2->getChunk()->eqClass[cnt % 2];
return false;
};
+ auto eq = [&](const coff_relocation &r1, const coff_relocation &r2) {
+ Symbol *b1 = a->file->getSymbol(r1.SymbolTableIndex);
+ Symbol *b2 = b->file->getSymbol(r2.SymbolTableIndex);
+ return eqSym(b1, b2);
+ };
+
+ Symbol *e1 = a->getEntryThunk();
+ Symbol *e2 = b->getEntryThunk();
+ if ((e1 || e2) && (!e1 || !e2 || !eqSym(e1, e2)))
+ return false;
+
return std::equal(a->getRelocs().begin(), a->getRelocs().end(),
b->getRelocs().begin(), eq) &&
assocEquals(a, b);
diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp
index 037fae45242c6f..146bd530209323 100644
--- a/lld/COFF/InputFiles.cpp
+++ b/lld/COFF/InputFiles.cpp
@@ -152,6 +152,38 @@ void ObjFile::parseLazy() {
}
}
+struct ECMapEntry {
+ ulittle32_t src;
+ ulittle32_t dst;
+ ulittle32_t type;
+};
+
+void ObjFile::initializeECThunks() {
+ for (SectionChunk *chunk : hybmpChunks) {
+ if (chunk->getContents().size() % sizeof(ECMapEntry)) {
+ error("Invalid .hybmp chunk size " + Twine(chunk->getContents().size()));
+ return;
+ }
+
+ const uint8_t *end =
+ chunk->getContents().data() + chunk->getContents().size();
+ for (const uint8_t *iter = chunk->getContents().data(); iter != end;
+ iter += sizeof(ECMapEntry)) {
+ auto entry = reinterpret_cast<const ECMapEntry *>(iter);
+ switch (entry->type) {
+ case Arm64ECThunkType::Entry:
+ ctx.symtab.addEntryThunk(getSymbol(entry->src), getSymbol(entry->dst));
+ break;
+ case Arm64ECThunkType::Exit:
+ case Arm64ECThunkType::GuestExit:
+ break;
+ default:
+ warn("Ignoring unknown EC thunk type " + Twine(entry->type));
+ }
+ }
+ }
+}
+
void ObjFile::parse() {
// Parse a memory buffer as a COFF file.
std::unique_ptr<Binary> bin = CHECK(createBinary(mb), this);
@@ -168,6 +200,7 @@ void ObjFile::parse() {
initializeSymbols();
initializeFlags();
initializeDependencies();
+ initializeECThunks();
}
const coff_section *ObjFile::getSection(uint32_t i) {
@@ -242,7 +275,11 @@ SectionChunk *ObjFile::readSection(uint32_t sectionNumber,
if (sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
return nullptr;
- auto *c = make<SectionChunk>(this, sec);
+ SectionChunk *c;
+ if (isArm64EC(getMachineType()))
+ c = make<SectionChunkEC>(this, sec);
+ else
+ c = make<SectionChunk>(this, sec);
if (def)
c->checksum = def->CheckSum;
@@ -260,6 +297,8 @@ SectionChunk *ObjFile::readSection(uint32_t sectionNumber,
guardEHContChunks.push_back(c);
else if (name == ".sxdata")
sxDataChunks.push_back(c);
+ else if (isArm64EC(getMachineType()) && name == ".hybmp$x")
+ hybmpChunks.push_back(c);
else if (ctx.config.tailMerge && sec->NumberOfRelocations == 0 &&
name == ".rdata" && leaderName.starts_with("??_C@"))
// COFF sections that look like string literal sections (i.e. no
diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h
index 3b55cd791bfda2..cabd87ba673e3c 100644
--- a/lld/COFF/InputFiles.h
+++ b/lld/COFF/InputFiles.h
@@ -227,6 +227,7 @@ class ObjFile : public InputFile {
void initializeSymbols();
void initializeFlags();
void initializeDependencies();
+ void initializeECThunks();
SectionChunk *
readSection(uint32_t sectionNumber,
@@ -292,6 +293,8 @@ class ObjFile : public InputFile {
std::vector<SectionChunk *> guardLJmpChunks;
std::vector<SectionChunk *> guardEHContChunks;
+ std::vector<SectionChunk *> hybmpChunks;
+
// This vector contains a list of all symbols defined or referenced by this
// file. They are indexed such that you can get a Symbol by symbol
// index. Nonexistent indices (which are occupied by auxiliary
diff --git a/lld/COFF/MarkLive.cpp b/lld/COFF/MarkLive.cpp
index 2cf216a6aaad56..06079a98f2d00a 100644
--- a/lld/COFF/MarkLive.cpp
+++ b/lld/COFF/MarkLive.cpp
@@ -68,6 +68,10 @@ void markLive(COFFLinkerContext &ctx) {
// Mark associative sections if any.
for (SectionChunk &c : sc->children())
enqueue(&c);
+
+ // Mark EC entry thunks.
+ if (Defined *entryThunk = sc->getEntryThunk())
+ addSym(entryThunk);
}
}
}
diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index 3accf24663c6a0..b4ce79879b6b9f 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -561,6 +561,25 @@ std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, InputFile *file) {
return result;
}
+void SymbolTable::addEntryThunk(Symbol *from, Symbol *to) {
+ entryThunks.push_back({from, to});
+}
+
+void SymbolTable::initializeEntryThunks() {
+ for (auto it : entryThunks) {
+ auto *to = dyn_cast<Defined>(it.second);
+ if (!to)
+ continue;
+ auto *from = dyn_cast<DefinedRegular>(it.first);
+ if (!from || !from->getChunk()->isCOMDAT() ||
+ cast<DefinedRegular>(from)->getValue()) {
+ error("non COMDAT symbol '" + from->getName() + "' in hybrid map");
+ continue;
+ }
+ from->getChunk()->setEntryThunk(to);
+ }
+}
+
Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
bool isWeakAlias) {
auto [s, wasInserted] = insert(name, f);
diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h
index fc623c2840d401..93b376b69f7ecf 100644
--- a/lld/COFF/SymbolTable.h
+++ b/lld/COFF/SymbolTable.h
@@ -106,6 +106,8 @@ class SymbolTable {
Symbol *addImportThunk(StringRef name, DefinedImportData *s,
uint16_t machine);
void addLibcall(StringRef name);
+ void addEntryThunk(Symbol *from, Symbol *to);
+ void initializeEntryThunks();
void reportDuplicate(Symbol *existing, InputFile *newFile,
SectionChunk *newSc = nullptr,
@@ -134,6 +136,7 @@ class SymbolTable {
llvm::DenseMap<llvm::CachedHashStringRef, Symbol *> symMap;
std::unique_ptr<BitcodeCompiler> lto;
bool ltoCompilationDone = false;
+ std::vector<std::pair<Symbol *, Symbol *>> entryThunks;
COFFLinkerContext &ctx;
};
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 9c20bbb83d86d1..43f71d8cc385cf 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -1532,6 +1532,10 @@ void Writer::assignAddresses() {
}
if (padding && c->isHotPatchable())
virtualSize += padding;
+ // If chunk has EC entry thunk, reserve a space for an offset to the
+ // thunk.
+ if (c->getEntryThunk())
+ virtualSize += sizeof(uint32_t);
virtualSize = alignTo(virtualSize, c->getAlignment());
c->setRVA(rva + virtualSize);
virtualSize += c->getSize();
diff --git a/lld/test/COFF/arm64ec-entry-thunk.s b/lld/test/COFF/arm64ec-entry-thunk.s
new file mode 100644
index 00000000000000..280c132e32e1a9
--- /dev/null
+++ b/lld/test/COFF/arm64ec-entry-thunk.s
@@ -0,0 +1,345 @@
+// REQUIRES: aarch64
+// RUN: split-file %s %t.dir && cd %t.dir
+
+#--- test-simple.s
+// Build a simple function with an entry thunk.
+
+ .section .text,"xr",discard,func
+ .globl func
+ .p2align 2
+func:
+ mov w0, #1
+ ret
+
+ .section .wowthk$aa,"xr",discard,thunk
+ .globl thunk
+ .p2align 2
+thunk:
+ mov w0, #10
+ ret
+
+ .section .hybmp$x, "yi"
+ .symidx func
+ .symidx thunk
+ .word 1
+
+ .data
+ .rva func
+
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadcfg.obj
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-simple.s -o test-simple.obj
+// RUN: lld-link -machine:arm64ec -dll -noentry -out:out-simple.dll loadcfg.obj test-simple.obj
+// RUN: llvm-objdump -d out-simple.dll | FileCheck --check-prefix=DISASM %s
+
+// DISASM: Disassembly of section .text:
+// DISASM-EMPTY:
+// DISASM-NEXT: 0000000180001000 <.text>:
+// DISASM-NEXT: 180001000: 00000009 udf #0x9
+// DISASM-NEXT: 180001004: 52800020 mov w0, #0x1 // =1
+// DISASM-NEXT: 180001008: d65f03c0 ret
+// DISASM-NEXT: 18000100c: 52800140 mov w0, #0xa // =10
+// DISASM-NEXT: 180001010: d65f03c0 ret
+
+// RUN: llvm-readobj --sections out-simple.dll | FileCheck --check-prefix=HYBMP %s
+// HYBMP-NOT: .hybmp
+
+// RUN: lld-link -machine:arm64x -dll -noentry -out:out-simplex.dll loadcfg.obj test-simple.obj
+// RUN: llvm-objdump -d out-simplex.dll | FileCheck --check-prefix=DISASM %s
+
+#--- test-split-func.s
+// Build a simple function with an entry thunk, but pass it in multiple files.
+
+ .section .text,"xr",discard,func
+ .globl func
+ .p2align 2
+func:
+ mov w0, #1
+ ret
+
+#--- test-split-thunk.s
+ .section .wowthk$aa,"xr",discard,thunk
+ .globl thunk
+ .p2align 2
+thunk:
+ mov w0, #10
+ ret
+
+#--- test-split-hybmp.s
+ .section .hybmp$x, "yi"
+ .symidx func
+ .symidx thunk
+ .word 1
+
+#--- test-split-data.s
+ .data
+ .rva func
+
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-split-func.s -o test-split-func.obj
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-split-thunk.s -o test-split-thunk.obj
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-split-hybmp.s -o test-split-hybmp.obj
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-split-data.s -o test-split-data.obj
+// RUN: lld-link -machine:arm64ec -dll -noentry -out:out-split.dll loadcfg.obj \
+// RUN: test-split-func.obj test-split-thunk.obj test-split-data.obj test-split-hybmp.obj
+// RUN: llvm-objdump -d out-split.dll | FileCheck --check-prefix=DISASM %s
+
+#--- test-align.s
+// Build multiple functions with thunks and various alignments and check that entry thunk offsets
+// are correctly placed.
+
+ .section .text,"xr",discard,func
+ .globl func
+ .p2align 2
+func:
+ mov w0, #1
+ nop
+ ret
+
+ .section .text,"xr",discard,func2
+ .globl func2
+ .p2align 2
+func2:
+ mov w0, #2
+ ret
+
+ .section .text,"xr",discard,func3
+ .globl func3
+ .p2align 3
+func3:
+ mov w0, #3
+ nop
+ ret
+
+ .section .text,"xr",discard,func4
+ .globl func4
+ .p2align 3
+func4:
+ mov w0, #4
+ ret
+
+ .section .wowthk$aa,"xr",discard,thunk
+ .globl thunk
+ .p2align 2
+thunk:
+ mov w0, #10
+ ret
+
+ .section .wowthk$aa,"xr",discard,thunk2
+ .globl thunk2
+ .p2align 2
+thunk2:
+ mov w0, #20
+ ret
+
+ .section .hybmp$x, "yi"
+ .symidx func
+ .symidx thunk
+ .word 1
+ .symidx func2
+ .symidx thunk2
+ .word 1
+ .symidx func3
+ .symidx thunk
+ .word 1
+ .symidx func4
+ .symidx thunk
+ .word 1
+
+ .data
+ .rva func
+ .rva func2
+ .rva func3
+ .rva func4
+
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-align.s -o test-align.obj
+// RUN: lld-link -machine:arm64ec -dll -noentry -out:out-align.dll loadcfg.obj test-align.obj
+// RUN: llvm-objdump -d out-align.dll | FileCheck --check-prefix=ALIGN %s
+
+// ALIGN: Disassembly of section .text:
+// ALIGN-EMPTY:
+// ALIGN-NEXT: 0000000180001000 <.text>:
+// ALIGN-NEXT: 180001000: 00000035 udf #0x35
+// ALIGN-NEXT: 180001004: 52800020 mov w0, #0x1 // =1
+// ALIGN-NEXT: 180001008: d503201f nop
+// ALIGN-NEXT: 18000100c: d65f03c0 ret
+// ALIGN-NEXT: 180001010: 0000002d udf #0x2d
+// ALIGN-NEXT: 180001014: 52800040 mov w0, #0x2 // =2
+// ALIGN-NEXT: 180001018: d65f03c0 ret
+// ALIGN-NEXT: 18000101c: 00000019 udf #0x19
+// ALIGN-NEXT: 180001020: 52800060 mov w0, #0x3 // =3
+// ALIGN-NEXT: 180001024: d503201f nop
+// ALIGN-NEXT: 180001028: d65f03c0 ret
+// ALIGN-NEXT: 18000102c: 00000009 udf #0x9
+// ALIGN-NEXT: 180001030: 52800080 mov w0, #0x4 // =4
+// ALIGN-NEXT: 180001034: d65f03c0 ret
+// ALIGN-NEXT: 180001038: 52800140 mov w0, #0xa // =10
+// ALIGN-NEXT: 18000103c: d65f03c0 ret
+// ALIGN-NEXT: 180001040: 52800280 mov w0, #0x14 // =20
+// ALIGN-NEXT: 180001044: d65f03c0 ret
+
+#--- test-icf-thunk.s
+// Build two functions with identical entry thunks and check that thunks are merged by ICF.
+
+ .section .text,"xr",discard,func
+ .globl func
+ .p2align 2
+func:
+ mov w0, #1
+ ret
+
+ .section .text,"xr",discard,func2
+ .globl func2
+ .p2align 2
+func2:
+ mov w0, #2
+ ret
+
+ .section .wowthk$aa,"xr",discard,thunk
+ .globl thunk
+ .p2align 2
+thunk:
+ mov w0, #10
+ ret
+
+ .section .wowthk$aa,"xr",discard,thunk2
+ .globl thunk2
+ .p2align 2
+thunk2:
+ mov w0, #10
+ ret
+
+ .section .hybmp$x, "yi"
+ .symidx func
+ .symidx thunk
+ .word 1
+ .symidx func2
+ .symidx thunk2
+ .word 1
+
+ .data
+ .rva func
+ .rva func2
+
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-icf-thunk.s -o test-icf-thunk.obj
+// RUN: lld-link -machine:arm64ec -dll -noentry -out:out-icf-thunk.dll loadcfg.obj test-icf-thunk.obj
+// RUN: llvm-objdump -d out-icf-thunk.dll | FileCheck --check-prefix=ICF-THUNK %s
+
+// ICF-THUNK: Disassembly of section .text:
+// ICF-THUNK-EMPTY:
+// ICF-THUNK-NEXT: 0000000180001000 <.text>:
+// ICF-THUNK-NEXT: 180001000: 00000015 udf #0x15
+// ICF-THUNK-NEXT: 180001004: 52800020 mov w0, #0x1 // =1
+// ICF-THUNK-NEXT: 180001008: d65f03c0 ret
+// ICF-THUNK-NEXT: 18000100c: 00000009 udf #0x9
+// ICF-THUNK-NEXT: 180001010: 52800040 mov w0, #0x2 // =2
+// ICF-THUNK-NEXT: 180001014: d65f03c0 ret
+// ICF-THUNK-NEXT: 180001018: 52800140 mov w0, #0xa // =10
+// ICF-THUNK-NEXT: 18000101c: d65f03c0 ret
+
+#--- test-icf-diff-thunk.s
+// Build two identical functions with different entry thunks and check that they are not merged by ICF.
+
+ .section .text,"xr",discard,func
+ .globl func
+ .p2align 2
+func:
+ mov w0, #1
+ ret
+
+ .section .text,"xr",discard,func2
+ .globl func2
+ .p2align 2
+func2:
+ mov w0, #1
+ ret
+
+ .section .wowthk$aa,"xr",discard,thunk
+ .globl thunk
+ .p2align 2
+thunk:
+ mov w0, #10
+ ret
+
+ .section .wowthk$aa,"xr",discard,thunk2
+ .globl thunk2
+ .p2align 2
+thunk2:
+ mov w0, #20
+ ret
+
+ .section .hybmp$x, "yi"
+ .symidx func
+ .symidx thunk
+ .word 1
+ .symidx func2
+ .symidx thunk2
+ .word 1
+
+ .data
+ .rva func
+ .rva func2
+
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-icf-diff-thunk.s -o test-icf-diff-thunk.obj
+// RUN: lld-link -machine:arm64ec -dll -noentry -out:out-icf-diff-thunk.dll loadcfg.obj test-icf-diff-thunk.obj
+// RUN: llvm-objdump -d out-icf-diff-thunk.dll | FileCheck --check-prefix=ICF-DIFF-THUNK %s
+
+// ICF-DIFF-THUNK: Disassembly of section .text:
+// IC...
[truncated]
|
@llvm/pr-subscribers-lld Author: Jacek Caban (cjacek) ChangesFor x86_64 callable functions, ARM64EC requires an entry thunk generated by the compiler. The linker interprets .hybmp sections to associate function chunks with their entry points and writes an offset to thunks preceding function section contents. Additionally, ICF needs to be aware of entry thunks to not consider chunks to be equal when they have different entry thunks, and GC needs to mark entry thunks together with function chunks. I used a new SectionChunkEC class instead of storing entry thunks in SectionChunk, following the guideline to keep SectionChunk as compact as possible. This way, there is no memory usage increase on non-EC targets. Patch is 25.33 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/88132.diff 13 Files Affected:
diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index 004d71097387a2..9ee0c29a4cd3cf 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -31,8 +31,8 @@ using llvm::support::ulittle32_t;
namespace lld::coff {
-SectionChunk::SectionChunk(ObjFile *f, const coff_section *h)
- : Chunk(SectionKind), file(f), header(h), repl(this) {
+SectionChunk::SectionChunk(ObjFile *f, const coff_section *h, Kind k)
+ : Chunk(k), file(f), header(h), repl(this) {
// Initialize relocs.
if (file)
setRelocs(file->getCOFFObj()->getRelocations(header));
@@ -410,6 +410,10 @@ void SectionChunk::writeTo(uint8_t *buf) const {
applyRelocation(buf + rel.VirtualAddress, rel);
}
+
+ // Write the offset to EC entry thunk preceding section contents.
+ if (Defined *entryThunk = getEntryThunk())
+ write32le(buf - sizeof(uint32_t), entryThunk->getRVA() - rva + 1);
}
void SectionChunk::applyRelocation(uint8_t *off,
diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h
index bb919037ecc263..df311524a8d185 100644
--- a/lld/COFF/Chunks.h
+++ b/lld/COFF/Chunks.h
@@ -55,7 +55,12 @@ enum : unsigned { Log2MaxSectionAlignment = 13 };
// doesn't even have actual data (if common or bss).
class Chunk {
public:
- enum Kind : uint8_t { SectionKind, OtherKind, ImportThunkKind };
+ enum Kind : uint8_t {
+ SectionKind,
+ SectionECKind,
+ OtherKind,
+ ImportThunkKind
+ };
Kind kind() const { return chunkKind; }
// Returns the size of this chunk (even if this is a common or BSS.)
@@ -120,6 +125,10 @@ class Chunk {
llvm::Triple::ArchType getArch() const;
std::optional<chpe_range_type> getArm64ECRangeType() const;
+ // ARM64EC entry thunk associated with the chunk.
+ Defined *getEntryThunk() const;
+ void setEntryThunk(Defined *entryThunk);
+
protected:
Chunk(Kind k = OtherKind) : chunkKind(k), hasData(true), p2Align(0) {}
@@ -176,7 +185,7 @@ class NonSectionChunk : public Chunk {
// bytes, so this is used only for logging or debugging.
virtual StringRef getDebugName() const { return ""; }
- static bool classof(const Chunk *c) { return c->kind() != SectionKind; }
+ static bool classof(const Chunk *c) { return c->kind() >= OtherKind; }
protected:
NonSectionChunk(Kind k = OtherKind) : Chunk(k) {}
@@ -210,7 +219,7 @@ class RuntimePseudoReloc {
};
// A chunk corresponding a section of an input file.
-class SectionChunk final : public Chunk {
+class SectionChunk : public Chunk {
// Identical COMDAT Folding feature accesses section internal data.
friend class ICF;
@@ -231,8 +240,8 @@ class SectionChunk final : public Chunk {
Symbol *operator*() const { return file->getSymbol(I->SymbolTableIndex); }
};
- SectionChunk(ObjFile *file, const coff_section *header);
- static bool classof(const Chunk *c) { return c->kind() == SectionKind; }
+ SectionChunk(ObjFile *file, const coff_section *header, Kind k = SectionKind);
+ static bool classof(const Chunk *c) { return c->kind() <= SectionECKind; }
size_t getSize() const { return header->SizeOfRawData; }
ArrayRef<uint8_t> getContents() const;
void writeTo(uint8_t *buf) const;
@@ -393,6 +402,16 @@ class SectionChunk final : public Chunk {
uint32_t sectionNameSize = 0;
};
+// A section chunk corresponding a section of an EC input file.
+class SectionChunkEC final : public SectionChunk {
+public:
+ static bool classof(const Chunk *c) { return c->kind() == SectionECKind; }
+
+ SectionChunkEC(ObjFile *file, const coff_section *header)
+ : SectionChunk(file, header, SectionECKind) {}
+ Defined *entryThunk = nullptr;
+};
+
// Inline methods to implement faux-virtual dispatch for SectionChunk.
inline size_t Chunk::getSize() const {
@@ -775,6 +794,17 @@ inline bool Chunk::isHotPatchable() const {
return false;
}
+inline Defined *Chunk::getEntryThunk() const {
+ if (auto *c = dyn_cast<const SectionChunkEC>(this))
+ return c->entryThunk;
+ return nullptr;
+}
+
+inline void Chunk::setEntryThunk(Defined *entryThunk) {
+ if (auto c = dyn_cast<SectionChunkEC>(this))
+ c->entryThunk = entryThunk;
+}
+
void applyMOV32T(uint8_t *off, uint32_t v);
void applyBranch24T(uint8_t *off, int32_t v);
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index b0365b5b94417a..cd143479a11e23 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -2604,6 +2604,8 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
if (auto *arg = args.getLastArg(OPT_print_symbol_order))
config->printSymbolOrder = arg->getValue();
+ ctx.symtab.initializeEntryThunks();
+
// Identify unreferenced COMDAT sections.
if (config->doGC) {
if (config->mingw) {
diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp
index 013ffcfb3d5d1a..8c30c9f6d96475 100644
--- a/lld/COFF/ICF.cpp
+++ b/lld/COFF/ICF.cpp
@@ -184,9 +184,7 @@ bool ICF::equalsConstant(const SectionChunk *a, const SectionChunk *b) {
// Compare "moving" part of two sections, namely relocation targets.
bool ICF::equalsVariable(const SectionChunk *a, const SectionChunk *b) {
// Compare relocations.
- auto eq = [&](const coff_relocation &r1, const coff_relocation &r2) {
- Symbol *b1 = a->file->getSymbol(r1.SymbolTableIndex);
- Symbol *b2 = b->file->getSymbol(r2.SymbolTableIndex);
+ auto eqSym = [&](Symbol *b1, Symbol *b2) {
if (b1 == b2)
return true;
if (auto *d1 = dyn_cast<DefinedRegular>(b1))
@@ -194,6 +192,17 @@ bool ICF::equalsVariable(const SectionChunk *a, const SectionChunk *b) {
return d1->getChunk()->eqClass[cnt % 2] == d2->getChunk()->eqClass[cnt % 2];
return false;
};
+ auto eq = [&](const coff_relocation &r1, const coff_relocation &r2) {
+ Symbol *b1 = a->file->getSymbol(r1.SymbolTableIndex);
+ Symbol *b2 = b->file->getSymbol(r2.SymbolTableIndex);
+ return eqSym(b1, b2);
+ };
+
+ Symbol *e1 = a->getEntryThunk();
+ Symbol *e2 = b->getEntryThunk();
+ if ((e1 || e2) && (!e1 || !e2 || !eqSym(e1, e2)))
+ return false;
+
return std::equal(a->getRelocs().begin(), a->getRelocs().end(),
b->getRelocs().begin(), eq) &&
assocEquals(a, b);
diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp
index 037fae45242c6f..146bd530209323 100644
--- a/lld/COFF/InputFiles.cpp
+++ b/lld/COFF/InputFiles.cpp
@@ -152,6 +152,38 @@ void ObjFile::parseLazy() {
}
}
+struct ECMapEntry {
+ ulittle32_t src;
+ ulittle32_t dst;
+ ulittle32_t type;
+};
+
+void ObjFile::initializeECThunks() {
+ for (SectionChunk *chunk : hybmpChunks) {
+ if (chunk->getContents().size() % sizeof(ECMapEntry)) {
+ error("Invalid .hybmp chunk size " + Twine(chunk->getContents().size()));
+ return;
+ }
+
+ const uint8_t *end =
+ chunk->getContents().data() + chunk->getContents().size();
+ for (const uint8_t *iter = chunk->getContents().data(); iter != end;
+ iter += sizeof(ECMapEntry)) {
+ auto entry = reinterpret_cast<const ECMapEntry *>(iter);
+ switch (entry->type) {
+ case Arm64ECThunkType::Entry:
+ ctx.symtab.addEntryThunk(getSymbol(entry->src), getSymbol(entry->dst));
+ break;
+ case Arm64ECThunkType::Exit:
+ case Arm64ECThunkType::GuestExit:
+ break;
+ default:
+ warn("Ignoring unknown EC thunk type " + Twine(entry->type));
+ }
+ }
+ }
+}
+
void ObjFile::parse() {
// Parse a memory buffer as a COFF file.
std::unique_ptr<Binary> bin = CHECK(createBinary(mb), this);
@@ -168,6 +200,7 @@ void ObjFile::parse() {
initializeSymbols();
initializeFlags();
initializeDependencies();
+ initializeECThunks();
}
const coff_section *ObjFile::getSection(uint32_t i) {
@@ -242,7 +275,11 @@ SectionChunk *ObjFile::readSection(uint32_t sectionNumber,
if (sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
return nullptr;
- auto *c = make<SectionChunk>(this, sec);
+ SectionChunk *c;
+ if (isArm64EC(getMachineType()))
+ c = make<SectionChunkEC>(this, sec);
+ else
+ c = make<SectionChunk>(this, sec);
if (def)
c->checksum = def->CheckSum;
@@ -260,6 +297,8 @@ SectionChunk *ObjFile::readSection(uint32_t sectionNumber,
guardEHContChunks.push_back(c);
else if (name == ".sxdata")
sxDataChunks.push_back(c);
+ else if (isArm64EC(getMachineType()) && name == ".hybmp$x")
+ hybmpChunks.push_back(c);
else if (ctx.config.tailMerge && sec->NumberOfRelocations == 0 &&
name == ".rdata" && leaderName.starts_with("??_C@"))
// COFF sections that look like string literal sections (i.e. no
diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h
index 3b55cd791bfda2..cabd87ba673e3c 100644
--- a/lld/COFF/InputFiles.h
+++ b/lld/COFF/InputFiles.h
@@ -227,6 +227,7 @@ class ObjFile : public InputFile {
void initializeSymbols();
void initializeFlags();
void initializeDependencies();
+ void initializeECThunks();
SectionChunk *
readSection(uint32_t sectionNumber,
@@ -292,6 +293,8 @@ class ObjFile : public InputFile {
std::vector<SectionChunk *> guardLJmpChunks;
std::vector<SectionChunk *> guardEHContChunks;
+ std::vector<SectionChunk *> hybmpChunks;
+
// This vector contains a list of all symbols defined or referenced by this
// file. They are indexed such that you can get a Symbol by symbol
// index. Nonexistent indices (which are occupied by auxiliary
diff --git a/lld/COFF/MarkLive.cpp b/lld/COFF/MarkLive.cpp
index 2cf216a6aaad56..06079a98f2d00a 100644
--- a/lld/COFF/MarkLive.cpp
+++ b/lld/COFF/MarkLive.cpp
@@ -68,6 +68,10 @@ void markLive(COFFLinkerContext &ctx) {
// Mark associative sections if any.
for (SectionChunk &c : sc->children())
enqueue(&c);
+
+ // Mark EC entry thunks.
+ if (Defined *entryThunk = sc->getEntryThunk())
+ addSym(entryThunk);
}
}
}
diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index 3accf24663c6a0..b4ce79879b6b9f 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -561,6 +561,25 @@ std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, InputFile *file) {
return result;
}
+void SymbolTable::addEntryThunk(Symbol *from, Symbol *to) {
+ entryThunks.push_back({from, to});
+}
+
+void SymbolTable::initializeEntryThunks() {
+ for (auto it : entryThunks) {
+ auto *to = dyn_cast<Defined>(it.second);
+ if (!to)
+ continue;
+ auto *from = dyn_cast<DefinedRegular>(it.first);
+ if (!from || !from->getChunk()->isCOMDAT() ||
+ cast<DefinedRegular>(from)->getValue()) {
+ error("non COMDAT symbol '" + from->getName() + "' in hybrid map");
+ continue;
+ }
+ from->getChunk()->setEntryThunk(to);
+ }
+}
+
Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
bool isWeakAlias) {
auto [s, wasInserted] = insert(name, f);
diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h
index fc623c2840d401..93b376b69f7ecf 100644
--- a/lld/COFF/SymbolTable.h
+++ b/lld/COFF/SymbolTable.h
@@ -106,6 +106,8 @@ class SymbolTable {
Symbol *addImportThunk(StringRef name, DefinedImportData *s,
uint16_t machine);
void addLibcall(StringRef name);
+ void addEntryThunk(Symbol *from, Symbol *to);
+ void initializeEntryThunks();
void reportDuplicate(Symbol *existing, InputFile *newFile,
SectionChunk *newSc = nullptr,
@@ -134,6 +136,7 @@ class SymbolTable {
llvm::DenseMap<llvm::CachedHashStringRef, Symbol *> symMap;
std::unique_ptr<BitcodeCompiler> lto;
bool ltoCompilationDone = false;
+ std::vector<std::pair<Symbol *, Symbol *>> entryThunks;
COFFLinkerContext &ctx;
};
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 9c20bbb83d86d1..43f71d8cc385cf 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -1532,6 +1532,10 @@ void Writer::assignAddresses() {
}
if (padding && c->isHotPatchable())
virtualSize += padding;
+ // If chunk has EC entry thunk, reserve a space for an offset to the
+ // thunk.
+ if (c->getEntryThunk())
+ virtualSize += sizeof(uint32_t);
virtualSize = alignTo(virtualSize, c->getAlignment());
c->setRVA(rva + virtualSize);
virtualSize += c->getSize();
diff --git a/lld/test/COFF/arm64ec-entry-thunk.s b/lld/test/COFF/arm64ec-entry-thunk.s
new file mode 100644
index 00000000000000..280c132e32e1a9
--- /dev/null
+++ b/lld/test/COFF/arm64ec-entry-thunk.s
@@ -0,0 +1,345 @@
+// REQUIRES: aarch64
+// RUN: split-file %s %t.dir && cd %t.dir
+
+#--- test-simple.s
+// Build a simple function with an entry thunk.
+
+ .section .text,"xr",discard,func
+ .globl func
+ .p2align 2
+func:
+ mov w0, #1
+ ret
+
+ .section .wowthk$aa,"xr",discard,thunk
+ .globl thunk
+ .p2align 2
+thunk:
+ mov w0, #10
+ ret
+
+ .section .hybmp$x, "yi"
+ .symidx func
+ .symidx thunk
+ .word 1
+
+ .data
+ .rva func
+
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadcfg.obj
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-simple.s -o test-simple.obj
+// RUN: lld-link -machine:arm64ec -dll -noentry -out:out-simple.dll loadcfg.obj test-simple.obj
+// RUN: llvm-objdump -d out-simple.dll | FileCheck --check-prefix=DISASM %s
+
+// DISASM: Disassembly of section .text:
+// DISASM-EMPTY:
+// DISASM-NEXT: 0000000180001000 <.text>:
+// DISASM-NEXT: 180001000: 00000009 udf #0x9
+// DISASM-NEXT: 180001004: 52800020 mov w0, #0x1 // =1
+// DISASM-NEXT: 180001008: d65f03c0 ret
+// DISASM-NEXT: 18000100c: 52800140 mov w0, #0xa // =10
+// DISASM-NEXT: 180001010: d65f03c0 ret
+
+// RUN: llvm-readobj --sections out-simple.dll | FileCheck --check-prefix=HYBMP %s
+// HYBMP-NOT: .hybmp
+
+// RUN: lld-link -machine:arm64x -dll -noentry -out:out-simplex.dll loadcfg.obj test-simple.obj
+// RUN: llvm-objdump -d out-simplex.dll | FileCheck --check-prefix=DISASM %s
+
+#--- test-split-func.s
+// Build a simple function with an entry thunk, but pass it in multiple files.
+
+ .section .text,"xr",discard,func
+ .globl func
+ .p2align 2
+func:
+ mov w0, #1
+ ret
+
+#--- test-split-thunk.s
+ .section .wowthk$aa,"xr",discard,thunk
+ .globl thunk
+ .p2align 2
+thunk:
+ mov w0, #10
+ ret
+
+#--- test-split-hybmp.s
+ .section .hybmp$x, "yi"
+ .symidx func
+ .symidx thunk
+ .word 1
+
+#--- test-split-data.s
+ .data
+ .rva func
+
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-split-func.s -o test-split-func.obj
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-split-thunk.s -o test-split-thunk.obj
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-split-hybmp.s -o test-split-hybmp.obj
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-split-data.s -o test-split-data.obj
+// RUN: lld-link -machine:arm64ec -dll -noentry -out:out-split.dll loadcfg.obj \
+// RUN: test-split-func.obj test-split-thunk.obj test-split-data.obj test-split-hybmp.obj
+// RUN: llvm-objdump -d out-split.dll | FileCheck --check-prefix=DISASM %s
+
+#--- test-align.s
+// Build multiple functions with thunks and various alignments and check that entry thunk offsets
+// are correctly placed.
+
+ .section .text,"xr",discard,func
+ .globl func
+ .p2align 2
+func:
+ mov w0, #1
+ nop
+ ret
+
+ .section .text,"xr",discard,func2
+ .globl func2
+ .p2align 2
+func2:
+ mov w0, #2
+ ret
+
+ .section .text,"xr",discard,func3
+ .globl func3
+ .p2align 3
+func3:
+ mov w0, #3
+ nop
+ ret
+
+ .section .text,"xr",discard,func4
+ .globl func4
+ .p2align 3
+func4:
+ mov w0, #4
+ ret
+
+ .section .wowthk$aa,"xr",discard,thunk
+ .globl thunk
+ .p2align 2
+thunk:
+ mov w0, #10
+ ret
+
+ .section .wowthk$aa,"xr",discard,thunk2
+ .globl thunk2
+ .p2align 2
+thunk2:
+ mov w0, #20
+ ret
+
+ .section .hybmp$x, "yi"
+ .symidx func
+ .symidx thunk
+ .word 1
+ .symidx func2
+ .symidx thunk2
+ .word 1
+ .symidx func3
+ .symidx thunk
+ .word 1
+ .symidx func4
+ .symidx thunk
+ .word 1
+
+ .data
+ .rva func
+ .rva func2
+ .rva func3
+ .rva func4
+
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-align.s -o test-align.obj
+// RUN: lld-link -machine:arm64ec -dll -noentry -out:out-align.dll loadcfg.obj test-align.obj
+// RUN: llvm-objdump -d out-align.dll | FileCheck --check-prefix=ALIGN %s
+
+// ALIGN: Disassembly of section .text:
+// ALIGN-EMPTY:
+// ALIGN-NEXT: 0000000180001000 <.text>:
+// ALIGN-NEXT: 180001000: 00000035 udf #0x35
+// ALIGN-NEXT: 180001004: 52800020 mov w0, #0x1 // =1
+// ALIGN-NEXT: 180001008: d503201f nop
+// ALIGN-NEXT: 18000100c: d65f03c0 ret
+// ALIGN-NEXT: 180001010: 0000002d udf #0x2d
+// ALIGN-NEXT: 180001014: 52800040 mov w0, #0x2 // =2
+// ALIGN-NEXT: 180001018: d65f03c0 ret
+// ALIGN-NEXT: 18000101c: 00000019 udf #0x19
+// ALIGN-NEXT: 180001020: 52800060 mov w0, #0x3 // =3
+// ALIGN-NEXT: 180001024: d503201f nop
+// ALIGN-NEXT: 180001028: d65f03c0 ret
+// ALIGN-NEXT: 18000102c: 00000009 udf #0x9
+// ALIGN-NEXT: 180001030: 52800080 mov w0, #0x4 // =4
+// ALIGN-NEXT: 180001034: d65f03c0 ret
+// ALIGN-NEXT: 180001038: 52800140 mov w0, #0xa // =10
+// ALIGN-NEXT: 18000103c: d65f03c0 ret
+// ALIGN-NEXT: 180001040: 52800280 mov w0, #0x14 // =20
+// ALIGN-NEXT: 180001044: d65f03c0 ret
+
+#--- test-icf-thunk.s
+// Build two functions with identical entry thunks and check that thunks are merged by ICF.
+
+ .section .text,"xr",discard,func
+ .globl func
+ .p2align 2
+func:
+ mov w0, #1
+ ret
+
+ .section .text,"xr",discard,func2
+ .globl func2
+ .p2align 2
+func2:
+ mov w0, #2
+ ret
+
+ .section .wowthk$aa,"xr",discard,thunk
+ .globl thunk
+ .p2align 2
+thunk:
+ mov w0, #10
+ ret
+
+ .section .wowthk$aa,"xr",discard,thunk2
+ .globl thunk2
+ .p2align 2
+thunk2:
+ mov w0, #10
+ ret
+
+ .section .hybmp$x, "yi"
+ .symidx func
+ .symidx thunk
+ .word 1
+ .symidx func2
+ .symidx thunk2
+ .word 1
+
+ .data
+ .rva func
+ .rva func2
+
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-icf-thunk.s -o test-icf-thunk.obj
+// RUN: lld-link -machine:arm64ec -dll -noentry -out:out-icf-thunk.dll loadcfg.obj test-icf-thunk.obj
+// RUN: llvm-objdump -d out-icf-thunk.dll | FileCheck --check-prefix=ICF-THUNK %s
+
+// ICF-THUNK: Disassembly of section .text:
+// ICF-THUNK-EMPTY:
+// ICF-THUNK-NEXT: 0000000180001000 <.text>:
+// ICF-THUNK-NEXT: 180001000: 00000015 udf #0x15
+// ICF-THUNK-NEXT: 180001004: 52800020 mov w0, #0x1 // =1
+// ICF-THUNK-NEXT: 180001008: d65f03c0 ret
+// ICF-THUNK-NEXT: 18000100c: 00000009 udf #0x9
+// ICF-THUNK-NEXT: 180001010: 52800040 mov w0, #0x2 // =2
+// ICF-THUNK-NEXT: 180001014: d65f03c0 ret
+// ICF-THUNK-NEXT: 180001018: 52800140 mov w0, #0xa // =10
+// ICF-THUNK-NEXT: 18000101c: d65f03c0 ret
+
+#--- test-icf-diff-thunk.s
+// Build two identical functions with different entry thunks and check that they are not merged by ICF.
+
+ .section .text,"xr",discard,func
+ .globl func
+ .p2align 2
+func:
+ mov w0, #1
+ ret
+
+ .section .text,"xr",discard,func2
+ .globl func2
+ .p2align 2
+func2:
+ mov w0, #1
+ ret
+
+ .section .wowthk$aa,"xr",discard,thunk
+ .globl thunk
+ .p2align 2
+thunk:
+ mov w0, #10
+ ret
+
+ .section .wowthk$aa,"xr",discard,thunk2
+ .globl thunk2
+ .p2align 2
+thunk2:
+ mov w0, #20
+ ret
+
+ .section .hybmp$x, "yi"
+ .symidx func
+ .symidx thunk
+ .word 1
+ .symidx func2
+ .symidx thunk2
+ .word 1
+
+ .data
+ .rva func
+ .rva func2
+
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-icf-diff-thunk.s -o test-icf-diff-thunk.obj
+// RUN: lld-link -machine:arm64ec -dll -noentry -out:out-icf-diff-thunk.dll loadcfg.obj test-icf-diff-thunk.obj
+// RUN: llvm-objdump -d out-icf-diff-thunk.dll | FileCheck --check-prefix=ICF-DIFF-THUNK %s
+
+// ICF-DIFF-THUNK: Disassembly of section .text:
+// IC...
[truncated]
|
@llvm/pr-subscribers-lld-coff Author: Jacek Caban (cjacek) ChangesFor x86_64 callable functions, ARM64EC requires an entry thunk generated by the compiler. The linker interprets .hybmp sections to associate function chunks with their entry points and writes an offset to thunks preceding function section contents. Additionally, ICF needs to be aware of entry thunks to not consider chunks to be equal when they have different entry thunks, and GC needs to mark entry thunks together with function chunks. I used a new SectionChunkEC class instead of storing entry thunks in SectionChunk, following the guideline to keep SectionChunk as compact as possible. This way, there is no memory usage increase on non-EC targets. Patch is 25.33 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/88132.diff 13 Files Affected:
diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index 004d71097387a2..9ee0c29a4cd3cf 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -31,8 +31,8 @@ using llvm::support::ulittle32_t;
namespace lld::coff {
-SectionChunk::SectionChunk(ObjFile *f, const coff_section *h)
- : Chunk(SectionKind), file(f), header(h), repl(this) {
+SectionChunk::SectionChunk(ObjFile *f, const coff_section *h, Kind k)
+ : Chunk(k), file(f), header(h), repl(this) {
// Initialize relocs.
if (file)
setRelocs(file->getCOFFObj()->getRelocations(header));
@@ -410,6 +410,10 @@ void SectionChunk::writeTo(uint8_t *buf) const {
applyRelocation(buf + rel.VirtualAddress, rel);
}
+
+ // Write the offset to EC entry thunk preceding section contents.
+ if (Defined *entryThunk = getEntryThunk())
+ write32le(buf - sizeof(uint32_t), entryThunk->getRVA() - rva + 1);
}
void SectionChunk::applyRelocation(uint8_t *off,
diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h
index bb919037ecc263..df311524a8d185 100644
--- a/lld/COFF/Chunks.h
+++ b/lld/COFF/Chunks.h
@@ -55,7 +55,12 @@ enum : unsigned { Log2MaxSectionAlignment = 13 };
// doesn't even have actual data (if common or bss).
class Chunk {
public:
- enum Kind : uint8_t { SectionKind, OtherKind, ImportThunkKind };
+ enum Kind : uint8_t {
+ SectionKind,
+ SectionECKind,
+ OtherKind,
+ ImportThunkKind
+ };
Kind kind() const { return chunkKind; }
// Returns the size of this chunk (even if this is a common or BSS.)
@@ -120,6 +125,10 @@ class Chunk {
llvm::Triple::ArchType getArch() const;
std::optional<chpe_range_type> getArm64ECRangeType() const;
+ // ARM64EC entry thunk associated with the chunk.
+ Defined *getEntryThunk() const;
+ void setEntryThunk(Defined *entryThunk);
+
protected:
Chunk(Kind k = OtherKind) : chunkKind(k), hasData(true), p2Align(0) {}
@@ -176,7 +185,7 @@ class NonSectionChunk : public Chunk {
// bytes, so this is used only for logging or debugging.
virtual StringRef getDebugName() const { return ""; }
- static bool classof(const Chunk *c) { return c->kind() != SectionKind; }
+ static bool classof(const Chunk *c) { return c->kind() >= OtherKind; }
protected:
NonSectionChunk(Kind k = OtherKind) : Chunk(k) {}
@@ -210,7 +219,7 @@ class RuntimePseudoReloc {
};
// A chunk corresponding a section of an input file.
-class SectionChunk final : public Chunk {
+class SectionChunk : public Chunk {
// Identical COMDAT Folding feature accesses section internal data.
friend class ICF;
@@ -231,8 +240,8 @@ class SectionChunk final : public Chunk {
Symbol *operator*() const { return file->getSymbol(I->SymbolTableIndex); }
};
- SectionChunk(ObjFile *file, const coff_section *header);
- static bool classof(const Chunk *c) { return c->kind() == SectionKind; }
+ SectionChunk(ObjFile *file, const coff_section *header, Kind k = SectionKind);
+ static bool classof(const Chunk *c) { return c->kind() <= SectionECKind; }
size_t getSize() const { return header->SizeOfRawData; }
ArrayRef<uint8_t> getContents() const;
void writeTo(uint8_t *buf) const;
@@ -393,6 +402,16 @@ class SectionChunk final : public Chunk {
uint32_t sectionNameSize = 0;
};
+// A section chunk corresponding a section of an EC input file.
+class SectionChunkEC final : public SectionChunk {
+public:
+ static bool classof(const Chunk *c) { return c->kind() == SectionECKind; }
+
+ SectionChunkEC(ObjFile *file, const coff_section *header)
+ : SectionChunk(file, header, SectionECKind) {}
+ Defined *entryThunk = nullptr;
+};
+
// Inline methods to implement faux-virtual dispatch for SectionChunk.
inline size_t Chunk::getSize() const {
@@ -775,6 +794,17 @@ inline bool Chunk::isHotPatchable() const {
return false;
}
+inline Defined *Chunk::getEntryThunk() const {
+ if (auto *c = dyn_cast<const SectionChunkEC>(this))
+ return c->entryThunk;
+ return nullptr;
+}
+
+inline void Chunk::setEntryThunk(Defined *entryThunk) {
+ if (auto c = dyn_cast<SectionChunkEC>(this))
+ c->entryThunk = entryThunk;
+}
+
void applyMOV32T(uint8_t *off, uint32_t v);
void applyBranch24T(uint8_t *off, int32_t v);
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index b0365b5b94417a..cd143479a11e23 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -2604,6 +2604,8 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
if (auto *arg = args.getLastArg(OPT_print_symbol_order))
config->printSymbolOrder = arg->getValue();
+ ctx.symtab.initializeEntryThunks();
+
// Identify unreferenced COMDAT sections.
if (config->doGC) {
if (config->mingw) {
diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp
index 013ffcfb3d5d1a..8c30c9f6d96475 100644
--- a/lld/COFF/ICF.cpp
+++ b/lld/COFF/ICF.cpp
@@ -184,9 +184,7 @@ bool ICF::equalsConstant(const SectionChunk *a, const SectionChunk *b) {
// Compare "moving" part of two sections, namely relocation targets.
bool ICF::equalsVariable(const SectionChunk *a, const SectionChunk *b) {
// Compare relocations.
- auto eq = [&](const coff_relocation &r1, const coff_relocation &r2) {
- Symbol *b1 = a->file->getSymbol(r1.SymbolTableIndex);
- Symbol *b2 = b->file->getSymbol(r2.SymbolTableIndex);
+ auto eqSym = [&](Symbol *b1, Symbol *b2) {
if (b1 == b2)
return true;
if (auto *d1 = dyn_cast<DefinedRegular>(b1))
@@ -194,6 +192,17 @@ bool ICF::equalsVariable(const SectionChunk *a, const SectionChunk *b) {
return d1->getChunk()->eqClass[cnt % 2] == d2->getChunk()->eqClass[cnt % 2];
return false;
};
+ auto eq = [&](const coff_relocation &r1, const coff_relocation &r2) {
+ Symbol *b1 = a->file->getSymbol(r1.SymbolTableIndex);
+ Symbol *b2 = b->file->getSymbol(r2.SymbolTableIndex);
+ return eqSym(b1, b2);
+ };
+
+ Symbol *e1 = a->getEntryThunk();
+ Symbol *e2 = b->getEntryThunk();
+ if ((e1 || e2) && (!e1 || !e2 || !eqSym(e1, e2)))
+ return false;
+
return std::equal(a->getRelocs().begin(), a->getRelocs().end(),
b->getRelocs().begin(), eq) &&
assocEquals(a, b);
diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp
index 037fae45242c6f..146bd530209323 100644
--- a/lld/COFF/InputFiles.cpp
+++ b/lld/COFF/InputFiles.cpp
@@ -152,6 +152,38 @@ void ObjFile::parseLazy() {
}
}
+struct ECMapEntry {
+ ulittle32_t src;
+ ulittle32_t dst;
+ ulittle32_t type;
+};
+
+void ObjFile::initializeECThunks() {
+ for (SectionChunk *chunk : hybmpChunks) {
+ if (chunk->getContents().size() % sizeof(ECMapEntry)) {
+ error("Invalid .hybmp chunk size " + Twine(chunk->getContents().size()));
+ return;
+ }
+
+ const uint8_t *end =
+ chunk->getContents().data() + chunk->getContents().size();
+ for (const uint8_t *iter = chunk->getContents().data(); iter != end;
+ iter += sizeof(ECMapEntry)) {
+ auto entry = reinterpret_cast<const ECMapEntry *>(iter);
+ switch (entry->type) {
+ case Arm64ECThunkType::Entry:
+ ctx.symtab.addEntryThunk(getSymbol(entry->src), getSymbol(entry->dst));
+ break;
+ case Arm64ECThunkType::Exit:
+ case Arm64ECThunkType::GuestExit:
+ break;
+ default:
+ warn("Ignoring unknown EC thunk type " + Twine(entry->type));
+ }
+ }
+ }
+}
+
void ObjFile::parse() {
// Parse a memory buffer as a COFF file.
std::unique_ptr<Binary> bin = CHECK(createBinary(mb), this);
@@ -168,6 +200,7 @@ void ObjFile::parse() {
initializeSymbols();
initializeFlags();
initializeDependencies();
+ initializeECThunks();
}
const coff_section *ObjFile::getSection(uint32_t i) {
@@ -242,7 +275,11 @@ SectionChunk *ObjFile::readSection(uint32_t sectionNumber,
if (sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE)
return nullptr;
- auto *c = make<SectionChunk>(this, sec);
+ SectionChunk *c;
+ if (isArm64EC(getMachineType()))
+ c = make<SectionChunkEC>(this, sec);
+ else
+ c = make<SectionChunk>(this, sec);
if (def)
c->checksum = def->CheckSum;
@@ -260,6 +297,8 @@ SectionChunk *ObjFile::readSection(uint32_t sectionNumber,
guardEHContChunks.push_back(c);
else if (name == ".sxdata")
sxDataChunks.push_back(c);
+ else if (isArm64EC(getMachineType()) && name == ".hybmp$x")
+ hybmpChunks.push_back(c);
else if (ctx.config.tailMerge && sec->NumberOfRelocations == 0 &&
name == ".rdata" && leaderName.starts_with("??_C@"))
// COFF sections that look like string literal sections (i.e. no
diff --git a/lld/COFF/InputFiles.h b/lld/COFF/InputFiles.h
index 3b55cd791bfda2..cabd87ba673e3c 100644
--- a/lld/COFF/InputFiles.h
+++ b/lld/COFF/InputFiles.h
@@ -227,6 +227,7 @@ class ObjFile : public InputFile {
void initializeSymbols();
void initializeFlags();
void initializeDependencies();
+ void initializeECThunks();
SectionChunk *
readSection(uint32_t sectionNumber,
@@ -292,6 +293,8 @@ class ObjFile : public InputFile {
std::vector<SectionChunk *> guardLJmpChunks;
std::vector<SectionChunk *> guardEHContChunks;
+ std::vector<SectionChunk *> hybmpChunks;
+
// This vector contains a list of all symbols defined or referenced by this
// file. They are indexed such that you can get a Symbol by symbol
// index. Nonexistent indices (which are occupied by auxiliary
diff --git a/lld/COFF/MarkLive.cpp b/lld/COFF/MarkLive.cpp
index 2cf216a6aaad56..06079a98f2d00a 100644
--- a/lld/COFF/MarkLive.cpp
+++ b/lld/COFF/MarkLive.cpp
@@ -68,6 +68,10 @@ void markLive(COFFLinkerContext &ctx) {
// Mark associative sections if any.
for (SectionChunk &c : sc->children())
enqueue(&c);
+
+ // Mark EC entry thunks.
+ if (Defined *entryThunk = sc->getEntryThunk())
+ addSym(entryThunk);
}
}
}
diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index 3accf24663c6a0..b4ce79879b6b9f 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -561,6 +561,25 @@ std::pair<Symbol *, bool> SymbolTable::insert(StringRef name, InputFile *file) {
return result;
}
+void SymbolTable::addEntryThunk(Symbol *from, Symbol *to) {
+ entryThunks.push_back({from, to});
+}
+
+void SymbolTable::initializeEntryThunks() {
+ for (auto it : entryThunks) {
+ auto *to = dyn_cast<Defined>(it.second);
+ if (!to)
+ continue;
+ auto *from = dyn_cast<DefinedRegular>(it.first);
+ if (!from || !from->getChunk()->isCOMDAT() ||
+ cast<DefinedRegular>(from)->getValue()) {
+ error("non COMDAT symbol '" + from->getName() + "' in hybrid map");
+ continue;
+ }
+ from->getChunk()->setEntryThunk(to);
+ }
+}
+
Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
bool isWeakAlias) {
auto [s, wasInserted] = insert(name, f);
diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h
index fc623c2840d401..93b376b69f7ecf 100644
--- a/lld/COFF/SymbolTable.h
+++ b/lld/COFF/SymbolTable.h
@@ -106,6 +106,8 @@ class SymbolTable {
Symbol *addImportThunk(StringRef name, DefinedImportData *s,
uint16_t machine);
void addLibcall(StringRef name);
+ void addEntryThunk(Symbol *from, Symbol *to);
+ void initializeEntryThunks();
void reportDuplicate(Symbol *existing, InputFile *newFile,
SectionChunk *newSc = nullptr,
@@ -134,6 +136,7 @@ class SymbolTable {
llvm::DenseMap<llvm::CachedHashStringRef, Symbol *> symMap;
std::unique_ptr<BitcodeCompiler> lto;
bool ltoCompilationDone = false;
+ std::vector<std::pair<Symbol *, Symbol *>> entryThunks;
COFFLinkerContext &ctx;
};
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 9c20bbb83d86d1..43f71d8cc385cf 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -1532,6 +1532,10 @@ void Writer::assignAddresses() {
}
if (padding && c->isHotPatchable())
virtualSize += padding;
+ // If chunk has EC entry thunk, reserve a space for an offset to the
+ // thunk.
+ if (c->getEntryThunk())
+ virtualSize += sizeof(uint32_t);
virtualSize = alignTo(virtualSize, c->getAlignment());
c->setRVA(rva + virtualSize);
virtualSize += c->getSize();
diff --git a/lld/test/COFF/arm64ec-entry-thunk.s b/lld/test/COFF/arm64ec-entry-thunk.s
new file mode 100644
index 00000000000000..280c132e32e1a9
--- /dev/null
+++ b/lld/test/COFF/arm64ec-entry-thunk.s
@@ -0,0 +1,345 @@
+// REQUIRES: aarch64
+// RUN: split-file %s %t.dir && cd %t.dir
+
+#--- test-simple.s
+// Build a simple function with an entry thunk.
+
+ .section .text,"xr",discard,func
+ .globl func
+ .p2align 2
+func:
+ mov w0, #1
+ ret
+
+ .section .wowthk$aa,"xr",discard,thunk
+ .globl thunk
+ .p2align 2
+thunk:
+ mov w0, #10
+ ret
+
+ .section .hybmp$x, "yi"
+ .symidx func
+ .symidx thunk
+ .word 1
+
+ .data
+ .rva func
+
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadcfg.obj
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-simple.s -o test-simple.obj
+// RUN: lld-link -machine:arm64ec -dll -noentry -out:out-simple.dll loadcfg.obj test-simple.obj
+// RUN: llvm-objdump -d out-simple.dll | FileCheck --check-prefix=DISASM %s
+
+// DISASM: Disassembly of section .text:
+// DISASM-EMPTY:
+// DISASM-NEXT: 0000000180001000 <.text>:
+// DISASM-NEXT: 180001000: 00000009 udf #0x9
+// DISASM-NEXT: 180001004: 52800020 mov w0, #0x1 // =1
+// DISASM-NEXT: 180001008: d65f03c0 ret
+// DISASM-NEXT: 18000100c: 52800140 mov w0, #0xa // =10
+// DISASM-NEXT: 180001010: d65f03c0 ret
+
+// RUN: llvm-readobj --sections out-simple.dll | FileCheck --check-prefix=HYBMP %s
+// HYBMP-NOT: .hybmp
+
+// RUN: lld-link -machine:arm64x -dll -noentry -out:out-simplex.dll loadcfg.obj test-simple.obj
+// RUN: llvm-objdump -d out-simplex.dll | FileCheck --check-prefix=DISASM %s
+
+#--- test-split-func.s
+// Build a simple function with an entry thunk, but pass it in multiple files.
+
+ .section .text,"xr",discard,func
+ .globl func
+ .p2align 2
+func:
+ mov w0, #1
+ ret
+
+#--- test-split-thunk.s
+ .section .wowthk$aa,"xr",discard,thunk
+ .globl thunk
+ .p2align 2
+thunk:
+ mov w0, #10
+ ret
+
+#--- test-split-hybmp.s
+ .section .hybmp$x, "yi"
+ .symidx func
+ .symidx thunk
+ .word 1
+
+#--- test-split-data.s
+ .data
+ .rva func
+
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-split-func.s -o test-split-func.obj
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-split-thunk.s -o test-split-thunk.obj
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-split-hybmp.s -o test-split-hybmp.obj
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-split-data.s -o test-split-data.obj
+// RUN: lld-link -machine:arm64ec -dll -noentry -out:out-split.dll loadcfg.obj \
+// RUN: test-split-func.obj test-split-thunk.obj test-split-data.obj test-split-hybmp.obj
+// RUN: llvm-objdump -d out-split.dll | FileCheck --check-prefix=DISASM %s
+
+#--- test-align.s
+// Build multiple functions with thunks and various alignments and check that entry thunk offsets
+// are correctly placed.
+
+ .section .text,"xr",discard,func
+ .globl func
+ .p2align 2
+func:
+ mov w0, #1
+ nop
+ ret
+
+ .section .text,"xr",discard,func2
+ .globl func2
+ .p2align 2
+func2:
+ mov w0, #2
+ ret
+
+ .section .text,"xr",discard,func3
+ .globl func3
+ .p2align 3
+func3:
+ mov w0, #3
+ nop
+ ret
+
+ .section .text,"xr",discard,func4
+ .globl func4
+ .p2align 3
+func4:
+ mov w0, #4
+ ret
+
+ .section .wowthk$aa,"xr",discard,thunk
+ .globl thunk
+ .p2align 2
+thunk:
+ mov w0, #10
+ ret
+
+ .section .wowthk$aa,"xr",discard,thunk2
+ .globl thunk2
+ .p2align 2
+thunk2:
+ mov w0, #20
+ ret
+
+ .section .hybmp$x, "yi"
+ .symidx func
+ .symidx thunk
+ .word 1
+ .symidx func2
+ .symidx thunk2
+ .word 1
+ .symidx func3
+ .symidx thunk
+ .word 1
+ .symidx func4
+ .symidx thunk
+ .word 1
+
+ .data
+ .rva func
+ .rva func2
+ .rva func3
+ .rva func4
+
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-align.s -o test-align.obj
+// RUN: lld-link -machine:arm64ec -dll -noentry -out:out-align.dll loadcfg.obj test-align.obj
+// RUN: llvm-objdump -d out-align.dll | FileCheck --check-prefix=ALIGN %s
+
+// ALIGN: Disassembly of section .text:
+// ALIGN-EMPTY:
+// ALIGN-NEXT: 0000000180001000 <.text>:
+// ALIGN-NEXT: 180001000: 00000035 udf #0x35
+// ALIGN-NEXT: 180001004: 52800020 mov w0, #0x1 // =1
+// ALIGN-NEXT: 180001008: d503201f nop
+// ALIGN-NEXT: 18000100c: d65f03c0 ret
+// ALIGN-NEXT: 180001010: 0000002d udf #0x2d
+// ALIGN-NEXT: 180001014: 52800040 mov w0, #0x2 // =2
+// ALIGN-NEXT: 180001018: d65f03c0 ret
+// ALIGN-NEXT: 18000101c: 00000019 udf #0x19
+// ALIGN-NEXT: 180001020: 52800060 mov w0, #0x3 // =3
+// ALIGN-NEXT: 180001024: d503201f nop
+// ALIGN-NEXT: 180001028: d65f03c0 ret
+// ALIGN-NEXT: 18000102c: 00000009 udf #0x9
+// ALIGN-NEXT: 180001030: 52800080 mov w0, #0x4 // =4
+// ALIGN-NEXT: 180001034: d65f03c0 ret
+// ALIGN-NEXT: 180001038: 52800140 mov w0, #0xa // =10
+// ALIGN-NEXT: 18000103c: d65f03c0 ret
+// ALIGN-NEXT: 180001040: 52800280 mov w0, #0x14 // =20
+// ALIGN-NEXT: 180001044: d65f03c0 ret
+
+#--- test-icf-thunk.s
+// Build two functions with identical entry thunks and check that thunks are merged by ICF.
+
+ .section .text,"xr",discard,func
+ .globl func
+ .p2align 2
+func:
+ mov w0, #1
+ ret
+
+ .section .text,"xr",discard,func2
+ .globl func2
+ .p2align 2
+func2:
+ mov w0, #2
+ ret
+
+ .section .wowthk$aa,"xr",discard,thunk
+ .globl thunk
+ .p2align 2
+thunk:
+ mov w0, #10
+ ret
+
+ .section .wowthk$aa,"xr",discard,thunk2
+ .globl thunk2
+ .p2align 2
+thunk2:
+ mov w0, #10
+ ret
+
+ .section .hybmp$x, "yi"
+ .symidx func
+ .symidx thunk
+ .word 1
+ .symidx func2
+ .symidx thunk2
+ .word 1
+
+ .data
+ .rva func
+ .rva func2
+
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-icf-thunk.s -o test-icf-thunk.obj
+// RUN: lld-link -machine:arm64ec -dll -noentry -out:out-icf-thunk.dll loadcfg.obj test-icf-thunk.obj
+// RUN: llvm-objdump -d out-icf-thunk.dll | FileCheck --check-prefix=ICF-THUNK %s
+
+// ICF-THUNK: Disassembly of section .text:
+// ICF-THUNK-EMPTY:
+// ICF-THUNK-NEXT: 0000000180001000 <.text>:
+// ICF-THUNK-NEXT: 180001000: 00000015 udf #0x15
+// ICF-THUNK-NEXT: 180001004: 52800020 mov w0, #0x1 // =1
+// ICF-THUNK-NEXT: 180001008: d65f03c0 ret
+// ICF-THUNK-NEXT: 18000100c: 00000009 udf #0x9
+// ICF-THUNK-NEXT: 180001010: 52800040 mov w0, #0x2 // =2
+// ICF-THUNK-NEXT: 180001014: d65f03c0 ret
+// ICF-THUNK-NEXT: 180001018: 52800140 mov w0, #0xa // =10
+// ICF-THUNK-NEXT: 18000101c: d65f03c0 ret
+
+#--- test-icf-diff-thunk.s
+// Build two identical functions with different entry thunks and check that they are not merged by ICF.
+
+ .section .text,"xr",discard,func
+ .globl func
+ .p2align 2
+func:
+ mov w0, #1
+ ret
+
+ .section .text,"xr",discard,func2
+ .globl func2
+ .p2align 2
+func2:
+ mov w0, #1
+ ret
+
+ .section .wowthk$aa,"xr",discard,thunk
+ .globl thunk
+ .p2align 2
+thunk:
+ mov w0, #10
+ ret
+
+ .section .wowthk$aa,"xr",discard,thunk2
+ .globl thunk2
+ .p2align 2
+thunk2:
+ mov w0, #20
+ ret
+
+ .section .hybmp$x, "yi"
+ .symidx func
+ .symidx thunk
+ .word 1
+ .symidx func2
+ .symidx thunk2
+ .word 1
+
+ .data
+ .rva func
+ .rva func2
+
+// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-icf-diff-thunk.s -o test-icf-diff-thunk.obj
+// RUN: lld-link -machine:arm64ec -dll -noentry -out:out-icf-diff-thunk.dll loadcfg.obj test-icf-diff-thunk.obj
+// RUN: llvm-objdump -d out-icf-diff-thunk.dll | FileCheck --check-prefix=ICF-DIFF-THUNK %s
+
+// ICF-DIFF-THUNK: Disassembly of section .text:
+// IC...
[truncated]
|
ead2d1b
to
ff1f5bb
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks quite good to me overall. A handful of minor requests for more comments or general discussion.
@@ -260,6 +297,8 @@ SectionChunk *ObjFile::readSection(uint32_t sectionNumber, | |||
guardEHContChunks.push_back(c); | |||
else if (name == ".sxdata") | |||
sxDataChunks.push_back(c); | |||
else if (isArm64EC(getMachineType()) && name == ".hybmp$x") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will the section name always have the $x
suffix, or should we have something like name.substr(0, name.find('$')) == ".hybmp"
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I checked with MSVC, it seems to apply the special treatment only for that exact name. Things like ".hybmp", ".hybmp$xy" or ".hybmp$y" don't get that treatment.
lld/COFF/InputFiles.cpp
Outdated
for (SectionChunk *chunk : hybmpChunks) { | ||
if (chunk->getContents().size() % sizeof(ECMapEntry)) { | ||
error("Invalid .hybmp chunk size " + Twine(chunk->getContents().size())); | ||
return; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it make sense to just continue to the next chunk, instead of flat out returning, if one chunk has an unexpected size? (OTOH, it's probably not worth continuing doing lots of work as the link will fail anyway, but one general design principle in lld, is that we try not to abort directly on the first error, but print as many relevant errors to the user as possible in one go.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point, I changed it.
@@ -806,7 +806,7 @@ enum Feat00Flags : uint32_t { | |||
Kernel = 0x40000000, | |||
}; | |||
|
|||
enum class Arm64ECThunkType : uint8_t { | |||
enum Arm64ECThunkType : uint8_t { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is this change needed here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's to make switch statement in initializeECThunks
possible. Otherwise, there is no implicit conversion from ulittle32_t
and the switch statement would need an explicit cast. And with that cast, we wouldn't be able to use default
far the warning. I could probably make it work, but the main motivation for having it in a public header was LLD (in #85936), changing it here felt right.
lld/test/COFF/arm64ec-entry-thunk.s
Outdated
.section .hybmp$x, "yi" | ||
.symidx func | ||
.symidx thunk | ||
.word 1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A comment with what the .word 1
means (the thunk type) would be good, maybe just # entry thunk
on the same line.
|
||
// Write the offset to EC entry thunk preceding section contents. | ||
if (Defined *entryThunk = getEntryThunk()) | ||
write32le(buf - sizeof(uint32_t), entryThunk->getRVA() - rva + 1); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The +1
feels quite odd here. I don't doubt that it's right, but it feels quite surprising to see an odd number when it comes to arm64 function offsets (which all should be multiples of 4). Is there something more you can write here to explain it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's what I observed with MSVC and what Windows expects, I can only speculate about the reason. It could be just "have the low bit set", or it could be interpreted as the offset being from the last byte of the padding, not the first byte of the function. I added a comment about it.
@@ -260,6 +297,8 @@ SectionChunk *ObjFile::readSection(uint32_t sectionNumber, | |||
guardEHContChunks.push_back(c); | |||
else if (name == ".sxdata") | |||
sxDataChunks.push_back(c); | |||
else if (isArm64EC(getMachineType()) && name == ".hybmp$x") | |||
hybmpChunks.push_back(c); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So, this change both has the effect that the .hybmp
section chunks are getting parsed, but also means that they are skipped and not included in the output file?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, there is no reason for them to be in the output file. Entry thunks are looked up from the offset in padding in runtime. Later, the linker will need exit thunk association for generating import thunks, but once the image is linked, there is no need for such map.
|
||
#--- test-align.s | ||
// Build multiple functions with thunks and various alignments and check that entry thunk offsets | ||
// are correctly placed. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't see where the extra alignment actually is visible in the output - it seems like those cases end up perfectly aligned out of the box, so there's no padding visible anywhere in the disassembled text section?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, right, it's missing cases where additional padding is required, I added more tests. (It's still good to make sure that we correctly handle padding that happens to match alignment).
|
||
// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows text-func.s -o text-func.obj | ||
// RUN: not lld-link -machine:arm64ec -dll -noentry -out:test.dll text-func.obj 2>&1 | FileCheck -check-prefix=FUNC-NON-COMDAT %s | ||
// FUNC-NON-COMDAT: error: non COMDAT symbol 'func' in hybrid map |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm curious - why is it relevant for the entry thunk that the function is a COMDAT symbol? (Edit: After looking at more of the code, I realize that this is because we need space to add the offset value before - for non-COMDAT, the section chunk contains multiple functions, and we can't inject the offset right before the function.)
.globl func | ||
.p2align 2, 0x0 | ||
func: | ||
mov w0, #1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It took me a while to spot what was wrong here - that the function label isn't at the start of the section. It could be good to have a comment above this test saying what it tests (even if "offset" kinda gives it away, but it's not very obvious).
It could also be good to spell out a comment in the source, that we also check that the symbol points directly at the start of the section chunk.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added a comment both here and in the code. It also clarifies why COMDATs are needed and adds more comments to tests.
Thanks for the review!
✅ With the latest revision this PR passed the C/C++ code formatter. |
lld/COFF/Chunks.cpp
Outdated
// Write the offset to EC entry thunk preceding section contents. | ||
// Write the offset to EC entry thunk preceding section contents. The low bit | ||
// is always set, so it's effectively an offset from the last byte of the | ||
// padding. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe offset
instead of padding
here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I changed it and pushed. Thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM now, thanks! Just one comment wording suggestion.
For x86_64 callable functions, ARM64EC requires an entry thunk generated by the compiler. The linker interprets .hybmp sections to associate function chunks with their entry points and writes an offset to thunks preceding function section contents. Additionally, ICF needs to be aware of entry thunks to not consider chunks to be equal when they have different entry thunks, and GC needs to mark entry thunks together with function chunks. I used a new SectionChunkEC class instead of storing entry thunks in SectionChunk, following the guideline to keep SectionChunk as compact as possible. This way, there is no memory usage increase on non-EC targets.
For x86_64 callable functions, ARM64EC requires an entry thunk generated by the compiler. The linker interprets .hybmp sections to associate function chunks with their entry points and writes an offset to thunks preceding function section contents.
Additionally, ICF needs to be aware of entry thunks to not consider chunks to be equal when they have different entry thunks, and GC needs to mark entry thunks together with function chunks.
I used a new SectionChunkEC class instead of storing entry thunks in SectionChunk, following the guideline to keep SectionChunk as compact as possible. This way, there is no memory usage increase on non-EC targets.