Skip to content

[LLD][COFF] Introduce hybrid symbol table for EC input files on ARM64X #119294

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions lld/COFF/COFFLinkerContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,27 @@ class COFFLinkerContext : public CommonLinkerContext {
SymbolTable symtab;
COFFOptTable optTable;

// A hybrid ARM64EC symbol table on ARM64X target.
std::optional<SymbolTable> hybridSymtab;

// Pointer to the ARM64EC symbol table: either symtab for an ARM64EC target or
// hybridSymtab for an ARM64X target.
SymbolTable *symtabEC = nullptr;

// Picks the symbol table for input of the given machine type: ARM64EC and
// AMD64 input goes to the hybrid table when one exists; any other machine
// type, or any input when there is no hybrid table, goes to symtab.
SymbolTable &getSymtab(llvm::COFF::MachineTypes machine) {
  const bool usesHybridTable = machine == ARM64EC || machine == AMD64;
  if (!hybridSymtab || !usesHybridTable)
    return symtab;
  return *hybridSymtab;
}

// Runs the callback on symtab first and then, if the hybrid symbol table
// exists once that call has returned, on the hybrid table as well.
void forEachSymtab(std::function<void(SymbolTable &symtab)> f) {
  f(symtab);
  if (!hybridSymtab)
    return;
  f(*hybridSymtab);
}

std::vector<ObjFile *> objFileInstances;
std::map<std::string, PDBInputFile *> pdbInputFileInstances;
std::vector<ImportFile *> importFileInstances;
Expand Down
113 changes: 63 additions & 50 deletions lld/COFF/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,17 @@ void LinkerDriver::setMachine(MachineTypes machine) {
assert(machine != IMAGE_FILE_MACHINE_UNKNOWN);

ctx.config.machine = machine;
ctx.symtab.machine = machine;

// Non-hybrid target: the single symbol table takes the target machine type.
if (machine != ARM64X) {
ctx.symtab.machine = machine;
// On an ARM64EC target that same table is also the EC symbol table.
if (machine == ARM64EC)
ctx.symtabEC = &ctx.symtab;
} else {
// ARM64X target: the primary table is marked ARM64, and a second table is
// constructed with ARM64EC in hybridSymtab and becomes the EC symbol table.
ctx.symtab.machine = ARM64;
ctx.hybridSymtab.emplace(ctx, ARM64EC);
ctx.symtabEC = &*ctx.hybridSymtab;
}

addWinSysRootLibSearchPaths();
}

Expand Down Expand Up @@ -2519,54 +2529,56 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
if (config->imageBase == uint64_t(-1))
config->imageBase = getDefaultImageBase();

ctx.symtab.addSynthetic(mangle("__ImageBase"), nullptr);
if (config->machine == I386) {
ctx.symtab.addAbsolute("___safe_se_handler_table", 0);
ctx.symtab.addAbsolute("___safe_se_handler_count", 0);
}

ctx.symtab.addAbsolute(mangle("__guard_fids_count"), 0);
ctx.symtab.addAbsolute(mangle("__guard_fids_table"), 0);
ctx.symtab.addAbsolute(mangle("__guard_flags"), 0);
ctx.symtab.addAbsolute(mangle("__guard_iat_count"), 0);
ctx.symtab.addAbsolute(mangle("__guard_iat_table"), 0);
ctx.symtab.addAbsolute(mangle("__guard_longjmp_count"), 0);
ctx.symtab.addAbsolute(mangle("__guard_longjmp_table"), 0);
// Needed for MSVC 2017 15.5 CRT.
ctx.symtab.addAbsolute(mangle("__enclave_config"), 0);
// Needed for MSVC 2019 16.8 CRT.
ctx.symtab.addAbsolute(mangle("__guard_eh_cont_count"), 0);
ctx.symtab.addAbsolute(mangle("__guard_eh_cont_table"), 0);

if (isArm64EC(config->machine)) {
ctx.symtab.addAbsolute("__arm64x_extra_rfe_table", 0);
ctx.symtab.addAbsolute("__arm64x_extra_rfe_table_size", 0);
ctx.symtab.addAbsolute("__arm64x_redirection_metadata", 0);
ctx.symtab.addAbsolute("__arm64x_redirection_metadata_count", 0);
ctx.symtab.addAbsolute("__hybrid_auxiliary_delayload_iat_copy", 0);
ctx.symtab.addAbsolute("__hybrid_auxiliary_delayload_iat", 0);
ctx.symtab.addAbsolute("__hybrid_auxiliary_iat", 0);
ctx.symtab.addAbsolute("__hybrid_auxiliary_iat_copy", 0);
ctx.symtab.addAbsolute("__hybrid_code_map", 0);
ctx.symtab.addAbsolute("__hybrid_code_map_count", 0);
ctx.symtab.addAbsolute("__hybrid_image_info_bitfield", 0);
ctx.symtab.addAbsolute("__x64_code_ranges_to_entry_points", 0);
ctx.symtab.addAbsolute("__x64_code_ranges_to_entry_points_count", 0);
ctx.symtab.addSynthetic("__guard_check_icall_a64n_fptr", nullptr);
ctx.symtab.addSynthetic("__arm64x_native_entrypoint", nullptr);
}

if (config->pseudoRelocs) {
ctx.symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST__"), 0);
ctx.symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST_END__"), 0);
}
if (config->mingw) {
ctx.symtab.addAbsolute(mangle("__CTOR_LIST__"), 0);
ctx.symtab.addAbsolute(mangle("__DTOR_LIST__"), 0);
}
if (config->debug || config->buildIDHash != BuildIDHash::None)
if (ctx.symtab.findUnderscore("__buildid"))
ctx.symtab.addUndefined(mangle("__buildid"));
ctx.forEachSymtab([&](SymbolTable &symtab) {
symtab.addSynthetic(mangle("__ImageBase"), nullptr);
if (symtab.machine == I386) {
symtab.addAbsolute("___safe_se_handler_table", 0);
symtab.addAbsolute("___safe_se_handler_count", 0);
}

symtab.addAbsolute(mangle("__guard_fids_count"), 0);
symtab.addAbsolute(mangle("__guard_fids_table"), 0);
symtab.addAbsolute(mangle("__guard_flags"), 0);
symtab.addAbsolute(mangle("__guard_iat_count"), 0);
symtab.addAbsolute(mangle("__guard_iat_table"), 0);
symtab.addAbsolute(mangle("__guard_longjmp_count"), 0);
symtab.addAbsolute(mangle("__guard_longjmp_table"), 0);
// Needed for MSVC 2017 15.5 CRT.
symtab.addAbsolute(mangle("__enclave_config"), 0);
// Needed for MSVC 2019 16.8 CRT.
symtab.addAbsolute(mangle("__guard_eh_cont_count"), 0);
symtab.addAbsolute(mangle("__guard_eh_cont_table"), 0);

if (isArm64EC(ctx.config.machine)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this bit, about what symbols we set in the load config, is going to change further in some subsequent change?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this should ultimately use symtab.isEC() instead. Changing it now would break CHPE data in the native load config. CHPE metadata should only exist in the EC namespace, but maintaining compatibility with existing tests requires linker support to copy it from the EC load config to the native one, as well as updates to the writer to correctly set those symbols in the appropriate namespace. This is implemented in this commit.

symtab.addAbsolute("__arm64x_extra_rfe_table", 0);
symtab.addAbsolute("__arm64x_extra_rfe_table_size", 0);
symtab.addAbsolute("__arm64x_redirection_metadata", 0);
symtab.addAbsolute("__arm64x_redirection_metadata_count", 0);
symtab.addAbsolute("__hybrid_auxiliary_delayload_iat_copy", 0);
symtab.addAbsolute("__hybrid_auxiliary_delayload_iat", 0);
symtab.addAbsolute("__hybrid_auxiliary_iat", 0);
symtab.addAbsolute("__hybrid_auxiliary_iat_copy", 0);
symtab.addAbsolute("__hybrid_code_map", 0);
symtab.addAbsolute("__hybrid_code_map_count", 0);
symtab.addAbsolute("__hybrid_image_info_bitfield", 0);
symtab.addAbsolute("__x64_code_ranges_to_entry_points", 0);
symtab.addAbsolute("__x64_code_ranges_to_entry_points_count", 0);
symtab.addSynthetic("__guard_check_icall_a64n_fptr", nullptr);
symtab.addSynthetic("__arm64x_native_entrypoint", nullptr);
}

if (config->pseudoRelocs) {
symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST__"), 0);
symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST_END__"), 0);
}
if (config->mingw) {
symtab.addAbsolute(mangle("__CTOR_LIST__"), 0);
symtab.addAbsolute(mangle("__DTOR_LIST__"), 0);
}
if (config->debug || config->buildIDHash != BuildIDHash::None)
if (symtab.findUnderscore("__buildid"))
symtab.addUndefined(mangle("__buildid"));
});

// This code may add new undefined symbols to the link, which may enqueue more
// symbol resolution tasks, so we need to continue executing tasks until we
Expand Down Expand Up @@ -2809,7 +2821,8 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
if (auto *arg = args.getLastArg(OPT_print_symbol_order))
config->printSymbolOrder = arg->getValue();

ctx.symtab.initializeECThunks();
if (ctx.symtabEC)
ctx.symtabEC->initializeECThunks();

// Identify unreferenced COMDAT sections.
if (config->doGC) {
Expand Down
18 changes: 10 additions & 8 deletions lld/COFF/InputFiles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,15 +114,15 @@ void ArchiveFile::parse() {
file = CHECK(Archive::create(mb), this);

// Try to read symbols from ECSYMBOLS section on ARM64EC.
if (isArm64EC(ctx.config.machine)) {
if (ctx.symtabEC) {
iterator_range<Archive::symbol_iterator> symbols =
CHECK(file->ec_symbols(), this);
if (!symbols.empty()) {
for (const Archive::Symbol &sym : symbols)
ctx.symtab.addLazyArchive(this, sym);
ctx.symtabEC->addLazyArchive(this, sym);

// Read both EC and native symbols on ARM64X.
if (ctx.config.machine != ARM64X)
if (!ctx.hybridSymtab)
return;
}
}
Expand Down Expand Up @@ -163,7 +163,7 @@ lld::coff::getArchiveMembers(COFFLinkerContext &ctx, Archive *file) {
}

ObjFile::ObjFile(COFFLinkerContext &ctx, MemoryBufferRef m, bool lazy)
: InputFile(ctx.symtab, ObjectKind, m, lazy) {}
: InputFile(ctx.getSymtab(getMachineType(m)), ObjectKind, m, lazy) {}

void ObjFile::parseLazy() {
// Native object file.
Expand Down Expand Up @@ -806,10 +806,12 @@ std::optional<Symbol *> ObjFile::createDefined(
return createRegular(sym);
}

MachineTypes ObjFile::getMachineType() const {
if (coffObj)
return static_cast<MachineTypes>(coffObj->getMachine());
return IMAGE_FILE_MACHINE_UNKNOWN;
// Determines the machine type of an object file directly from its raw header
// bytes, without parsing the whole file.
//
// Returns IMAGE_FILE_MACHINE_UNKNOWN when the buffer is too small to contain
// a machine field.
MachineTypes ObjFile::getMachineType(MemoryBufferRef mb) {
  const size_t size = mb.getBufferSize();
  if (size < sizeof(ulittle16_t))
    return IMAGE_FILE_MACHINE_UNKNOWN;

  // View the start of the header as a sequence of 16-bit fields.
  const auto *fields =
      reinterpret_cast<const ulittle16_t *>(mb.getBufferStart());

  // In a regular COFF header the machine type is the first 16-bit field.
  const uint16_t first = fields[0];
  if (first != IMAGE_FILE_MACHINE_UNKNOWN || size < 4 * sizeof(ulittle16_t))
    return MachineTypes(first);

  // Bigobj (and other anonymous object) headers start with
  // Sig1 == IMAGE_FILE_MACHINE_UNKNOWN and Sig2 == 0xFFFF, followed by Version
  // and only then Machine. Reading just the first field would misreport such
  // files as having an unknown machine type, so use the fourth field instead.
  const uint16_t sig2 = fields[1];
  if (sig2 != 0xFFFF)
    return MachineTypes(first);
  const uint16_t machine = fields[3];
  return MachineTypes(machine);
}

ArrayRef<uint8_t> ObjFile::getDebugSection(StringRef secName) {
Expand Down
3 changes: 2 additions & 1 deletion lld/COFF/InputFiles.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,8 @@ class ObjFile : public InputFile {
static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
void parse() override;
void parseLazy();
MachineTypes getMachineType() const override;
MachineTypes getMachineType() const override { return getMachineType(mb); }
static MachineTypes getMachineType(MemoryBufferRef mb);
ArrayRef<Chunk *> getChunks() { return chunks; }
ArrayRef<SectionChunk *> getDebugChunks() { return debugChunks; }
ArrayRef<SectionChunk *> getSXDataChunks() { return sxDataChunks; }
Expand Down
6 changes: 4 additions & 2 deletions lld/COFF/SymbolTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ class Symbol;
// There is one add* function per symbol type.
class SymbolTable {
public:
SymbolTable(COFFLinkerContext &c) : ctx(c) {}
SymbolTable(COFFLinkerContext &c,
llvm::COFF::MachineTypes machine = IMAGE_FILE_MACHINE_UNKNOWN)
: ctx(c), machine(machine) {}

void addFile(InputFile *file);

Expand Down Expand Up @@ -120,7 +122,7 @@ class SymbolTable {
uint32_t newSectionOffset = 0);

COFFLinkerContext &ctx;
llvm::COFF::MachineTypes machine = IMAGE_FILE_MACHINE_UNKNOWN;
llvm::COFF::MachineTypes machine;

bool isEC() const { return machine == ARM64EC; }

Expand Down
3 changes: 2 additions & 1 deletion lld/test/COFF/arm64ec-codemap.test
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ RUN: llvm-mc -filetype=obj -triple=arm64ec-windows data-sec2.s -o data-sec2.obj
RUN: llvm-mc -filetype=obj -triple=arm64ec-windows empty-sec.s -o arm64ec-empty-sec.obj
RUN: llvm-mc -filetype=obj -triple=x86_64-windows x86_64-func-sym.s -o x86_64-func-sym.obj
RUN: llvm-mc -filetype=obj -triple=x86_64-windows empty-sec.s -o x86_64-empty-sec.obj
RUN: llvm-mc -filetype=obj -triple=aarch64-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64.obj
RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj

Link ARM64EC DLL and verify that the code is arranged as expected.
Expand Down Expand Up @@ -51,7 +52,7 @@ RUN: llvm-readobj --coff-load-config test2.dll | FileCheck -check-prefix=CODEMAP
RUN: llvm-objdump -d test2.dll | FileCheck -check-prefix=DISASM %s

RUN: lld-link -out:testx.dll -machine:arm64x arm64-func-sym.obj arm64ec-func-sym.obj \
RUN: x86_64-func-sym.obj loadconfig-arm64ec.obj -dll -noentry
RUN: x86_64-func-sym.obj loadconfig-arm64.obj loadconfig-arm64ec.obj -dll -noentry

Adding empty chunks does not affect code map ranges.

Expand Down
3 changes: 2 additions & 1 deletion lld/test/COFF/arm64ec-entry-thunk.s
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ thunk:
.rva func

// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadcfg.obj
// RUN: llvm-mc -filetype=obj -triple=aarch64-windows %S/Inputs/loadconfig-arm64ec.s -o native-loadcfg.obj
// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test-simple.s -o test-simple.obj
// RUN: lld-link -machine:arm64ec -dll -noentry -out:out-simple.dll loadcfg.obj test-simple.obj
// RUN: llvm-objdump -d out-simple.dll | FileCheck --check-prefix=DISASM %s
Expand All @@ -43,7 +44,7 @@ thunk:
// RUN: llvm-readobj --sections out-simple.dll | FileCheck --check-prefix=HYBMP %s
// HYBMP-NOT: .hybmp

// RUN: lld-link -machine:arm64x -dll -noentry -out:out-simplex.dll loadcfg.obj test-simple.obj
// RUN: lld-link -machine:arm64x -dll -noentry -out:out-simplex.dll native-loadcfg.obj loadcfg.obj test-simple.obj
// RUN: llvm-objdump -d out-simplex.dll | FileCheck --check-prefix=DISASM %s

#--- test-split-func.s
Expand Down
4 changes: 3 additions & 1 deletion lld/test/COFF/arm64ec-lib.test
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ RUN: llvm-mc -filetype=obj -triple=arm64ec-windows ref-alias.s -o ref-alias.obj
RUN: llvm-mc -filetype=obj -triple=arm64ec-windows ref-thunk.s -o ref-thunk.obj
RUN: llvm-mc -filetype=obj -triple=arm64ec-windows func.s -o func.obj
RUN: llvm-mc -filetype=obj -triple=x86_64-windows func-x86_64.s -o func-x86_64.obj
RUN: llvm-mc -filetype=obj -triple=aarch64-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64.obj
RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj

RUN: llvm-lib -machine:arm64ec -out:sym-arm64ec.lib sym-arm64ec.obj nsym-aarch64.obj
Expand All @@ -26,7 +27,8 @@ Verify that a symbol can be referenced from a regular archive map when ECSYMBOLS
RUN: lld-link -machine:arm64ec -dll -noentry -out:test2.dll symref-arm64ec.obj sym-x86_64.lib loadconfig-arm64ec.obj

Verify that both native and EC symbols can be referenced in a hybrid target.
RUN: lld-link -machine:arm64x -dll -noentry -out:test3.dll symref-arm64ec.obj nsymref-aarch64.obj sym-arm64ec.lib loadconfig-arm64ec.obj
RUN: lld-link -machine:arm64x -dll -noentry -out:test3.dll symref-arm64ec.obj nsymref-aarch64.obj sym-arm64ec.lib \
RUN: loadconfig-arm64.obj loadconfig-arm64ec.obj

Ensure that an EC symbol is not resolved using a regular symbol map.
RUN: not lld-link -machine:arm64ec -dll -noentry -out:test-err.dll nsymref-arm64ec.obj sym-arm64ec.lib loadconfig-arm64ec.obj 2>&1 |\
Expand Down
9 changes: 5 additions & 4 deletions lld/test/COFF/arm64ec-range-thunks.s
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# RUN: llvm-mc -filetype=obj -triple=aarch64-windows native-funcs.s -o funcs-aarch64.obj
# RUN: llvm-mc -filetype=obj -triple=x86_64-windows space.s -o space-x86_64.obj
# RUN: llvm-mc -filetype=obj -triple=aarch64-windows space.s -o space-aarch64.obj
# RUN: llvm-mc -filetype=obj -triple=aarch64-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64.obj
# RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj


Expand Down Expand Up @@ -59,8 +60,8 @@

# A similar test using a hybrid binary and native placeholder chunks.

# RUN: lld-link -machine:arm64x -noentry -dll funcs-arm64ec.obj space-aarch64.obj loadconfig-arm64ec.obj -out:testx.dll \
# RUN: -verbose 2>&1 | FileCheck -check-prefix=VERBOSE %s
# RUN: lld-link -machine:arm64x -noentry -dll funcs-arm64ec.obj space-aarch64.obj loadconfig-arm64.obj loadconfig-arm64ec.obj \
# RUN: -out:testx.dll -verbose 2>&1 | FileCheck -check-prefix=VERBOSE %s
# RUN: llvm-objdump -d testx.dll | FileCheck --check-prefix=DISASM %s

# RUN: llvm-readobj --coff-load-config testx.dll | FileCheck --check-prefix=LOADCFGX %s
Expand All @@ -74,8 +75,8 @@

# Test a hybrid ARM64X binary which requires range extension thunks for both native and EC relocations.

# RUN: lld-link -machine:arm64x -noentry -dll funcs-arm64ec.obj funcs-aarch64.obj loadconfig-arm64ec.obj -out:testx2.dll \
# RUN: -verbose 2>&1 | FileCheck -check-prefix=VERBOSEX %s
# RUN: lld-link -machine:arm64x -noentry -dll funcs-arm64ec.obj funcs-aarch64.obj loadconfig-arm64.obj loadconfig-arm64ec.obj \
# RUN: -out:testx2.dll -verbose 2>&1 | FileCheck -check-prefix=VERBOSEX %s
# VERBOSEX: Added 5 thunks with margin {{.*}} in 1 passes

# RUN: llvm-objdump -d testx2.dll | FileCheck --check-prefix=DISASMX %s
Expand Down
38 changes: 38 additions & 0 deletions lld/test/COFF/arm64x-symtab.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// REQUIRES: aarch64, x86
// RUN: split-file %s %t.dir && cd %t.dir

// RUN: llvm-mc -filetype=obj -triple=aarch64-windows sym.s -o sym-aarch64.obj
// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows sym.s -o sym-arm64ec.obj
// RUN: llvm-mc -filetype=obj -triple=x86_64-windows sym.s -o sym-x86_64.obj
// RUN: llvm-mc -filetype=obj -triple=aarch64-windows symref.s -o symref-aarch64.obj
// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows symref.s -o symref-arm64ec.obj
// RUN: llvm-lib -machine:arm64x -out:sym.lib sym-aarch64.obj sym-arm64ec.obj

// Check that native object files can't reference EC symbols.

// RUN: not lld-link -machine:arm64x -dll -noentry -out:err1.dll symref-aarch64.obj sym-arm64ec.obj \
// RUN: 2>&1 | FileCheck --check-prefix=UNDEF %s
// UNDEF: lld-link: error: undefined symbol: sym
// UNDEF-NEXT: >>> referenced by symref-aarch64.obj:(.data)

// RUN: not lld-link -machine:arm64x -dll -noentry -out:err2.dll symref-aarch64.obj sym-x86_64.obj \
// RUN: 2>&1 | FileCheck --check-prefix=UNDEF %s

// Check that ARM64X target can have the same symbol names in both native and EC namespaces.

// RUN: lld-link -machine:arm64x -dll -noentry -out:out.dll symref-aarch64.obj sym-aarch64.obj \
// RUN: symref-arm64ec.obj sym-x86_64.obj

// Check that ARM64X target can reference both native and EC symbols from an archive.

// RUN: lld-link -machine:arm64x -dll -noentry -out:out2.dll symref-aarch64.obj symref-arm64ec.obj sym.lib

#--- symref.s
.data
.rva sym

#--- sym.s
.data
.globl sym
sym:
.word 0
Loading