-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LLD][COFF] Add support for ARM64EC delay-load imports #110042
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Fill the regular delay-load IAT with x86_64 delay-load thunks. Similarly to regular imports, create an auxiliary IAT and its copy for ARM64EC calls. These are filled with the same `__impchk_` thunks used for regular imports, which perform an indirect call with `__icall_helper_arm64ec` on the regular delay-load IAT. These auxiliary IATs are exposed via CHPE metadata starting from version 2. The MSVC linker creates one more copy of the auxiliary IAT. `__imp_func` symbols refer to that hidden IAT, while the `#func` thunk performs a call with the public auxiliary IAT. If the public auxiliary IAT is fine for `#func`, it should be fine for calls using the `__imp_func` symbol as well. Therefore, I made `__imp_func` refer to that IAT too.
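@llvm/pr-subscribers-platform-windows @llvm/pr-subscribers-lld Author: Jacek Caban (cjacek) Changes: Fill the regular delay-load IAT with x86_64 delay-load thunks. Similarly to regular imports, create an auxiliary IAT and its copy for ARM64EC calls. These are filled with the same `__impchk_` thunks used for regular imports, which perform an indirect call with `__icall_helper_arm64ec` on the regular delay-load IAT. These auxiliary IATs are exposed via CHPE metadata starting from version 2. The MSVC linker creates one more copy of the auxiliary IAT. `__imp_func` symbols refer to that hidden IAT, while the `#func` thunk performs a call with the public auxiliary IAT. If the public auxiliary IAT is fine for `#func`, it should be fine for calls using the `__imp_func` symbol as well. Therefore, I made `__imp_func` refer to that IAT too. Full diff: https://github.com/llvm/llvm-project/pull/110042.diff 6 Files Affected: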
diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp
index 39dcce9fe84837..2d20b094888c7a 100644
--- a/lld/COFF/DLL.cpp
+++ b/lld/COFF/DLL.cpp
@@ -812,6 +812,16 @@ void DelayLoadContents::create(Defined *h) {
s->loadThunkSym =
cast<DefinedSynthetic>(ctx.symtab.addSynthetic(symName, t));
}
+
+ if (s->file->impECSym) {
+ auto chunk = make<AuxImportChunk>(s->file);
+ auxIat.push_back(chunk);
+ s->file->impECSym->setLocation(chunk);
+
+ chunk = make<AuxImportChunk>(s->file);
+ auxIatCopy.push_back(chunk);
+ s->file->auxImpCopySym->setLocation(chunk);
+ }
}
thunks.push_back(tm);
if (pdataChunk)
@@ -822,6 +832,10 @@ void DelayLoadContents::create(Defined *h) {
// Terminate with null values.
addresses.push_back(make<NullChunk>(8));
names.push_back(make<NullChunk>(8));
+ if (ctx.config.machine == ARM64EC) {
+ auxIat.push_back(make<NullChunk>(8));
+ auxIatCopy.push_back(make<NullChunk>(8));
+ }
for (int i = 0, e = syms.size(); i < e; ++i)
syms[i]->setLocation(addresses[base + i]);
@@ -845,6 +859,7 @@ void DelayLoadContents::create(Defined *h) {
Chunk *DelayLoadContents::newTailMergeChunk(Chunk *dir) {
switch (ctx.config.machine) {
case AMD64:
+ case ARM64EC:
return make<TailMergeChunkX64>(dir, helper);
case I386:
return make<TailMergeChunkX86>(ctx, dir, helper);
@@ -880,6 +895,7 @@ Chunk *DelayLoadContents::newThunkChunk(DefinedImportData *s,
Chunk *tailMerge) {
switch (ctx.config.machine) {
case AMD64:
+ case ARM64EC:
return make<ThunkChunkX64>(s, tailMerge);
case I386:
return make<ThunkChunkX86>(ctx, s, tailMerge);
diff --git a/lld/COFF/DLL.h b/lld/COFF/DLL.h
index afb46f22ec9e14..f7d2b57a20a020 100644
--- a/lld/COFF/DLL.h
+++ b/lld/COFF/DLL.h
@@ -48,6 +48,8 @@ class DelayLoadContents {
ArrayRef<Chunk *> getCodeChunks() { return thunks; }
ArrayRef<Chunk *> getCodePData() { return pdata; }
ArrayRef<Chunk *> getCodeUnwindInfo() { return unwindinfo; }
+ ArrayRef<Chunk *> getAuxIat() { return auxIat; }
+ ArrayRef<Chunk *> getAuxIatCopy() { return auxIatCopy; }
uint64_t getDirRVA() { return dirs[0]->getRVA(); }
uint64_t getDirSize();
@@ -69,6 +71,8 @@ class DelayLoadContents {
std::vector<Chunk *> pdata;
std::vector<Chunk *> unwindinfo;
std::vector<Chunk *> dllNames;
+ std::vector<Chunk *> auxIat;
+ std::vector<Chunk *> auxIatCopy;
COFFLinkerContext &ctx;
};
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 5a6a4a61030e64..6a880b64c58586 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -2465,6 +2465,8 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
ctx.symtab.addAbsolute("__arm64x_extra_rfe_table_size", 0);
ctx.symtab.addAbsolute("__arm64x_redirection_metadata", 0);
ctx.symtab.addAbsolute("__arm64x_redirection_metadata_count", 0);
+ ctx.symtab.addAbsolute("__hybrid_auxiliary_delayload_iat_copy", 0);
+ ctx.symtab.addAbsolute("__hybrid_auxiliary_delayload_iat", 0);
ctx.symtab.addAbsolute("__hybrid_auxiliary_iat", 0);
ctx.symtab.addAbsolute("__hybrid_auxiliary_iat_copy", 0);
ctx.symtab.addAbsolute("__hybrid_code_map", 0);
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 7cf723a8cf103f..5cbedc89b3642e 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -949,6 +949,13 @@ void Writer::appendECImportTables() {
auxIat->chunks.end());
rdataSec->addContributingPartialSection(auxIat);
}
+
+ if (!delayIdata.getAuxIat().empty()) {
+ delayIdata.getAuxIat().front()->setAlignment(0x1000);
+ rdataSec->chunks.insert(rdataSec->chunks.end(),
+ delayIdata.getAuxIat().begin(),
+ delayIdata.getAuxIat().end());
+ }
}
// Locate the first Chunk and size of the import directory list and the
@@ -1285,6 +1292,8 @@ void Writer::appendImportThunks() {
textSec->addChunk(c);
for (Chunk *c : delayIdata.getCodePData())
pdataSec->addChunk(c);
+ for (Chunk *c : delayIdata.getAuxIatCopy())
+ rdataSec->addChunk(c);
for (Chunk *c : delayIdata.getCodeUnwindInfo())
rdataSec->addChunk(c);
}
@@ -2286,6 +2295,20 @@ void Writer::setECSymbols() {
replaceSymbol<DefinedSynthetic>(
iatCopySym, "__hybrid_auxiliary_iat_copy",
idata.auxIatCopy.empty() ? nullptr : idata.auxIatCopy.front());
+
+ Symbol *delayIatSym =
+ ctx.symtab.findUnderscore("__hybrid_auxiliary_delayload_iat");
+ replaceSymbol<DefinedSynthetic>(
+ delayIatSym, "__hybrid_auxiliary_delayload_iat",
+ delayIdata.getAuxIat().empty() ? nullptr
+ : delayIdata.getAuxIat().front());
+
+ Symbol *delayIatCopySym =
+ ctx.symtab.findUnderscore("__hybrid_auxiliary_delayload_iat_copy");
+ replaceSymbol<DefinedSynthetic>(
+ delayIatCopySym, "__hybrid_auxiliary_delayload_iat_copy",
+ delayIdata.getAuxIatCopy().empty() ? nullptr
+ : delayIdata.getAuxIatCopy().front());
}
// Write section contents to a mmap'ed file.
diff --git a/lld/test/COFF/Inputs/loadconfig-arm64ec.s b/lld/test/COFF/Inputs/loadconfig-arm64ec.s
index 80ec893869e6fa..26bcc66853f789 100644
--- a/lld/test/COFF/Inputs/loadconfig-arm64ec.s
+++ b/lld/test/COFF/Inputs/loadconfig-arm64ec.s
@@ -79,8 +79,8 @@ __chpe_metadata:
.word __arm64x_extra_rfe_table_size
.rva __os_arm64x_dispatch_fptr
.rva __hybrid_auxiliary_iat_copy
- .word 0 // __hybrid_auxiliary_delayload_iat
- .word 0 // __hybrid_auxiliary_delayload_iat_copy
+ .rva __hybrid_auxiliary_delayload_iat
+ .rva __hybrid_auxiliary_delayload_iat_copy
.word 0 // __hybrid_image_info_bitfield
.rva __os_arm64x_helper3
.rva __os_arm64x_helper4
diff --git a/lld/test/COFF/arm64ec-delayimport.test b/lld/test/COFF/arm64ec-delayimport.test
new file mode 100644
index 00000000000000..a0236d902eeaba
--- /dev/null
+++ b/lld/test/COFF/arm64ec-delayimport.test
@@ -0,0 +1,201 @@
+REQUIRES: aarch64, x86
+RUN: split-file %s %t.dir && cd %t.dir
+
+RUN: llvm-mc -filetype=obj -triple=arm64ec-windows test.s -o test.obj
+RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj
+RUN: llvm-lib -machine:arm64ec -def:test.def -out:test-arm64ec.lib
+RUN: llvm-lib -machine:arm64ec -def:test2.def -out:test2-arm64ec.lib
+
+RUN: lld-link -machine:arm64ec -dll -noentry -out:out.dll loadconfig-arm64ec.obj test.obj \
+RUN: test-arm64ec.lib test2-arm64ec.lib -delayload:test.dll -map
+
+RUN: llvm-readobj --hex-dump=.test out.dll | FileCheck --check-prefix=TESTSEC %s
+TESTSEC: 0x180008000 00600000 88700000 00200000 10100000
+TESTSEC-NEXT: 0x180008010 08600000 90700000 10200000 30100000
+TESTSEC-NEXT: 0x180008020 1c100000 3c100000 00300000
+
+RUN: llvm-objdump -d out.dll | FileCheck --check-prefix=DISASM %s
+DISASM: 0000000180001000 <.text>:
+DISASM-NEXT: 180001000: 52800000 mov w0, #0x0 // =0
+DISASM-NEXT: 180001004: d65f03c0 ret
+DISASM-NEXT: 180001008: 52800020 mov w0, #0x1 // =1
+DISASM-NEXT: 18000100c: d65f03c0 ret
+DISASM-NEXT: 180001010: b0000030 adrp x16, 0x180006000
+DISASM-NEXT: 180001014: f9400210 ldr x16, [x16]
+DISASM-NEXT: 180001018: d61f0200 br x16
+DISASM-NEXT: 18000101c: d000002b adrp x11, 0x180007000
+DISASM-NEXT: 180001020: f940456b ldr x11, [x11, #0x88]
+DISASM-NEXT: 180001024: 9000000a adrp x10, 0x180001000 <.text>
+DISASM-NEXT: 180001028: 9101414a add x10, x10, #0x50
+DISASM-NEXT: 18000102c: 17fffff5 b 0x180001000 <.text>
+DISASM-NEXT: 180001030: b0000030 adrp x16, 0x180006000
+DISASM-NEXT: 180001034: f9400610 ldr x16, [x16, #0x8]
+DISASM-NEXT: 180001038: d61f0200 br x16
+DISASM-NEXT: 18000103c: d000002b adrp x11, 0x180007000
+DISASM-NEXT: 180001040: f940496b ldr x11, [x11, #0x90]
+DISASM-NEXT: 180001044: 9000000a adrp x10, 0x180001000 <.text>
+DISASM-NEXT: 180001048: 9101614a add x10, x10, #0x58
+DISASM-NEXT: 18000104c: 17ffffed b 0x180001000 <.text>
+DISASM-NEXT: 180001050: 52800040 mov w0, #0x2 // =2
+DISASM-NEXT: 180001054: d65f03c0 ret
+DISASM-NEXT: 180001058: 52800060 mov w0, #0x3 // =3
+DISASM-NEXT: 18000105c: d65f03c0 ret
+DISASM-NEXT: ...
+DISASM-NEXT: 180002000: ff 25 82 50 00 00 jmpq *0x5082(%rip) # 0x180007088
+DISASM-NEXT: ...
+DISASM-NEXT: 18000200e: 00 00 addb %al, (%rax)
+DISASM-NEXT: 180002010: ff 25 7a 50 00 00 jmpq *0x507a(%rip) # 0x180007090
+DISASM-NEXT: 180002016: 48 8d 05 6b 50 00 00 leaq 0x506b(%rip), %rax # 0x180007088
+DISASM-NEXT: 18000201d: e9 0c 00 00 00 jmp 0x18000202e <.text+0x102e>
+DISASM-NEXT: 180002022: 48 8d 05 67 50 00 00 leaq 0x5067(%rip), %rax # 0x180007090
+DISASM-NEXT: 180002029: e9 00 00 00 00 jmp 0x18000202e <.text+0x102e>
+DISASM-NEXT: 18000202e: 51 pushq %rcx
+DISASM-NEXT: 18000202f: 52 pushq %rdx
+DISASM-NEXT: 180002030: 41 50 pushq %r8
+DISASM-NEXT: 180002032: 41 51 pushq %r9
+DISASM-NEXT: 180002034: 48 83 ec 48 subq $0x48, %rsp
+DISASM-NEXT: 180002038: 66 0f 7f 04 24 movdqa %xmm0, (%rsp)
+DISASM-NEXT: 18000203d: 66 0f 7f 4c 24 10 movdqa %xmm1, 0x10(%rsp)
+DISASM-NEXT: 180002043: 66 0f 7f 54 24 20 movdqa %xmm2, 0x20(%rsp)
+DISASM-NEXT: 180002049: 66 0f 7f 5c 24 30 movdqa %xmm3, 0x30(%rsp)
+DISASM-NEXT: 18000204f: 48 8b d0 movq %rax, %rdx
+DISASM-NEXT: 180002052: 48 8d 0d 97 21 00 00 leaq 0x2197(%rip), %rcx # 0x1800041f0
+DISASM-NEXT: 180002059: e8 aa ef ff ff callq 0x180001008 <.text+0x8>
+DISASM-NEXT: 18000205e: 66 0f 6f 04 24 movdqa (%rsp), %xmm0
+DISASM-NEXT: 180002063: 66 0f 6f 4c 24 10 movdqa 0x10(%rsp), %xmm1
+DISASM-NEXT: 180002069: 66 0f 6f 54 24 20 movdqa 0x20(%rsp), %xmm2
+DISASM-NEXT: 18000206f: 66 0f 6f 5c 24 30 movdqa 0x30(%rsp), %xmm3
+DISASM-NEXT: 180002075: 48 83 c4 48 addq $0x48, %rsp
+DISASM-NEXT: 180002079: 41 59 popq %r9
+DISASM-NEXT: 18000207b: 41 58 popq %r8
+DISASM-NEXT: 18000207d: 5a popq %rdx
+DISASM-NEXT: 18000207e: 59 popq %rcx
+DISASM-NEXT: 18000207f: ff e0 jmpq *%rax
+
+RUN: llvm-readobj --coff-load-config out.dll | FileCheck --check-prefix=LOADCFG %s
+LOADCFG: CHPEMetadata [
+LOADCFG: AuxiliaryDelayloadIAT: 0x6000
+LOADCFG-NEXT: AuxiliaryDelayloadIATCopy: 0x4000
+
+RUN: llvm-readobj --coff-imports out.dll | FileCheck --check-prefix=IMPORTS %s
+IMPORTS: DelayImport {
+IMPORTS-NEXT: Name: test.dll
+IMPORTS-NEXT: Attributes: 0x1
+IMPORTS-NEXT: ModuleHandle: 0x7080
+IMPORTS-NEXT: ImportAddressTable: 0x7088
+IMPORTS-NEXT: ImportNameTable: 0x4230
+IMPORTS-NEXT: BoundDelayImportTable: 0x0
+IMPORTS-NEXT: UnloadDelayImportTable: 0x0
+IMPORTS-NEXT: Import {
+IMPORTS-NEXT: Symbol: func (0)
+IMPORTS-NEXT: Address: 0x180002016
+IMPORTS-NEXT: }
+IMPORTS-NEXT: Import {
+IMPORTS-NEXT: Symbol: func2 (0)
+IMPORTS-NEXT: Address: 0x180002022
+IMPORTS-NEXT: }
+IMPORTS-NEXT: }
+
+RUN: FileCheck --check-prefix=MAP %s < out.map
+MAP: 0001:00000008 #__delayLoadHelper2 0000000180001008 test.obj
+MAP: 0001:00000010 #func 0000000180001010 test-arm64ec:test.dll
+MAP-NEXT: 0001:0000001c __impchk_func 000000018000101c test-arm64ec:test.dll
+MAP-NEXT: 0001:00000030 #func2 0000000180001030 test-arm64ec:test.dll
+MAP-NEXT: 0001:0000003c __impchk_func2 000000018000103c test-arm64ec:test.dll
+MAP-NEXT: 0001:00000050 func_exit_thunk 0000000180001050 test.obj
+MAP-NEXT: 0001:00000058 func2_exit_thunk 0000000180001058 test.obj
+MAP-NEXT: 0001:00001000 func 0000000180002000 test-arm64ec:test.dll
+MAP-NEXT: 0001:00001010 func2 0000000180002010 test-arm64ec:test.dll
+MAP-NEXT: 0002:00000000 __imp_data 0000000180003000 test2-arm64ec:test2.dll
+MAP-NEXT: 0000:00000000 __hybrid_auxiliary_delayload_iat_copy 0000000180004000 <linker-defined>
+MAP-NEXT: 0002:00001000 __auximpcopy_func 0000000180004000 test-arm64ec:test.dll
+MAP-NEXT: 0002:00001008 __auximpcopy_func2 0000000180004008 test-arm64ec:test.dll
+MAP: 0002:00003000 __imp_func 0000000180006000 test-arm64ec:test.dll
+MAP-NEXT: 0002:00003008 __imp_func2 0000000180006008 test-arm64ec:test.dll
+MAP: 0003:00000088 __imp_aux_func 0000000180007088 test-arm64ec:test.dll
+MAP-NEXT: 0003:00000090 __imp_aux_func2 0000000180007090 test-arm64ec:test.dll
+
+RUN: llvm-readobj --hex-dump=.rdata out.dll | FileCheck --check-prefix=RDATA %s
+RDATA: 0x180004000 1c100080 01000000 3c100080 01000000
+RDATA-NEXT: 0x180004010 00000000 00000000
+RDATA: 0x180006000 1c100080 01000000 3c100080 01000000
+RDATA-NEXT: 0x180006010 00000000 00000000
+
+RUN: llvm-readobj --coff-basereloc out.dll | FileCheck --check-prefix=RELOC %s
+RELOC: BaseReloc [
+RELOC-NEXT: Entry {
+RELOC-NEXT: Type: DIR64
+RELOC-NEXT: Address: 0x4000
+RELOC-NEXT: }
+RELOC-NEXT: Entry {
+RELOC-NEXT: Type: DIR64
+RELOC-NEXT: Address: 0x4008
+RELOC-NEXT: }
+RELOC: Address: 0x6000
+RELOC-NEXT: }
+RELOC-NEXT: Entry {
+RELOC-NEXT: Type: DIR64
+RELOC-NEXT: Address: 0x6008
+RELOC-NEXT: }
+
+#--- test.s
+ .section .test,"r"
+ .rva __imp_func
+ .rva __imp_aux_func
+ .rva func
+ .rva "#func"
+ .rva __imp_func2
+ .rva __imp_aux_func2
+ .rva func2
+ .rva "#func2"
+ .rva __impchk_func
+ .rva __impchk_func2
+ .rva __imp_data
+
+ .section .text,"xr",discard,__icall_helper_arm64ec
+ .globl __icall_helper_arm64ec
+ .p2align 2, 0x0
+__icall_helper_arm64ec:
+ mov w0, #0
+ ret
+
+ .section .text,"xr",discard,"#__delayLoadHelper2"
+ .globl "#__delayLoadHelper2"
+ .p2align 2, 0x0
+"#__delayLoadHelper2":
+ mov w0, #1
+ ret
+
+ .weak_anti_dep __delayLoadHelper2
+.set __delayLoadHelper2,"#__delayLoadHelper2"
+
+ .section .hybmp$x, "yi"
+ .symidx __imp_func
+ .symidx func_exit_thunk
+ .word 4
+ .symidx __imp_func2
+ .symidx func2_exit_thunk
+ .word 4
+
+ .section .wowthk$aa,"xr",discard,func_exit_thunk
+ .globl func_exit_thunk
+func_exit_thunk:
+ mov w0, #2
+ ret
+
+ .section .wowthk$aa,"xr",discard,func2_exit_thunk
+ .globl func2_exit_thunk
+func2_exit_thunk:
+ mov w0, #3
+ ret
+
+#--- test.def
+NAME test.dll
+EXPORTS
+ func
+ func2
+
+#--- test2.def
+NAME test2.dll
+EXPORTS
+ data DATA
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Fill the regular delay-load IAT with x86_64 delay-load thunks. Similarly to regular imports, create an auxiliary IAT and its copy for ARM64EC calls. These are filled with the same `__impchk_` thunks used for regular imports, which perform an indirect call with `__icall_helper_arm64ec` on the regular delay-load IAT. These auxiliary IATs are exposed via CHPE metadata starting from version 2. The MSVC linker creates one more copy of the auxiliary IAT. `__imp_func` symbols refer to that hidden IAT, while the `#func` thunk performs a call with the public auxiliary IAT. If the public auxiliary IAT is fine for `#func`, it should be fine for calls using the `__imp_func` symbol as well. Therefore, I made `__imp_func` refer to that IAT too.
Fill the regular delay-load IAT with x86_64 delay-load thunks. Similarly to regular imports, create an auxiliary IAT and its copy for ARM64EC calls. These are filled with the same `__impchk_` thunks used for regular imports, which perform an indirect call with `__icall_helper_arm64ec` on the regular delay-load IAT. These auxiliary IATs are exposed via CHPE metadata starting from version 2. The MSVC linker creates one more copy of the auxiliary IAT. `__imp_func` symbols refer to that hidden IAT, while the `#func` thunk performs a call with the public auxiliary IAT. If the public auxiliary IAT is fine for `#func`, it should be fine for calls using the `__imp_func` symbol as well. Therefore, I made `__imp_func` refer to that IAT too.