Skip to content

[lld] Add support for EC code map. #69101

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions lld/COFF/Chunks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -896,6 +896,20 @@ void RVAFlagTableChunk::writeTo(uint8_t *buf) const {
"RVA tables should be de-duplicated");
}

// The serialized table holds one chpe_range_entry record per code map entry.
size_t ECCodeMapChunk::getSize() const {
  const size_t entryCount = map.size();
  return entryCount * sizeof(chpe_range_entry);
}

void ECCodeMapChunk::writeTo(uint8_t *buf) const {
auto table = reinterpret_cast<chpe_range_entry *>(buf);
for (uint32_t i = 0; i < map.size(); i++) {
const ECCodeMapEntry &entry = map[i];
uint32_t start = entry.first->getRVA();
table[i].StartOffset = start | entry.type;
table[i].Length = entry.last->getRVA() + entry.last->getSize() - start;
}
}

// MinGW specific, for the "automatic import of variables from DLLs" feature.
size_t PseudoRelocTableChunk::getSize() const {
if (relocs.empty())
Expand Down
21 changes: 21 additions & 0 deletions lld/COFF/Chunks.h
Original file line number Diff line number Diff line change
Expand Up @@ -703,6 +703,27 @@ class EmptyChunk : public NonSectionChunk {
void writeTo(uint8_t *buf) const override {}
};

class ECCodeMapEntry {
public:
ECCodeMapEntry(Chunk *first, Chunk *last, chpe_range_type type)
: first(first), last(last), type(type) {}
Chunk *first;
Chunk *last;
chpe_range_type type;
};

// This is a chunk containing CHPE code map on EC targets. It's a table
// of address ranges and their types.
//
// The chunk does not copy the entries: it keeps a reference to the caller's
// vector. That vector must therefore outlive the chunk, and entries added or
// removed after construction are what getSize() and writeTo() observe.
class ECCodeMapChunk : public NonSectionChunk {
public:
  // explicit: a vector of entries should never silently convert to a chunk.
  explicit ECCodeMapChunk(std::vector<ECCodeMapEntry> &map) : map(map) {}

  // Returns the size in bytes of the serialized table.
  size_t getSize() const override;

  // Writes the serialized table to buf.
  void writeTo(uint8_t *buf) const override;

private:
  // Borrowed, not owned.
  std::vector<ECCodeMapEntry> &map;
};

// MinGW specific, for the "automatic import of variables from DLLs" feature.
// This provides the table of runtime pseudo relocations, for variable
// references that turned out to need to be imported from a DLL even though
Expand Down
5 changes: 5 additions & 0 deletions lld/COFF/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2360,6 +2360,11 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
ctx.symtab.addAbsolute(mangle("__guard_eh_cont_count"), 0);
ctx.symtab.addAbsolute(mangle("__guard_eh_cont_table"), 0);

if (isArm64EC(config->machine)) {
ctx.symtab.addAbsolute("__hybrid_code_map", 0);
ctx.symtab.addAbsolute("__hybrid_code_map_count", 0);
}

if (config->pseudoRelocs) {
ctx.symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST__"), 0);
ctx.symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST_END__"), 0);
Expand Down
61 changes: 61 additions & 0 deletions lld/COFF/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ class Writer {
uint16_t type, int margin);
bool createThunks(OutputSection *os, int margin);
bool verifyRanges(const std::vector<Chunk *> chunks);
void createECCodeMap();
void finalizeAddresses();
void removeEmptySections();
void assignOutputSectionIndices();
Expand All @@ -229,6 +230,7 @@ class Writer {
template <typename PEHeaderTy> void writeHeader();
void createSEHTable();
void createRuntimePseudoRelocs();
void createECChunks();
void insertCtorDtorSymbols();
void markSymbolsWithRelocations(ObjFile *file, SymbolRVASet &usedSymbols);
void createGuardCFTables();
Expand Down Expand Up @@ -272,6 +274,7 @@ class Writer {
std::map<PartialSectionKey, PartialSection *> partialSections;
std::vector<char> strtab;
std::vector<llvm::object::coff_symbol16> outputSymtab;
std::vector<ECCodeMapEntry> codeMap;
IdataContents idata;
Chunk *importTableStart = nullptr;
uint64_t importTableSize = 0;
Expand Down Expand Up @@ -537,6 +540,48 @@ bool Writer::createThunks(OutputSection *os, int margin) {
return addressesChanged;
}

// Create a code map for CHPE metadata.
void Writer::createECCodeMap() {
if (!isArm64EC(ctx.config.machine))
return;

// Clear the map in case we're recomputing it after adding
// a range extension thunk.
codeMap.clear();

std::optional<chpe_range_type> lastType;
Chunk *first, *last;

auto closeRange = [&]() {
if (lastType) {
codeMap.push_back({first, last, *lastType});
lastType.reset();
}
};

for (OutputSection *sec : ctx.outputSections) {
for (Chunk *c : sec->chunks) {
// Skip empty section chunks. MS link.exe does not seem to do that and
// generates empty code ranges in some cases.
if (isa<SectionChunk>(c) && !c->getSize())
continue;

std::optional<chpe_range_type> chunkType = c->getArm64ECRangeType();
if (chunkType != lastType) {
closeRange();
first = c;
lastType = chunkType;
}
last = c;
}
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we close the range at the end of each section as well? Or is it valid to have two sections that follow on each other (or one section, followed by a non-code section, followed by another code section with the same type, and having the range spanning over that?)

Copy link
Contributor Author

@cjacek cjacek Nov 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is valid for a code range to span over multiple sections of the same type. I will add a test covering that case.

I retested more corner cases with MS link.exe, because I noticed that the isCodeSection here may not be exactly right. As implemented in #69100 and #70722, data chunks properties are preserved when merging into code sections, code chunks may be preserved when merging into data sections. That's indeed the case, although I ran into some bugs in link.exe so the whole thing seems fragile. Boundaries are not calculated correctly in some cases. I will update patches with what I think is the intended behavior.

One thing I didn't replicate is that data chunks do not cause closing ranges if they don't cross a page boundary. This usually does not make a difference, because such data chunks are aligned (as implemented in #69100), so they cross page boundaries anyway. However, when doing code into data merges, they are not aligned, so it's possible that they don't cross page boundaries. This was a bit confusing when testing and I don't think this makes much sense to replicate (and would be tricky to do because during EC map creation addresses are not yet assigned and can't be assigned because we don't know the size of the map itself).

I also noticed a difference in behavior that is not ARM64EC specific. When doing code into data merges with MS link.exe, it marks target section with a code section flag (but still no exec permission, so it doesn't affect isCodeSection result and our use of isCodeSection matches link.exe behavior in that regard). I will create PR implementing that. It's needed for disassembler to disassemble such sections, which is used in my new tests, but if you think it's not worth replicating, I could alternatively just remove that part of the test.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I overlooked the if (!sec->isCodeSection()) { closeRange(); continue; } bit in the previous version - I guess that would have covered the case I thought about. But with that removed, I guess this also looks good. Thanks!


closeRange();

Symbol *tableCountSym = ctx.symtab.findUnderscore("__hybrid_code_map_count");
cast<DefinedAbsolute>(tableCountSym)->setVA(codeMap.size());
}

// Verify that all relocations are in range, with no extra margin requirements.
bool Writer::verifyRanges(const std::vector<Chunk *> chunks) {
for (Chunk *c : chunks) {
Expand Down Expand Up @@ -1086,6 +1131,9 @@ void Writer::createMiscChunks() {
if (config->guardCF != GuardCFLevel::Off)
createGuardCFTables();

if (isArm64EC(config->machine))
createECChunks();

if (config->autoImport)
createRuntimePseudoRelocs();

Expand Down Expand Up @@ -1411,6 +1459,10 @@ void Writer::assignAddresses() {
llvm::TimeTraceScope timeScope("Assign addresses");
Configuration *config = &ctx.config;

// We need to create EC code map so that ECCodeMapChunk knows its size.
// We do it here to make sure that we account for range extension chunks.
createECCodeMap();

sizeOfHeaders = dosStubSize + sizeof(PEMagic) + sizeof(coff_file_header) +
sizeof(data_directory) * numberOfDataDirectory +
sizeof(coff_section) * ctx.outputSections.size();
Expand Down Expand Up @@ -1946,6 +1998,15 @@ void Writer::maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym,
cast<DefinedAbsolute>(c)->setVA(tableChunk->getSize() / (hasFlag ? 5 : 4));
}

// Create CHPE metadata chunks.
void Writer::createECChunks() {
auto codeMapChunk = make<ECCodeMapChunk>(codeMap);
rdataSec->addChunk(codeMapChunk);
Symbol *codeMapSym = ctx.symtab.findUnderscore("__hybrid_code_map");
replaceSymbol<DefinedSynthetic>(codeMapSym, codeMapSym->getName(),
codeMapChunk);
}

// MinGW specific. Gather all relocations that are imported from a DLL even
// though the code didn't expect it to, produce the table that the runtime
// uses for fixing them up, and provide the synthetic symbols that the
Expand Down
4 changes: 2 additions & 2 deletions lld/test/COFF/Inputs/loadconfig-arm64ec.s
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ __os_arm64x_helper8:
.p2align 3, 0
__chpe_metadata:
.word 1
.rva code_map
.word code_map_count
.rva __hybrid_code_map
.word __hybrid_code_map_count
.word 0 // __x64_code_ranges_to_entry_points
.word 0 //__arm64x_redirection_metadata
.rva __os_arm64x_dispatch_call_no_redirect
Expand Down
132 changes: 79 additions & 53 deletions lld/test/COFF/arm64ec-codemap.test
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@ RUN: split-file %s %t.dir && cd %t.dir

RUN: llvm-mc -filetype=obj -triple=arm64-windows arm64-func-sym.s -o arm64-func-sym.obj
RUN: llvm-mc -filetype=obj -triple=arm64ec-windows arm64ec-func-sym.s -o arm64ec-func-sym.obj
RUN: llvm-mc -filetype=obj -triple=arm64ec-windows arm64ec-func-sym2.s -o arm64ec-func-sym2.obj
RUN: llvm-mc -filetype=obj -triple=arm64ec-windows data-sec.s -o data-sec.obj
RUN: llvm-mc -filetype=obj -triple=arm64ec-windows empty-sec.s -o arm64ec-empty-sec.obj
RUN: llvm-mc -filetype=obj -triple=x86_64-windows x86_64-func-sym.s -o x86_64-func-sym.obj
RUN: llvm-mc -filetype=obj -triple=arm64ec-windows codemap.s -o codemap.obj
RUN: llvm-mc -filetype=obj -triple=arm64ec-windows codemap2.s -o codemap2.obj
RUN: llvm-mc -filetype=obj -triple=arm64ec-windows codemap3.s -o codemap3.obj
RUN: llvm-mc -filetype=obj -triple=x86_64-windows empty-sec.s -o x86_64-empty-sec.obj
RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig-arm64ec.obj

Link ARM64EC DLL and verify that the code is arranged as expected.

RUN: lld-link -out:test.dll -machine:arm64ec arm64ec-func-sym.obj x86_64-func-sym.obj \
RUN: codemap.obj loadconfig-arm64ec.obj -dll -noentry
RUN: loadconfig-arm64ec.obj -dll -noentry

RUN: llvm-readobj --coff-load-config test.dll | FileCheck -check-prefix=CODEMAP %s
CODEMAP: CodeMap [
Expand Down Expand Up @@ -45,12 +45,18 @@ DISASM-NEXT: 180006005: c3 retq
Order of arguments doesn't matter in this case, chunks are sorted by target type anyway.

RUN: lld-link -out:test2.dll -machine:arm64ec x86_64-func-sym.obj arm64ec-func-sym.obj \
RUN: codemap.obj loadconfig-arm64ec.obj -dll -noentry
RUN: loadconfig-arm64ec.obj -dll -noentry
RUN: llvm-readobj --coff-load-config test2.dll | FileCheck -check-prefix=CODEMAP %s
RUN: llvm-objdump -d test2.dll | FileCheck -check-prefix=DISASM %s

RUN: lld-link -out:testx.dll -machine:arm64x arm64-func-sym.obj arm64ec-func-sym.obj \
RUN: x86_64-func-sym.obj codemap2.obj loadconfig-arm64ec.obj -dll -noentry
RUN: x86_64-func-sym.obj loadconfig-arm64ec.obj -dll -noentry

Adding empty chunks does not affect code map ranges.

RUN: lld-link -out:test3.dll -machine:arm64ec x86_64-empty-sec.obj arm64ec-empty-sec.obj \
RUN: arm64ec-func-sym.obj x86_64-func-sym.obj loadconfig-arm64ec.obj -dll -noentry
RUN: llvm-readobj --coff-load-config test3.dll | FileCheck -check-prefix=CODEMAP %s

Do the same with ARM64X target.

Expand Down Expand Up @@ -88,7 +94,7 @@ DISASMX-NEXT: 180007005: c3 retq
Test merged sections.

RUN: lld-link -out:testm.dll -machine:arm64ec arm64ec-func-sym.obj x86_64-func-sym.obj \
RUN: codemap3.obj loadconfig-arm64ec.obj -dll -noentry -merge:test=.text
RUN: loadconfig-arm64ec.obj -dll -noentry -merge:test=.text

RUN: llvm-readobj --coff-load-config testm.dll | FileCheck -check-prefix=CODEMAPM %s
CODEMAPM: CodeMap [
Expand All @@ -113,7 +119,7 @@ DISASMM-NEXT: 18000200d: c3 retq

Merging data sections into code sections causes data to be separated from the code when sorting chunks.

RUN: lld-link -out:testdm.dll -machine:arm64ec arm64ec-func-sym.obj x86_64-func-sym.obj codemap.obj \
RUN: lld-link -out:testdm.dll -machine:arm64ec arm64ec-func-sym.obj x86_64-func-sym.obj \
RUN: data-sec.obj loadconfig-arm64ec.obj -dll -noentry -merge:.testdata=.text -merge:.rdata=test

RUN: llvm-readobj --coff-load-config testdm.dll | FileCheck -check-prefix=CODEMAPDM %s
Expand Down Expand Up @@ -145,6 +151,59 @@ DISASMDM-NEXT: ...
DISASMDM-NEXT: 180007000: b8 06 00 00 00 movl $0x6, %eax
DISASMDM-NEXT: 180007005: c3 retq

Merging a code section into data section produces a valid code map.

RUN: lld-link -out:testcm.dll -machine:arm64ec x86_64-func-sym.obj data-sec.obj \
RUN: loadconfig-arm64ec.obj -dll -noentry -merge:test=.testdata -merge:.text=.testdata

RUN: llvm-readobj --coff-load-config testcm.dll | FileCheck -check-prefix=CODEMAPCM %s
CODEMAPCM: CodeMap [
CODEMAPCM-NEXT: 0x3008 - 0x3016 X64
CODEMAPCM-NEXT: ]

RUN: llvm-objdump -d testcm.dll | FileCheck -check-prefix=DISASMCM %s
DISASMCM: Disassembly of section .testdat:
DISASMCM-EMPTY:
DISASMCM-NEXT: 0000000180003000 <.testdat>:
DISASMCM-NEXT: 180003000: 00000001 udf #0x1
DISASMCM-NEXT: 180003004: 00000000 udf #0x0
DISASMCM-NEXT: 180003008: b8 03 00 00 00 movl $0x3, %eax
DISASMCM-NEXT: 18000300d: c3 retq
DISASMCM-NEXT: 18000300e: 00 00 addb %al, (%rax)
DISASMCM-NEXT: 180003010: b8 06 00 00 00 movl $0x6, %eax
DISASMCM-NEXT: 180003015: c3

Test code map range entry spanning over multiple sections.

RUN: lld-link -out:testms.dll -machine:arm64ec arm64ec-func-sym.obj arm64ec-func-sym2.obj \
RUN: loadconfig-arm64ec.obj -dll -noentry

RUN: llvm-readobj --coff-load-config testms.dll | FileCheck -check-prefix=CODEMAPMS %s
CODEMAPMS: CodeMap [
CODEMAPMS-NEXT: 0x1000 - 0x1008 ARM64EC
CODEMAPMS-NEXT: 0x4000 - 0x5008 ARM64EC
CODEMAPMS-NEXT: ]

RUN: llvm-objdump -d testms.dll | FileCheck -check-prefix=DISASMMS %s
DISASMMS: Disassembly of section .text:
DISASMMS-EMPTY:
DISASMMS-NEXT: 0000000180001000 <.text>:
DISASMMS-NEXT: 180001000: 52800040 mov w0, #0x2 // =2
DISASMMS-NEXT: 180001004: d65f03c0 ret
DISASMMS-EMPTY:
DISASMMS-NEXT: Disassembly of section test:
DISASMMS-EMPTY:
DISASMMS-NEXT: 0000000180004000 <test>:
DISASMMS-NEXT: 180004000: 528000a0 mov w0, #0x5 // =5
DISASMMS-NEXT: 180004004: d65f03c0 ret
DISASMMS-EMPTY:
DISASMMS-NEXT: Disassembly of section test2:
DISASMMS-EMPTY:
DISASMMS-NEXT: 0000000180005000 <test2>:
DISASMMS-NEXT: 180005000: 528000a0 mov w0, #0x5 // =5
DISASMMS-NEXT: 180005004: d65f03c0 ret


#--- arm64-func-sym.s
.text
.globl arm64_func_sym
Expand All @@ -168,6 +227,14 @@ arm64ec_func_sym2:
mov w0, #5
ret

#--- arm64ec-func-sym2.s
.section test2, "xr"
.globl arm64ec_func_sym3
.p2align 2, 0x0
arm64ec_func_sym3:
mov w0, #5
ret

#--- x86_64-func-sym.s
.text
.globl x86_64_func_sym
Expand All @@ -187,48 +254,7 @@ x86_64_func_sym2:
.section .testdata, "rd"
.xword 1

#--- codemap.s
.section .rdata,"dr"
.globl code_map
code_map:
.rva arm64ec_func_sym + 1
.word 8
.rva x86_64_func_sym + 2
.word 6
.rva arm64ec_func_sym2 + 1
.word 8
.rva x86_64_func_sym2 + 2
.word 6

.globl code_map_count
code_map_count = 4

#--- codemap2.s
.section .rdata,"dr"
.globl code_map
code_map:
.rva arm64_func_sym
.word 8
.rva arm64ec_func_sym + 1
.word 8
.rva x86_64_func_sym + 2
.word 6
.rva arm64ec_func_sym2 + 1
.word 8
.rva x86_64_func_sym2 + 2
.word 6

.globl code_map_count
code_map_count = 5

#--- codemap3.s
.section .rdata,"dr"
.globl code_map
code_map:
.rva arm64ec_func_sym + 1
.word 16
.rva x86_64_func_sym + 2
.word 14

.globl code_map_count
code_map_count = 2
#--- empty-sec.s
.section .empty1, "xr"
.section .empty2, "xr"
.section .empty3, "xr"