Skip to content

[X86][LLD] Handle R_X86_64_CODE_4_GOTTPOFF relocation type #116634

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 58 additions & 30 deletions lld/ELF/Arch/X86_64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,7 @@ RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
case R_X86_64_REX_GOTPCRELX:
case R_X86_64_CODE_4_GOTPCRELX:
case R_X86_64_GOTTPOFF:
case R_X86_64_CODE_4_GOTTPOFF:
return R_GOT_PC;
case R_X86_64_GOTOFF64:
return R_GOTPLTREL;
Expand Down Expand Up @@ -547,44 +548,69 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
}
}

// In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
// R_X86_64_TPOFF32 so that it does not use GOT.
// In some conditions, R_X86_64_GOTTPOFF/R_X86_64_CODE_4_GOTTPOFF relocation can
// be optimized to R_X86_64_TPOFF32 so that it does not use GOT.
void X86_64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
uint64_t val) const {
uint8_t *inst = loc - 3;
uint8_t reg = loc[-1] >> 3;
uint8_t *regSlot = loc - 1;

// Note that ADD with RSP or R12 is converted to ADD instead of LEA
// because LEA with these registers needs 4 bytes to encode and thus
// wouldn't fit the space.

if (memcmp(inst, "\x48\x03\x25", 3) == 0) {
// "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
memcpy(inst, "\x48\x81\xc4", 3);
} else if (memcmp(inst, "\x4c\x03\x25", 3) == 0) {
// "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12"
memcpy(inst, "\x49\x81\xc4", 3);
} else if (memcmp(inst, "\x4c\x03", 2) == 0) {
// "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
memcpy(inst, "\x4d\x8d", 2);
*regSlot = 0x80 | (reg << 3) | reg;
} else if (memcmp(inst, "\x48\x03", 2) == 0) {
// "addq foo@gottpoff(%rip),%reg -> "leaq foo(%reg),%reg"
memcpy(inst, "\x48\x8d", 2);
*regSlot = 0x80 | (reg << 3) | reg;
} else if (memcmp(inst, "\x4c\x8b", 2) == 0) {
// "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
memcpy(inst, "\x49\xc7", 2);
*regSlot = 0xc0 | reg;
} else if (memcmp(inst, "\x48\x8b", 2) == 0) {
// "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg"
memcpy(inst, "\x48\xc7", 2);
if (rel.type == R_X86_64_GOTTPOFF) {
// Note that ADD with RSP or R12 is converted to ADD instead of LEA
// because LEA with these registers needs 4 bytes to encode and thus
// wouldn't fit the space.

if (memcmp(inst, "\x48\x03\x25", 3) == 0) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This complexity is out of my depth. Hope that @KanRobert can verify it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the original code for handling R_X86_64_GOTTPOFF. I added another path below to handle R_X86_64_CODE_4_GOTTPOFF.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will check after #116633 lands; at that point we can at least ensure the existing rewrite is correct via the LIT tests.

// "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
memcpy(inst, "\x48\x81\xc4", 3);
} else if (memcmp(inst, "\x4c\x03\x25", 3) == 0) {
// "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12"
memcpy(inst, "\x49\x81\xc4", 3);
} else if (memcmp(inst, "\x4c\x03", 2) == 0) {
// "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
memcpy(inst, "\x4d\x8d", 2);
*regSlot = 0x80 | (reg << 3) | reg;
} else if (memcmp(inst, "\x48\x03", 2) == 0) {
// "addq foo@gottpoff(%rip),%reg -> "leaq foo(%reg),%reg"
memcpy(inst, "\x48\x8d", 2);
*regSlot = 0x80 | (reg << 3) | reg;
} else if (memcmp(inst, "\x4c\x8b", 2) == 0) {
// "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
memcpy(inst, "\x49\xc7", 2);
*regSlot = 0xc0 | reg;
} else if (memcmp(inst, "\x48\x8b", 2) == 0) {
// "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg"
memcpy(inst, "\x48\xc7", 2);
*regSlot = 0xc0 | reg;
} else {
Err(ctx)
<< getErrorLoc(ctx, loc - 3)
<< "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only";
}
} else if (rel.type == R_X86_64_CODE_4_GOTTPOFF) {
if (loc[-4] != 0xd5) {
Err(ctx) << getErrorLoc(ctx, loc - 4)
<< "Invalid prefix with R_X86_64_CODE_4_GOTTPOFF!";
return;
}
const uint8_t rex = loc[-3];
loc[-3] = (rex & ~0x44) | (rex & 0x44) >> 2;
*regSlot = 0xc0 | reg;

if (loc[-2] == 0x8b) {
// "movq foo@gottpoff(%rip),%r[16-31]" -> "movq $foo,%r[16-31]"
loc[-2] = 0xc7;
} else if (loc[-2] == 0x03) {
// "addq foo@gottpoff(%rip),%r[16-31]" -> "addq $foo,%r[16-31]"
loc[-2] = 0x81;
} else {
Err(ctx) << getErrorLoc(ctx, loc - 4)
<< "R_X86_64_CODE_4_GOTTPOFF must be used in MOVQ or ADDQ "
"instructions only";
}
} else {
ErrAlways(ctx)
<< getErrorLoc(ctx, loc - 3)
<< "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only";
llvm_unreachable("Unsupported relocation type!");
}

// The original code used a PC relative relocation.
Expand Down Expand Up @@ -741,6 +767,7 @@ int64_t X86_64::getImplicitAddend(const uint8_t *buf, RelType type) const {
case R_X86_64_CODE_4_GOTPCRELX:
case R_X86_64_PC32:
case R_X86_64_GOTTPOFF:
case R_X86_64_CODE_4_GOTTPOFF:
case R_X86_64_PLT32:
case R_X86_64_TLSGD:
case R_X86_64_TLSLD:
Expand Down Expand Up @@ -850,6 +877,7 @@ void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
}
break;
case R_X86_64_GOTTPOFF:
case R_X86_64_CODE_4_GOTTPOFF:
if (rel.expr == R_RELAX_TLS_IE_TO_LE) {
relaxTlsIeToLe(loc, rel, val);
} else {
Expand Down
6 changes: 5 additions & 1 deletion lld/test/ELF/pack-dyn-relocs-tls-x86-64.s
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,16 @@
.globl foo
foo:
movq tlsvar@GOTTPOFF(%rip), %rcx
movq tlsvar2@GOTTPOFF(%rip), %r31


.section .tdata,"awT",@progbits
.space 0x1234
tlsvar:
.word 42

tlsvar2:
.word 42
// CHECK: Section ({{.+}}) .rela.dyn {
// CHECK-NEXT: R_X86_64_TPOFF64 - 0x1234
// CHECK-NEXT: R_X86_64_TPOFF64 - 0x1236
// CHECK-NEXT: }
13 changes: 13 additions & 0 deletions lld/test/ELF/tls-opt.s
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@
// DISASM-NEXT: leaq -4(%r15), %r15
// DISASM-NEXT: addq $-4, %rsp
// DISASM-NEXT: addq $-4, %r12
// DISASM-NEXT: movq $-8, %r16
// DISASM-NEXT: movq $-8, %r20
// DISASM-NEXT: movq $-4, %r16
// DISASM-NEXT: addq $-8, %r16
// DISASM-NEXT: addq $-8, %r28
// DISASM-NEXT: addq $-4, %r16

// LD to LE:
// DISASM-NEXT: movq %fs:0, %rax
Expand Down Expand Up @@ -69,6 +75,13 @@ _start:
addq tls1@GOTTPOFF(%rip), %r15
addq tls1@GOTTPOFF(%rip), %rsp
addq tls1@GOTTPOFF(%rip), %r12
# EGPR
movq tls0@GOTTPOFF(%rip), %r16
movq tls0@GOTTPOFF(%rip), %r20
movq tls1@GOTTPOFF(%rip), %r16
addq tls0@GOTTPOFF(%rip), %r16
addq tls0@GOTTPOFF(%rip), %r28
addq tls1@GOTTPOFF(%rip), %r16

// LD to LE
leaq tls0@tlsld(%rip), %rdi
Expand Down
22 changes: 15 additions & 7 deletions lld/test/ELF/x86-64-tls-ie-local.s
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,29 @@
# RUN: llvm-readobj -r %t.so | FileCheck --check-prefix=REL %s
# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t.so | FileCheck %s

# SEC: .got PROGBITS 0000000000002338 000338 000010 00 WA 0 0 8
# SEC: .got PROGBITS 0000000000002348 000348 000010 00 WA 0 0 8

## Dynamic relocations for non-preemptable symbols in a shared object have section index 0.
# REL: .rela.dyn {
# REL-NEXT: 0x2338 R_X86_64_TPOFF64 - 0x0
# REL-NEXT: 0x2340 R_X86_64_TPOFF64 - 0x4
# REL-NEXT: 0x2348 R_X86_64_TPOFF64 - 0x0
# REL-NEXT: 0x2350 R_X86_64_TPOFF64 - 0x4
# REL-NEXT: }

## &.got[0] - 0x127f = 0x2338 - 0x127f = 4281
## &.got[1] - 0x1286 = 0x2340 - 0x1286 = 4282
# CHECK: 1278: addq 4281(%rip), %rax
# CHECK-NEXT: 127f: addq 4282(%rip), %rax
## &.got[0] - 0x127f = 0x2348 - 0x127f = 4297
## &.got[1] - 0x1286 = 0x2350 - 0x1286 = 4298
## &.got[0] - 0x128e = 0x2348 - 0x128e = 4282
## &.got[1] - 0x1296 = 0x2350 - 0x1296 = 4282

# CHECK: 1278: addq 4297(%rip), %rax
# CHECK-NEXT: 127f: addq 4298(%rip), %rax
# CHECK-NEXT: 1286: addq 4282(%rip), %r16
# CHECK-NEXT: 128e: addq 4282(%rip), %r16

addq foo@GOTTPOFF(%rip), %rax
addq bar@GOTTPOFF(%rip), %rax
addq foo@GOTTPOFF(%rip), %r16
addq bar@GOTTPOFF(%rip), %r16


.section .tbss,"awT",@nobits
foo:
Expand Down
Loading