-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86][LLD] Handle R_X86_64_CODE_4_GOTTPOFF relocation type #116634
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
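@llvm/pr-subscribers-lld-elf @llvm/pr-subscribers-lld Author: Feng Zou (fzou1) Changes: For `mov name@GOTTPOFF(%rip), %reg` and `add name@GOTTPOFF(%rip), %reg`, `R_X86_64_CODE_4_GOTTPOFF` = 44 is added in #116633. Linker can treat `R_X86_64_CODE_4_GOTTPOFF` as `R_X86_64_GOTTPOFF` or convert the instructions above to `mov $name, %reg` and `add $name, %reg` if the first byte of the instruction at the relocation `offset - 4` is `0xd5` (namely, encoded w/REX2 prefix) when possible.
Full diff: https://github.com/llvm/llvm-project/pull/116634.diff 4 Files Affected: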
diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index e9267bd4128d18..beb40d9266e91e 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -396,6 +396,7 @@ RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
case R_X86_64_REX_GOTPCRELX:
case R_X86_64_REX2_GOTPCRELX:
case R_X86_64_GOTTPOFF:
+ case R_X86_64_CODE_4_GOTTPOFF:
return R_GOT_PC;
case R_X86_64_GOTOFF64:
return R_GOTPLTREL;
@@ -547,44 +548,58 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
}
}
-// In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
-// R_X86_64_TPOFF32 so that it does not use GOT.
+// In some conditions, R_X86_64_GOTTPOFF/R_X86_64_CODE_4_GOTTPOFF relocation can
+// be optimized to R_X86_64_TPOFF32 so that it does not use GOT.
void X86_64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
uint64_t val) const {
uint8_t *inst = loc - 3;
uint8_t reg = loc[-1] >> 3;
uint8_t *regSlot = loc - 1;
- // Note that ADD with RSP or R12 is converted to ADD instead of LEA
- // because LEA with these registers needs 4 bytes to encode and thus
- // wouldn't fit the space.
-
- if (memcmp(inst, "\x48\x03\x25", 3) == 0) {
- // "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
- memcpy(inst, "\x48\x81\xc4", 3);
- } else if (memcmp(inst, "\x4c\x03\x25", 3) == 0) {
- // "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12"
- memcpy(inst, "\x49\x81\xc4", 3);
- } else if (memcmp(inst, "\x4c\x03", 2) == 0) {
- // "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
- memcpy(inst, "\x4d\x8d", 2);
- *regSlot = 0x80 | (reg << 3) | reg;
- } else if (memcmp(inst, "\x48\x03", 2) == 0) {
- // "addq foo@gottpoff(%rip),%reg -> "leaq foo(%reg),%reg"
- memcpy(inst, "\x48\x8d", 2);
- *regSlot = 0x80 | (reg << 3) | reg;
- } else if (memcmp(inst, "\x4c\x8b", 2) == 0) {
- // "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
- memcpy(inst, "\x49\xc7", 2);
- *regSlot = 0xc0 | reg;
- } else if (memcmp(inst, "\x48\x8b", 2) == 0) {
- // "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg"
- memcpy(inst, "\x48\xc7", 2);
- *regSlot = 0xc0 | reg;
+ if (rel.type == R_X86_64_GOTTPOFF) {
+ // Note that ADD with RSP or R12 is converted to ADD instead of LEA
+ // because LEA with these registers needs 4 bytes to encode and thus
+ // wouldn't fit the space.
+
+ if (memcmp(inst, "\x48\x03\x25", 3) == 0) {
+ // "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
+ memcpy(inst, "\x48\x81\xc4", 3);
+ } else if (memcmp(inst, "\x4c\x03\x25", 3) == 0) {
+ // "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12"
+ memcpy(inst, "\x49\x81\xc4", 3);
+ } else if (memcmp(inst, "\x4c\x03", 2) == 0) {
+ // "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
+ memcpy(inst, "\x4d\x8d", 2);
+ *regSlot = 0x80 | (reg << 3) | reg;
+ } else if (memcmp(inst, "\x48\x03", 2) == 0) {
+ // "addq foo@gottpoff(%rip),%reg -> "leaq foo(%reg),%reg"
+ memcpy(inst, "\x48\x8d", 2);
+ *regSlot = 0x80 | (reg << 3) | reg;
+ } else if (memcmp(inst, "\x4c\x8b", 2) == 0) {
+ // "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
+ memcpy(inst, "\x49\xc7", 2);
+ *regSlot = 0xc0 | reg;
+ } else if (memcmp(inst, "\x48\x8b", 2) == 0) {
+ // "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg"
+ memcpy(inst, "\x48\xc7", 2);
+ *regSlot = 0xc0 | reg;
+ } else {
+ ErrAlways(ctx)
+ << getErrorLoc(ctx, loc - 3)
+ << "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only";
+ }
} else {
- ErrAlways(ctx)
- << getErrorLoc(ctx, loc - 3)
- << "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only";
+ assert(rel.type == R_X86_64_CODE_4_GOTTPOFF &&
+ "Unsupported relocation type!");
+ assert((loc[-4] == 0xd5) &&
+ "Invalid prefix with R_X86_64_CODE_4_GOTTPOFF!");
+ const uint8_t rex = loc[-3];
+ loc[-3] = (rex & ~0x44) | (rex & 0x44) >> 2;
+ *regSlot = 0xc0 | reg;
+ if (loc[-2] == 0x8b)
+ loc[-2] = 0xc7;
+ else
+ loc[-2] = 0x81;
}
// The original code used a PC relative relocation.
@@ -741,6 +756,7 @@ int64_t X86_64::getImplicitAddend(const uint8_t *buf, RelType type) const {
case R_X86_64_REX2_GOTPCRELX:
case R_X86_64_PC32:
case R_X86_64_GOTTPOFF:
+ case R_X86_64_CODE_4_GOTTPOFF:
case R_X86_64_PLT32:
case R_X86_64_TLSGD:
case R_X86_64_TLSLD:
@@ -850,6 +866,7 @@ void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
}
break;
case R_X86_64_GOTTPOFF:
+ case R_X86_64_CODE_4_GOTTPOFF:
if (rel.expr == R_RELAX_TLS_IE_TO_LE) {
relaxTlsIeToLe(loc, rel, val);
} else {
diff --git a/lld/test/ELF/pack-dyn-relocs-tls-x86-64.s b/lld/test/ELF/pack-dyn-relocs-tls-x86-64.s
index b3b1e8a0602772..c6464b4bece097 100644
--- a/lld/test/ELF/pack-dyn-relocs-tls-x86-64.s
+++ b/lld/test/ELF/pack-dyn-relocs-tls-x86-64.s
@@ -12,12 +12,16 @@
.globl foo
foo:
movq tlsvar@GOTTPOFF(%rip), %rcx
+ movq tlsvar2@GOTTPOFF(%rip), %r31
+
.section .tdata,"awT",@progbits
.space 0x1234
tlsvar:
.word 42
-
+tlsvar2:
+ .word 42
// CHECK: Section ({{.+}}) .rela.dyn {
// CHECK-NEXT: R_X86_64_TPOFF64 - 0x1234
+// CHECK-NEXT: R_X86_64_TPOFF64 - 0x1236
// CHECK-NEXT: }
diff --git a/lld/test/ELF/tls-opt.s b/lld/test/ELF/tls-opt.s
index ce90ba4f869ce4..818203ee19cb7c 100644
--- a/lld/test/ELF/tls-opt.s
+++ b/lld/test/ELF/tls-opt.s
@@ -20,6 +20,12 @@
// DISASM-NEXT: leaq -4(%r15), %r15
// DISASM-NEXT: addq $-4, %rsp
// DISASM-NEXT: addq $-4, %r12
+// DISASM-NEXT: movq $-8, %r16
+// DISASM-NEXT: movq $-8, %r20
+// DISASM-NEXT: movq $-4, %r16
+// DISASM-NEXT: addq $-8, %r16
+// DISASM-NEXT: addq $-8, %r28
+// DISASM-NEXT: addq $-4, %r16
// LD to LE:
// DISASM-NEXT: movq %fs:0, %rax
@@ -69,6 +75,13 @@ _start:
addq tls1@GOTTPOFF(%rip), %r15
addq tls1@GOTTPOFF(%rip), %rsp
addq tls1@GOTTPOFF(%rip), %r12
+ # EGPR
+ movq tls0@GOTTPOFF(%rip), %r16
+ movq tls0@GOTTPOFF(%rip), %r20
+ movq tls1@GOTTPOFF(%rip), %r16
+ addq tls0@GOTTPOFF(%rip), %r16
+ addq tls0@GOTTPOFF(%rip), %r28
+ addq tls1@GOTTPOFF(%rip), %r16
// LD to LE
leaq tls0@tlsld(%rip), %rdi
diff --git a/lld/test/ELF/x86-64-tls-ie-local.s b/lld/test/ELF/x86-64-tls-ie-local.s
index c527c86e667713..08547d6b4b5125 100644
--- a/lld/test/ELF/x86-64-tls-ie-local.s
+++ b/lld/test/ELF/x86-64-tls-ie-local.s
@@ -5,24 +5,38 @@
# RUN: llvm-readobj -r %t.so | FileCheck --check-prefix=REL %s
# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t.so | FileCheck %s
-# SEC: .got PROGBITS 0000000000002338 000338 000010 00 WA 0 0 8
+# SEC: .got PROGBITS 0000000000002378 000378 000020 00 WA 0 0 8
## Dynamic relocations for non-preemptable symbols in a shared object have section index 0.
# REL: .rela.dyn {
-# REL-NEXT: 0x2338 R_X86_64_TPOFF64 - 0x0
-# REL-NEXT: 0x2340 R_X86_64_TPOFF64 - 0x4
+# REL-NEXT: 0x2378 R_X86_64_TPOFF64 - 0x0
+# REL-NEXT: 0x2380 R_X86_64_TPOFF64 - 0x8
+# REL-NEXT: 0x2388 R_X86_64_TPOFF64 - 0x4
+# REL-NEXT: 0x2390 R_X86_64_TPOFF64 - 0xC
# REL-NEXT: }
-## &.got[0] - 0x127f = 0x2338 - 0x127f = 4281
-## &.got[1] - 0x1286 = 0x2340 - 0x1286 = 4282
-# CHECK: 1278: addq 4281(%rip), %rax
-# CHECK-NEXT: 127f: addq 4282(%rip), %rax
+## &.got[0] - 0x12af = 0x2378 - 0x12af = 4297
+## &.got[1] - 0x12b6 = 0x2380 - 0x12b6 = 4298
+## &.got[2] - 0x12be = 0x2388 - 0x12be = 4298
+## &.got[3] - 0x12c6 = 0x2390 - 0x12c6 = 4298
+
+# CHECK: 12a8: addq 4297(%rip), %rax
+# CHECK-NEXT: 12af: addq 4298(%rip), %rax
+# CHECK-NEXT: 12b6: addq 4298(%rip), %r16
+# CHECK-NEXT: 12be: addq 4298(%rip), %r16
addq foo@GOTTPOFF(%rip), %rax
addq bar@GOTTPOFF(%rip), %rax
+addq foo2@GOTTPOFF(%rip), %r16
+addq bar2@GOTTPOFF(%rip), %r16
+
.section .tbss,"awT",@nobits
foo:
.long 0
+foo2:
+ .long 0
bar:
.long 0
+bar2:
+ .long 0
|
// because LEA with these registers needs 4 bytes to encode and thus | ||
// wouldn't fit the space. | ||
|
||
if (memcmp(inst, "\x48\x03\x25", 3) == 0) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This complexity is out of my depth. Hope that @KanRobert can verify it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the original code for handling R_X86_64_GOTTPOFF. I added another path below to handle R_X86_64_CODE_4_GOTTPOFF.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will try to check this after #116633 lands; at that point, we can at least use LIT tests to ensure that the existing rewrite is correct.
lld/ELF/Arch/X86_64.cpp
Outdated
memcpy(inst, "\x48\xc7", 2); | ||
*regSlot = 0xc0 | reg; | ||
} else { | ||
ErrAlways(ctx) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
While here, switch to Err(ctx)
(which is preferred over ErrAlways so that --noinhibit-exec creates an output file, even if broken; useful for debugging)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated to Err(ctx).
lld/test/ELF/x86-64-tls-ie-local.s
Outdated
|
||
.section .tbss,"awT",@nobits | ||
foo: | ||
.long 0 | ||
foo2: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
no need to add foo2 and bar2
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated.
For `mov name@GOTTPOFF(%rip), %reg` and `add name@GOTTPOFF(%rip), %reg`, `R_X86_64_CODE_4_GOTTPOFF` = 44 is added in llvm#116633. Linker can treat `R_X86_64_CODE_4_GOTTPOFF` as `R_X86_64_GOTTPOFF` or convert the instructions above to `mov $name, %reg` and `add $name, %reg` if the first byte of the instruction at the relocation `offset - 4` is `0xd5` (namely, encoded w/REX2 prefix) when possible.
fb39806
to
d1fdd48
Compare
@MaskRay / @KanRobert , could you please review? Thanks. |
lld/ELF/Arch/X86_64.cpp
Outdated
Err(ctx) | ||
<< getErrorLoc(ctx, loc - 3) | ||
<< "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only"; | ||
} | ||
} else { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
relaxTlsIeToLe is only called for R_X86_64_CODE_4_GOTTPOFF and R_X86_64_GOTTPOFF. Probably, the code can be made more readable by removing the first assert below and using
else if (rel.type == R_X86_64_CODE_4_GOTTPOFF) {
} else {
llvm_unreachable("Unsupported relocation type!")
}
And I think the following R_X86_64_CODE_6_GOTTPOFF support can benefit from this too.
The second `assert` should not be there; the diagnostic info should always be reported by code like line 606
Err(ctx) << getErrorLoc(ctx, loc - 3)
<< "R_X86_64_CODE_4_GOTTPOFF must be used in MOVQ or ADDQ "
"instructions only";
, otherwise the error would be hidden in the release build. BTW, it seems `loc - 4` should be used here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. Thanks.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
@MaskRay , any comments? |
@MaskRay , friendly ping. |
Sorry. I added early return if the prefix is invalid. Please help review again. Thanks. |
I'll commit first. Any further comments are welcome. |
For
mov name@GOTTPOFF(%rip), %reg
add name@GOTTPOFF(%rip), %reg
add
R_X86_64_CODE_4_GOTTPOFF
= 44 in #116633.
Linker can treat
R_X86_64_CODE_4_GOTTPOFF
as R_X86_64_GOTTPOFF
or convert the instructions above to
mov $name, %reg
add $name, %reg
if the first byte of the instruction at the relocation
offset - 4
is 0xd5
(namely, encoded w/REX2 prefix) when possible.
Binutils patch: bminor/binutils-gdb@a533c8d
Binutils mailthread: https://sourceware.org/pipermail/binutils/2023-December/131463.html
ABI discussion: https://groups.google.com/g/x86-64-abi/c/ACwD-UQXVDs/m/vrgTenKyFwAJ
Blog: https://kanrobert.github.io/rfc/All-about-APX-relocation