Skip to content

Commit 410e011

Browse files
committed
[X86][LLD] Handle R_X86_64_CODE_4_GOTTPOFF relocation type
For the instructions

  mov name@GOTTPOFF(%rip), %reg
  add name@GOTTPOFF(%rip), %reg

the relocation type `R_X86_64_CODE_4_GOTTPOFF` = 44 was added in llvm#116633.

The linker can either treat `R_X86_64_CODE_4_GOTTPOFF` as `R_X86_64_GOTTPOFF` or, when possible, convert the instructions above to

  mov $name, %reg
  add $name, %reg

if the first byte of the instruction at the relocation `offset - 4` is `0xd5` (namely, the instruction is encoded with a REX2 prefix).
1 parent a52cb0a commit 410e011

File tree

4 files changed

+87
-39
lines changed

4 files changed

+87
-39
lines changed

lld/ELF/Arch/X86_64.cpp

Lines changed: 48 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,7 @@ RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
396396
case R_X86_64_REX_GOTPCRELX:
397397
case R_X86_64_REX2_GOTPCRELX:
398398
case R_X86_64_GOTTPOFF:
399+
case R_X86_64_CODE_4_GOTTPOFF:
399400
return R_GOT_PC;
400401
case R_X86_64_GOTOFF64:
401402
return R_GOTPLTREL;
@@ -547,44 +548,58 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
547548
}
548549
}
549550

550-
// In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
551-
// R_X86_64_TPOFF32 so that it does not use GOT.
551+
// In some conditions, R_X86_64_GOTTPOFF/R_X86_64_CODE_4_GOTTPOFF relocation can
552+
// be optimized to R_X86_64_TPOFF32 so that it does not use GOT.
552553
void X86_64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
553554
uint64_t val) const {
554555
uint8_t *inst = loc - 3;
555556
uint8_t reg = loc[-1] >> 3;
556557
uint8_t *regSlot = loc - 1;
557558

558-
// Note that ADD with RSP or R12 is converted to ADD instead of LEA
559-
// because LEA with these registers needs 4 bytes to encode and thus
560-
// wouldn't fit the space.
561-
562-
if (memcmp(inst, "\x48\x03\x25", 3) == 0) {
563-
// "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
564-
memcpy(inst, "\x48\x81\xc4", 3);
565-
} else if (memcmp(inst, "\x4c\x03\x25", 3) == 0) {
566-
// "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12"
567-
memcpy(inst, "\x49\x81\xc4", 3);
568-
} else if (memcmp(inst, "\x4c\x03", 2) == 0) {
569-
// "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
570-
memcpy(inst, "\x4d\x8d", 2);
571-
*regSlot = 0x80 | (reg << 3) | reg;
572-
} else if (memcmp(inst, "\x48\x03", 2) == 0) {
573-
// "addq foo@gottpoff(%rip),%reg -> "leaq foo(%reg),%reg"
574-
memcpy(inst, "\x48\x8d", 2);
575-
*regSlot = 0x80 | (reg << 3) | reg;
576-
} else if (memcmp(inst, "\x4c\x8b", 2) == 0) {
577-
// "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
578-
memcpy(inst, "\x49\xc7", 2);
579-
*regSlot = 0xc0 | reg;
580-
} else if (memcmp(inst, "\x48\x8b", 2) == 0) {
581-
// "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg"
582-
memcpy(inst, "\x48\xc7", 2);
583-
*regSlot = 0xc0 | reg;
559+
if (rel.type == R_X86_64_GOTTPOFF) {
560+
// Note that ADD with RSP or R12 is converted to ADD instead of LEA
561+
// because LEA with these registers needs 4 bytes to encode and thus
562+
// wouldn't fit the space.
563+
564+
if (memcmp(inst, "\x48\x03\x25", 3) == 0) {
565+
// "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
566+
memcpy(inst, "\x48\x81\xc4", 3);
567+
} else if (memcmp(inst, "\x4c\x03\x25", 3) == 0) {
568+
// "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12"
569+
memcpy(inst, "\x49\x81\xc4", 3);
570+
} else if (memcmp(inst, "\x4c\x03", 2) == 0) {
571+
// "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
572+
memcpy(inst, "\x4d\x8d", 2);
573+
*regSlot = 0x80 | (reg << 3) | reg;
574+
} else if (memcmp(inst, "\x48\x03", 2) == 0) {
575+
// "addq foo@gottpoff(%rip),%reg" -> "leaq foo(%reg),%reg"
576+
memcpy(inst, "\x48\x8d", 2);
577+
*regSlot = 0x80 | (reg << 3) | reg;
578+
} else if (memcmp(inst, "\x4c\x8b", 2) == 0) {
579+
// "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
580+
memcpy(inst, "\x49\xc7", 2);
581+
*regSlot = 0xc0 | reg;
582+
} else if (memcmp(inst, "\x48\x8b", 2) == 0) {
583+
// "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg"
584+
memcpy(inst, "\x48\xc7", 2);
585+
*regSlot = 0xc0 | reg;
586+
} else {
587+
ErrAlways(ctx)
588+
<< getErrorLoc(ctx, loc - 3)
589+
<< "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only";
590+
}
584591
} else {
585-
ErrAlways(ctx)
586-
<< getErrorLoc(ctx, loc - 3)
587-
<< "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only";
592+
assert(rel.type == R_X86_64_CODE_4_GOTTPOFF &&
593+
"Unsupported relocation type!");
594+
assert((loc[-4] == 0xd5) &&
595+
"Invalid prefix with R_X86_64_CODE_4_GOTTPOFF!");
596+
const uint8_t rex = loc[-3];
597+
loc[-3] = (rex & ~0x44) | (rex & 0x44) >> 2;
598+
*regSlot = 0xc0 | reg;
599+
if (loc[-2] == 0x8b)
600+
loc[-2] = 0xc7;
601+
else
602+
loc[-2] = 0x81;
588603
}
589604

590605
// The original code used a PC relative relocation.
@@ -741,6 +756,7 @@ int64_t X86_64::getImplicitAddend(const uint8_t *buf, RelType type) const {
741756
case R_X86_64_REX2_GOTPCRELX:
742757
case R_X86_64_PC32:
743758
case R_X86_64_GOTTPOFF:
759+
case R_X86_64_CODE_4_GOTTPOFF:
744760
case R_X86_64_PLT32:
745761
case R_X86_64_TLSGD:
746762
case R_X86_64_TLSLD:
@@ -850,6 +866,7 @@ void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
850866
}
851867
break;
852868
case R_X86_64_GOTTPOFF:
869+
case R_X86_64_CODE_4_GOTTPOFF:
853870
if (rel.expr == R_RELAX_TLS_IE_TO_LE) {
854871
relaxTlsIeToLe(loc, rel, val);
855872
} else {

lld/test/ELF/pack-dyn-relocs-tls-x86-64.s

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,16 @@
1212
.globl foo
1313
foo:
1414
movq tlsvar@GOTTPOFF(%rip), %rcx
15+
movq tlsvar2@GOTTPOFF(%rip), %r31
16+
1517

1618
.section .tdata,"awT",@progbits
1719
.space 0x1234
1820
tlsvar:
1921
.word 42
20-
22+
tlsvar2:
23+
.word 42
2124
// CHECK: Section ({{.+}}) .rela.dyn {
2225
// CHECK-NEXT: R_X86_64_TPOFF64 - 0x1234
26+
// CHECK-NEXT: R_X86_64_TPOFF64 - 0x1236
2327
// CHECK-NEXT: }

lld/test/ELF/tls-opt.s

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@
2020
// DISASM-NEXT: leaq -4(%r15), %r15
2121
// DISASM-NEXT: addq $-4, %rsp
2222
// DISASM-NEXT: addq $-4, %r12
23+
// DISASM-NEXT: movq $-8, %r16
24+
// DISASM-NEXT: movq $-8, %r20
25+
// DISASM-NEXT: movq $-4, %r16
26+
// DISASM-NEXT: addq $-8, %r16
27+
// DISASM-NEXT: addq $-8, %r28
28+
// DISASM-NEXT: addq $-4, %r16
2329

2430
// LD to LE:
2531
// DISASM-NEXT: movq %fs:0, %rax
@@ -69,6 +75,13 @@ _start:
6975
addq tls1@GOTTPOFF(%rip), %r15
7076
addq tls1@GOTTPOFF(%rip), %rsp
7177
addq tls1@GOTTPOFF(%rip), %r12
78+
# EGPR
79+
movq tls0@GOTTPOFF(%rip), %r16
80+
movq tls0@GOTTPOFF(%rip), %r20
81+
movq tls1@GOTTPOFF(%rip), %r16
82+
addq tls0@GOTTPOFF(%rip), %r16
83+
addq tls0@GOTTPOFF(%rip), %r28
84+
addq tls1@GOTTPOFF(%rip), %r16
7285

7386
// LD to LE
7487
leaq tls0@tlsld(%rip), %rdi

lld/test/ELF/x86-64-tls-ie-local.s

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,24 +5,38 @@
55
# RUN: llvm-readobj -r %t.so | FileCheck --check-prefix=REL %s
66
# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t.so | FileCheck %s
77

8-
# SEC: .got PROGBITS 0000000000002338 000338 000010 00 WA 0 0 8
8+
# SEC: .got PROGBITS 0000000000002378 000378 000020 00 WA 0 0 8
99

1010
## Dynamic relocations for non-preemptable symbols in a shared object have section index 0.
1111
# REL: .rela.dyn {
12-
# REL-NEXT: 0x2338 R_X86_64_TPOFF64 - 0x0
13-
# REL-NEXT: 0x2340 R_X86_64_TPOFF64 - 0x4
12+
# REL-NEXT: 0x2378 R_X86_64_TPOFF64 - 0x0
13+
# REL-NEXT: 0x2380 R_X86_64_TPOFF64 - 0x8
14+
# REL-NEXT: 0x2388 R_X86_64_TPOFF64 - 0x4
15+
# REL-NEXT: 0x2390 R_X86_64_TPOFF64 - 0xC
1416
# REL-NEXT: }
1517

16-
## &.got[0] - 0x127f = 0x2338 - 0x127f = 4281
17-
## &.got[1] - 0x1286 = 0x2340 - 0x1286 = 4282
18-
# CHECK: 1278: addq 4281(%rip), %rax
19-
# CHECK-NEXT: 127f: addq 4282(%rip), %rax
18+
## &.got[0] - 0x12af = 0x2378 - 0x12af = 4297
19+
## &.got[1] - 0x12b6 = 0x2380 - 0x12b6 = 4298
20+
## &.got[2] - 0x12be = 0x2388 - 0x12be = 4298
21+
## &.got[3] - 0x12c6 = 0x2390 - 0x12c6 = 4298
22+
23+
# CHECK: 12a8: addq 4297(%rip), %rax
24+
# CHECK-NEXT: 12af: addq 4298(%rip), %rax
25+
# CHECK-NEXT: 12b6: addq 4298(%rip), %r16
26+
# CHECK-NEXT: 12be: addq 4298(%rip), %r16
2027

2128
addq foo@GOTTPOFF(%rip), %rax
2229
addq bar@GOTTPOFF(%rip), %rax
30+
addq foo2@GOTTPOFF(%rip), %r16
31+
addq bar2@GOTTPOFF(%rip), %r16
32+
2333

2434
.section .tbss,"awT",@nobits
2535
foo:
2636
.long 0
37+
foo2:
38+
.long 0
2739
bar:
2840
.long 0
41+
bar2:
42+
.long 0

0 commit comments

Comments
 (0)