Skip to content

Commit 853e126

Browse files
committed
[BOLT] Support input binaries that use R_X86_GOTPC64
In the large code model, the address of the GOT is calculated by the static linker via an R_X86_64_GOTPC64 relocation applied against a MOVABSQ instruction. In the final binary, the operand disassembles as a regular immediate, but because that immediate is the result of PC-relative pointer arithmetic, we need to parse this relocation and update the calculation whenever we move code; otherwise we break code that reads the GOT. A test case showing how the GOT is accessed is provided. Reviewed By: #bolt, maksfb. Differential Revision: https://reviews.llvm.org/D158911
1 parent a29e8ef commit 853e126

File tree

11 files changed

+207
-1
lines changed

11 files changed

+207
-1
lines changed

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -871,6 +871,15 @@ class BinaryContext {
871871
return nullptr;
872872
}
873873

874+
/// Returns a pointer to the BinaryData for ELF's _GLOBAL_OFFSET_TABLE_ symbol,
875+
/// which points at the GOT, or nullptr if it is not present in the input
/// binary symtab.
876+
BinaryData *getGOTSymbol();
877+
878+
/// Checks if symbol name refers to ELF's _GLOBAL_OFFSET_TABLE_ symbol
879+
bool isGOTSymbol(StringRef SymName) const {
880+
return SymName == "_GLOBAL_OFFSET_TABLE_";
881+
}
882+
874883
/// Return true if \p SymbolName was generated internally and was not present
875884
/// in the input binary.
876885
bool isInternalSymbolName(const StringRef Name) {

bolt/include/bolt/Core/Relocation.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ struct Relocation {
8484

8585
/// Special relocation type that allows the linker to modify the instruction.
8686
static bool isX86GOTPCRELX(uint64_t Type);
87+
static bool isX86GOTPC64(uint64_t Type);
8788

8889
/// Return true if relocation type is NONE
8990
static bool isNone(uint64_t Type);

bolt/lib/Core/BinaryContext.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1026,6 +1026,31 @@ BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
10261026
return nullptr;
10271027
}
10281028

1029+
/// Locate the BinaryData for ELF's _GLOBAL_OFFSET_TABLE_ symbol.
///
/// A global definition is preferred. If there is none, the local definitions
/// are looked up under the names "_GLOBAL_OFFSET_TABLE_/1",
/// "_GLOBAL_OFFSET_TABLE_/2", and so on. The first local definition is
/// returned only if every further local definition has the same address.
///
/// \returns the GOT symbol data, or nullptr if the symbol is absent or the
/// local definitions disagree on its address.
BinaryData *BinaryContext::getGOTSymbol() {
  // First tries to find a global symbol with that name.
  if (BinaryData *GlobalBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_"))
    return GlobalBD;

  // This symbol might be hidden from run-time link, so fetch the local
  // definition if available.
  BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1");
  if (!GOTSymBD)
    return nullptr;

  // If the local symbol is not unique, fail.
  for (unsigned Index = 2;; ++Index) {
    // Use a fresh buffer for every candidate name: Twine::toStringRef()
    // appends to the vector it is handed without clearing it, so reusing a
    // single buffer would garble every name after the first one and end the
    // scan prematurely.
    SmallString<30> Storage;
    const BinaryData *BD =
        getBinaryDataByName(Twine("_GLOBAL_OFFSET_TABLE_/")
                                .concat(Twine(Index))
                                .toStringRef(Storage));
    if (!BD)
      break;
    if (BD->getAddress() != GOTSymBD->getAddress())
      return nullptr;
  }

  return GOTSymBD;
}
1053+
10291054
bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
10301055
auto NI = BinaryDataMap.find(Address);
10311056
assert(NI != BinaryDataMap.end());

bolt/lib/Core/Relocation.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ static bool isSupportedX86(uint64_t Type) {
3535
case ELF::R_X86_64_PC32:
3636
case ELF::R_X86_64_PC64:
3737
case ELF::R_X86_64_PLT32:
38+
case ELF::R_X86_64_GOTPC64:
3839
case ELF::R_X86_64_GOTPCREL:
3940
case ELF::R_X86_64_GOTTPOFF:
4041
case ELF::R_X86_64_TPOFF32:
@@ -136,6 +137,7 @@ static size_t getSizeForTypeX86(uint64_t Type) {
136137
return 4;
137138
case ELF::R_X86_64_PC64:
138139
case ELF::R_X86_64_64:
140+
case ELF::R_X86_64_GOTPC64:
139141
return 8;
140142
}
141143
}
@@ -655,6 +657,7 @@ static bool isPCRelativeX86(uint64_t Type) {
655657
case ELF::R_X86_64_PLT32:
656658
case ELF::R_X86_64_GOTOFF64:
657659
case ELF::R_X86_64_GOTPC32:
660+
case ELF::R_X86_64_GOTPC64:
658661
case ELF::R_X86_64_GOTTPOFF:
659662
case ELF::R_X86_64_GOTPCRELX:
660663
case ELF::R_X86_64_REX_GOTPCRELX:
@@ -797,6 +800,12 @@ bool Relocation::isX86GOTPCRELX(uint64_t Type) {
797800
return Type == ELF::R_X86_64_GOTPCRELX || Type == ELF::R_X86_64_REX_GOTPCRELX;
798801
}
799802

803+
/// Return true if the current architecture is x86-64 and \p Type is the
/// R_X86_64_GOTPC64 relocation type.
bool Relocation::isX86GOTPC64(uint64_t Type) {
  return Arch == Triple::x86_64 && Type == ELF::R_X86_64_GOTPC64;
}
808+
800809
bool Relocation::isNone(uint64_t Type) { return Type == getNone(); }
801810

802811
bool Relocation::isRelative(uint64_t Type) {

bolt/lib/Rewrite/JITLinkLinker.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,19 @@ struct JITLinkLinker::Context : jitlink::JITLinkContext {
141141
orc::ExecutorAddr(Address), JITSymbolFlags());
142142
continue;
143143
}
144+
145+
if (Linker.BC.isGOTSymbol(SymName)) {
146+
if (const BinaryData *I = Linker.BC.getGOTSymbol()) {
147+
uint64_t Address =
148+
I->isMoved() ? I->getOutputAddress() : I->getAddress();
149+
LLVM_DEBUG(dbgs() << "Resolved to address 0x"
150+
<< Twine::utohexstr(Address) << "\n");
151+
AllResults[Symbol.first] = orc::ExecutorSymbolDef(
152+
orc::ExecutorAddr(Address), JITSymbolFlags());
153+
continue;
154+
}
155+
}
156+
144157
LLVM_DEBUG(dbgs() << "Resolved to address 0x0\n");
145158
AllResults[Symbol.first] =
146159
orc::ExecutorSymbolDef(orc::ExecutorAddr(0), JITSymbolFlags());

bolt/lib/Rewrite/RewriteInstance.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2394,9 +2394,13 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
23942394
}
23952395

23962396
MCSymbol *ReferencedSymbol = nullptr;
2397-
if (!IsSectionRelocation)
2397+
if (!IsSectionRelocation) {
23982398
if (BinaryData *BD = BC->getBinaryDataByName(SymbolName))
23992399
ReferencedSymbol = BD->getSymbol();
2400+
else if (BC->isGOTSymbol(SymbolName))
2401+
if (BinaryData *BD = BC->getGOTSymbol())
2402+
ReferencedSymbol = BD->getSymbol();
2403+
}
24002404

24012405
ErrorOr<BinarySection &> ReferencedSection{std::errc::bad_address};
24022406
symbol_iterator SymbolIter = Rel.getSymbol();

bolt/lib/Target/X86/X86MCPlusBuilder.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,7 @@ class X86MCPlusBuilder : public MCPlusBuilder {
403403
case ELF::R_X86_64_PC8:
404404
case ELF::R_X86_64_PC32:
405405
case ELF::R_X86_64_PC64:
406+
case ELF::R_X86_64_GOTPC64:
406407
case ELF::R_X86_64_GOTPCRELX:
407408
case ELF::R_X86_64_REX_GOTPCRELX:
408409
return true;

bolt/lib/Target/X86/X86MCSymbolizer.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,15 @@ bool X86MCSymbolizer::tryAddingSymbolicOperand(
130130
if (!Relocation)
131131
return processPCRelOperandNoRel();
132132

133+
// GOTPC64 is special because the X86 Assembler doesn't know how to emit
134+
// a PC-relative 8-byte fixup, which is what we need to cover this. The
135+
// only way to do this is to use the symbol name _GLOBAL_OFFSET_TABLE_.
136+
if (Relocation::isX86GOTPC64(Relocation->Type)) {
137+
auto [Sym, Addend] = handleGOTPC64(*Relocation, InstAddress);
138+
addOperand(Sym, Addend);
139+
return true;
140+
}
141+
133142
uint64_t SymbolValue = Relocation->Value - Relocation->Addend;
134143
if (Relocation->isPCRelative())
135144
SymbolValue += InstAddress + ImmOffset;
@@ -149,6 +158,26 @@ bool X86MCSymbolizer::tryAddingSymbolicOperand(
149158
return true;
150159
}
151160

161+
std::pair<MCSymbol *, uint64_t>
162+
X86MCSymbolizer::handleGOTPC64(const Relocation &R, uint64_t InstrAddr) {
163+
BinaryContext &BC = Function.getBinaryContext();
164+
const BinaryData *GOTSymBD = BC.getGOTSymbol();
165+
if (!GOTSymBD || !GOTSymBD->getAddress()) {
166+
errs() << "BOLT-ERROR: R_X86_GOTPC64 relocation is present but we did "
167+
"not detect a valid _GLOBAL_OFFSET_TABLE_ in symbol table\n";
168+
exit(1);
169+
}
170+
// R_X86_GOTPC64 are not relative to the Reloc nor end of instruction,
171+
// but the start of the MOVABSQ instruction. So the Target Address is
172+
// whatever is encoded in the original operand when we disassembled
173+
// the binary (here, R.Value) plus MOVABSQ address (InstrAddr).
174+
// Here we extract the intended Addend by subtracting the real
175+
// GOT addr.
176+
const int64_t Addend = R.Value + InstrAddr - GOTSymBD->getAddress();
177+
return std::make_pair(BC.Ctx->getOrCreateSymbol("_GLOBAL_OFFSET_TABLE_"),
178+
Addend);
179+
}
180+
152181
void X86MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
153182
int64_t Value,
154183
uint64_t Address) {}

bolt/lib/Target/X86/X86MCSymbolizer.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ class X86MCSymbolizer : public MCSymbolizer {
2020
BinaryFunction &Function;
2121
bool CreateNewSymbols{true};
2222

23+
std::pair<MCSymbol *, uint64_t> handleGOTPC64(const Relocation &R,
24+
uint64_t InstrAddr);
25+
2326
public:
2427
X86MCSymbolizer(BinaryFunction &Function, bool CreateNewSymbols = true)
2528
: MCSymbolizer(*Function.getBinaryContext().Ctx.get(), nullptr),
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# A variation of gotoff-large-code-model.s that accesses GOT value
2+
# with a slightly different code sequence.
3+
4+
# REQUIRES: system-linux
5+
6+
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
7+
# RUN: %s -o %t.o
8+
# RUN: %clang %cflags -no-pie %t.o -o %t.exe -Wl,-q
9+
10+
# RUN: llvm-bolt %t.exe --funcs init_impls --lite \
11+
# RUN: -o %t.bolted
12+
# RUN: %t.bolted | FileCheck %s
13+
14+
.section .rodata.str1.1,"aMS",@progbits,1
15+
.LC2:
16+
.string "Hello, world\n"
17+
.text
18+
.p2align 4
19+
.globl init_impls
20+
.type init_impls, @function
21+
# init_impls: prints .LC2 through puts, reaching both the string and puts via
# GOT-relative 64-bit offsets (R_X86_64_GOTPC64 / GOTOFF64 / PLTOFF64).
init_impls:
22+
.cfi_startproc
23+
push %rbp
24+
mov %rsp,%rbp
25+
push %r15
26+
push %rbx
27+
sub $0x8,%rsp
28+
# %rbx = run-time address of label 1, i.e. of the movabsq instruction below.
lea 1f(%rip),%rbx
29+
# R_X86_64_GOTPC64 _GLOBAL_OFFSET_TABLE_+0x2
30+
# NOTE(review): the +0x2 addend presumably skips the two bytes that precede
# the 8-byte immediate, making the value relative to the movabsq start.
1: movabsq $_GLOBAL_OFFSET_TABLE_, %r11
31+
# %rbx = label address + PC-relative GOT offset (presumably the GOT address).
add %r11,%rbx
32+
# R_X86_64_GOTOFF64 .LC2
33+
# %rax = offset of .LC2 taken relative to the GOT.
movabs $.LC2@gotoff,%rax
34+
# %rax = %rbx + offset, i.e. the address of the string.
lea (%rbx,%rax,1),%rax
35+
# The string address goes in %rdi for the indirect call to puts below.
mov %rax,%rdi
36+
mov %rbx,%r15
37+
# R_X86_64_PLTOFF64 puts
38+
# %rax = puts@pltoff + %rbx, then call through it.
movabs $puts@pltoff,%rax
39+
add %rbx,%rax
40+
call *%rax
41+
add $0x8,%rsp
42+
pop %rbx
43+
pop %r15
44+
pop %rbp
45+
retq
46+
.cfi_endproc
47+
.size init_impls, .-init_impls
48+
49+
.globl main
50+
.type main, @function
51+
.p2align 4
52+
main:
53+
callq init_impls
54+
xorq %rax, %rax
55+
ret
56+
57+
# CHECK: Hello, world
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# REQUIRES: system-linux
2+
3+
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
4+
# RUN: %s -o %t.o
5+
# RUN: %clang %cflags -no-pie %t.o -o %t.exe -Wl,-q
6+
7+
# RUN: llvm-bolt %t.exe --funcs init_impls --lite \
8+
# RUN: -o %t.bolted
9+
# RUN: %t.bolted | FileCheck %s
10+
11+
.section .rodata.str1.1,"aMS",@progbits,1
12+
.LC2:
13+
.string "Hello, world\n"
14+
.text
15+
.p2align 4
16+
.globl init_impls
17+
.type init_impls, @function
18+
# init_impls: prints .LC2 through puts, reaching both the string and puts via
# GOT-relative 64-bit offsets (R_X86_64_GOTPC64 / GOTOFF64 / PLTOFF64).
init_impls:
19+
.cfi_startproc
20+
push %rbp
21+
mov %rsp,%rbp
22+
push %r15
23+
push %rbx
24+
sub $0x8,%rsp
25+
# Label 1 marks the lea itself, so %rbx = run-time address of the lea.
1:
26+
lea 1b(%rip),%rbx
27+
# R_X86_64_GOTPC64 _GLOBAL_OFFSET_TABLE_+0x9
28+
# The immediate is _GLOBAL_OFFSET_TABLE_ minus the address of label 1.
# NOTE(review): the +0x9 addend presumably covers the lea plus the bytes of
# the movabsq that precede its 8-byte immediate.
movabsq $_GLOBAL_OFFSET_TABLE_-1b, %r11
29+
# %rbx = label address + (GOT - label), i.e. the GOT address.
add %r11,%rbx
30+
# R_X86_64_GOTOFF64 .LC2
31+
# %rax = offset of .LC2 taken relative to the GOT.
movabs $.LC2@gotoff,%rax
32+
# %rax = %rbx + offset, i.e. the address of the string.
lea (%rbx,%rax,1),%rax
33+
# The string address goes in %rdi for the indirect call to puts below.
mov %rax,%rdi
34+
mov %rbx,%r15
35+
# R_X86_64_PLTOFF64 puts
36+
# %rax = puts@pltoff + %rbx, then call through it.
movabs $puts@pltoff,%rax
37+
add %rbx,%rax
38+
call *%rax
39+
add $0x8,%rsp
40+
pop %rbx
41+
pop %r15
42+
pop %rbp
43+
retq
44+
.cfi_endproc
45+
.size init_impls, .-init_impls
46+
47+
.globl main
48+
.type main, @function
49+
.p2align 4
50+
main:
51+
callq init_impls
52+
xorq %rax, %rax
53+
ret
54+
55+
# CHECK: Hello, world

0 commit comments

Comments
 (0)