Skip to content

Commit a2639dc

Browse files
committed
[ORC] Add a utility for adding missing "self" relocations to a Symbol
If a tool wants to introduce new indirections via stubs at link-time in ORC, it can cause fidelity issues around the address of the function if some references to the function do not have relocations. This is known to happen inside the body of the function itself on x86_64 for example, where a PC-relative address is formed, but without a relocation.

```
_foo:
  leaq -7(%rip), %rax ## form pointer to '_foo' without relocation
_bar:
  leaq (%rip), %rax ## uses X86_64_RELOC_SIGNED to '_foo'
```

The consequence of introducing a stub for such a function at link time is that if it forms a pointer to itself without relocation, it will not have the same value as a pointer from outside the function. If the function pointer is used as a key, this can cause problems.

This utility provides best-effort support for adding such missing relocations using MCDisassembler and MCInstrAnalysis to identify the problematic instructions. Currently it is only implemented for x86_64.

Note: the related issue with call/jump instructions is not handled here, only forming function pointers.

rdar://83514317

Differential revision: https://reviews.llvm.org/D113038
1 parent 41481b7 commit a2639dc

File tree

10 files changed

+297
-11
lines changed

10 files changed

+297
-11
lines changed

llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,13 @@ class PointerType;
4545
class Triple;
4646
class Twine;
4747
class Value;
48+
class MCDisassembler;
49+
class MCInstrAnalysis;
50+
51+
namespace jitlink {
52+
class LinkGraph;
53+
class Symbol;
54+
} // namespace jitlink
4855

4956
namespace orc {
5057

@@ -557,6 +564,33 @@ GlobalAlias *cloneGlobalAliasDecl(Module &Dst, const GlobalAlias &OrigA,
557564
void cloneModuleFlagsMetadata(Module &Dst, const Module &Src,
558565
ValueToValueMapTy &VMap);
559566

567+
/// Introduce relocations to \p Sym in its own definition if there are any
568+
/// pointers formed via PC-relative address that do not already have a
569+
/// relocation.
570+
///
571+
/// This is useful when introducing indirection via a stub function at link time
572+
/// without compiler support. If a function pointer is formed without a
573+
/// relocation, e.g. in the definition of \c foo
574+
///
575+
/// \code
576+
/// _foo:
577+
/// leaq -7(%rip), %rax # form pointer to _foo without relocation
578+
/// _bar:
579+
/// leaq (%rip), %rax # uses X86_64_RELOC_SIGNED to '_foo'
580+
/// \endcode
581+
///
582+
/// the pointer to \c _foo computed by \c _foo and \c _bar may differ if we
583+
/// introduce a stub for _foo. If the pointer is used as a key, this may be
584+
/// observable to the program. This pass will attempt to introduce the missing
585+
/// "self-relocation" on the leaq instruction.
586+
///
587+
/// This is based on disassembly and should be considered "best effort". It may
588+
/// silently fail to add relocations.
589+
Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym,
590+
jitlink::LinkGraph &G,
591+
MCDisassembler &Disassembler,
592+
MCInstrAnalysis &MIA);
593+
560594
} // end namespace orc
561595

562596
} // end namespace llvm

llvm/include/llvm/MC/MCInstrAnalysis.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,11 @@ class MCInstrAnalysis {
158158
evaluateMemoryOperandAddress(const MCInst &Inst, const MCSubtargetInfo *STI,
159159
uint64_t Addr, uint64_t Size) const;
160160

161+
/// Given an instruction with a memory operand that could require relocation,
162+
/// returns the offset within the instruction of that relocation.
163+
virtual Optional<uint64_t>
164+
getMemoryOperandRelocationOffset(const MCInst &Inst, uint64_t Size) const;
165+
161166
/// Returns (PLT virtual address, GOT virtual address) pairs for PLT entries.
162167
virtual std::vector<std::pair<uint64_t, uint64_t>>
163168
findPltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,

llvm/lib/ExecutionEngine/Orc/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ add_llvm_component_library(LLVMOrcJIT
5151
OrcShared
5252
OrcTargetProcess
5353
MC
54+
MCDisassembler
5455
Passes
5556
RuntimeDyld
5657
Support

llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,17 @@
99
#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
1010
#include "llvm/ADT/STLExtras.h"
1111
#include "llvm/ADT/Triple.h"
12+
#include "llvm/ExecutionEngine/JITLink/x86_64.h"
1213
#include "llvm/ExecutionEngine/Orc/OrcABISupport.h"
1314
#include "llvm/IR/IRBuilder.h"
15+
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
16+
#include "llvm/MC/MCInstrAnalysis.h"
1417
#include "llvm/Support/Format.h"
1518
#include "llvm/Transforms/Utils/Cloning.h"
1619
#include <sstream>
1720

21+
#define DEBUG_TYPE "orc"
22+
1823
using namespace llvm;
1924
using namespace llvm::orc;
2025

@@ -372,5 +377,77 @@ void cloneModuleFlagsMetadata(Module &Dst, const Module &Src,
372377
Dst.addModuleFlag(MapMetadata(MF, VMap));
373378
}
374379

380+
// Disassembles the bytes of \p Sym's definition one instruction at a time.
// For every instruction whose memory operand evaluates to the address of
// \p Sym itself and that has no relocation at the operand's offset yet, a
// Delta32 edge targeting \p Sym is added to the symbol's block.
//
// Returns success without doing anything when the graph's target is not
// x86_64. Returns a StringError as soon as an instruction cannot be
// disassembled; edges added before that point are left in place.
Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym,
                                                   jitlink::LinkGraph &G,
                                                   MCDisassembler &Disassembler,
                                                   MCInstrAnalysis &MIA) {
  // AArch64 appears to already come with the necessary relocations. Among other
  // architectures, only x86_64 is currently implemented here.
  if (G.getTargetTriple().getArch() != Triple::x86_64)
    return Error::success();

  // The disassembler wants a comment stream; its output is discarded.
  raw_null_ostream CommentStream;
  auto &STI = Disassembler.getSubtargetInfo();

  // Determine the function bounds.
  auto &B = Sym.getBlock();
  assert(!B.isZeroFill() && "expected content block");
  auto SymAddress = Sym.getAddress();
  auto SymStartInBlock =
      reinterpret_cast<const uint8_t *>(B.getContent().data()) +
      Sym.getOffset();
  // A symbol with size zero is treated as extending to the end of its block.
  auto SymSize = Sym.getSize() ? Sym.getSize() : B.getSize() - Sym.getOffset();
  auto Content = makeArrayRef(SymStartInBlock, SymSize);

  LLVM_DEBUG(dbgs() << "Adding self-relocations to " << Sym.getName() << "\n");

  // Block offsets that already carry a relocation; these are never duplicated.
  SmallDenseSet<uintptr_t, 8> ExistingRelocations;
  for (auto &E : B.edges()) {
    if (E.isRelocation())
      ExistingRelocations.insert(E.getOffset());
  }

  size_t I = 0;
  while (I < Content.size()) {
    MCInst Instr;
    uint64_t InstrSize = 0;
    uint64_t InstrStart = SymAddress + I;
    auto DecodeStatus = Disassembler.getInstruction(
        Instr, InstrSize, Content.drop_front(I), InstrStart, CommentStream);
    if (DecodeStatus != MCDisassembler::Success) {
      LLVM_DEBUG(dbgs() << "Aborting due to disassembly failure at address "
                        << formatv("{0:x16}", InstrStart) << "\n");
      return make_error<StringError>(
          formatv("failed to disassemble at address {0:x16}", InstrStart),
          inconvertibleErrorCode());
    }
    // Advance to the next instruction.
    I += InstrSize;

    // Check for a PC-relative address equal to the symbol itself.
    auto PCRelAddr =
        MIA.evaluateMemoryOperandAddress(Instr, &STI, InstrStart, InstrSize);
    if (!PCRelAddr || *PCRelAddr != SymAddress)
      continue;

    // Only a 4-byte operand that ends the instruction can take the Delta32
    // edge (with addend -4) added below.
    auto RelocOffInInstr =
        MIA.getMemoryOperandRelocationOffset(Instr, InstrSize);
    if (!RelocOffInInstr || InstrSize - *RelocOffInInstr != 4) {
      LLVM_DEBUG(dbgs() << "Skipping unknown self-relocation at "
                        << formatv("{0:x16}", InstrStart) << "\n");
      continue;
    }

    // Translate the operand position from an address into a block offset.
    auto RelocOffInBlock =
        InstrStart + *RelocOffInInstr - SymAddress + Sym.getOffset();
    if (ExistingRelocations.contains(RelocOffInBlock))
      continue;

    LLVM_DEBUG(dbgs() << "Adding delta32 self-relocation at "
                      << formatv("{0:x16}", InstrStart) << "\n");
    B.addEdge(jitlink::x86_64::Delta32, RelocOffInBlock, Sym, /*Addend=*/-4);
  }
  return Error::success();
}
451+
375452
} // End namespace orc.
376453
} // End namespace llvm.

llvm/lib/MC/MCInstrAnalysis.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,9 @@ Optional<uint64_t> MCInstrAnalysis::evaluateMemoryOperandAddress(
3434
uint64_t Size) const {
3535
return None;
3636
}
37+
38+
// Target-independent default: reports no relocation offset for any
// instruction. Declared virtual in MCInstrAnalysis so that targets can
// override it.
Optional<uint64_t> MCInstrAnalysis::getMemoryOperandRelocationOffset(
    const MCInst &Inst, uint64_t Size) const {
  return None;
}

llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,9 @@ class X86MCInstrAnalysis : public MCInstrAnalysis {
408408
const MCSubtargetInfo *STI,
409409
uint64_t Addr,
410410
uint64_t Size) const override;
411+
Optional<uint64_t>
412+
getMemoryOperandRelocationOffset(const MCInst &Inst,
413+
uint64_t Size) const override;
411414
};
412415

413416
#define GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS
@@ -557,6 +560,30 @@ Optional<uint64_t> X86MCInstrAnalysis::evaluateMemoryOperandAddress(
557560
return None;
558561
}
559562

563+
// Returns the byte offset, within an instruction of \p Size bytes, of the
// 32-bit displacement of a simple rip-relative LEA64r. Returns None for any
// other opcode or addressing form.
Optional<uint64_t>
X86MCInstrAnalysis::getMemoryOperandRelocationOffset(const MCInst &Inst,
                                                     uint64_t Size) const {
  const unsigned Opcode = Inst.getOpcode();
  if (Opcode != X86::LEA64r)
    return None;

  // Locate the first of the memory operands in the MCInst operand list.
  const MCInstrDesc &Desc = Info->get(Opcode);
  int FirstMemOp = X86II::getMemoryOperandNo(Desc.TSFlags);
  if (FirstMemOp == -1)
    return None;
  FirstMemOp += X86II::getOperandBias(Desc);

  const MCOperand &Segment = Inst.getOperand(FirstMemOp + X86::AddrSegmentReg);
  const MCOperand &Base = Inst.getOperand(FirstMemOp + X86::AddrBaseReg);
  const MCOperand &Index = Inst.getOperand(FirstMemOp + X86::AddrIndexReg);
  const MCOperand &Scale = Inst.getOperand(FirstMemOp + X86::AddrScaleAmt);
  const MCOperand &Displacement = Inst.getOperand(FirstMemOp + X86::AddrDisp);

  // Must be a simple rip-relative address: rip base, no segment override, no
  // index register, unit scale and an immediate displacement.
  const bool IsSimpleRipRelative =
      Base.getReg() == X86::RIP && Segment.getReg() == 0 &&
      Index.getReg() == 0 && Scale.getImm() == 1 && Displacement.isImm();
  if (!IsSimpleRipRelative)
    return None;

  // rip-relative ModR/M immediate is 32 bits, located at the very end of the
  // instruction.
  assert(Size > 4 && "invalid instruction size for rip-relative lea");
  return Size - 4;
}
586+
560587
} // end of namespace X86_MC
561588

562589
} // end of namespace llvm
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
.section __TEXT,__text,regular,pure_instructions
2+
3+
.globl _form_func_ptr
4+
_form_func_ptr:
5+
leaq _form_func_ptr(%rip), %rax
6+
leaq _other(%rip), %rax
7+
leaq _form_func_ptr(%rip), %rax
8+
nop
9+
leaq _form_func_ptr(%rip), %rax
10+
retq
11+
12+
.globl _other
13+
_other:
14+
leaq _form_func_ptr(%rip), %rax
15+
retq
16+
17+
# Return 0 if the pointers formed inside and outside the function are the same.
18+
.globl _main
19+
_main:
20+
pushq %rbp
21+
movq %rsp, %rbp
22+
subq $32, %rsp
23+
movl $0, -4(%rbp)
24+
callq _form_func_ptr
25+
movq %rax, -16(%rbp)
26+
callq _other
27+
movq %rax, -24(%rbp)
28+
movq -16(%rbp), %rax
29+
cmpq -24(%rbp), %rax
30+
setne %al
31+
andb $1, %al
32+
movzbl %al, %eax
33+
addq $32, %rsp
34+
popq %rbp
35+
retq
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# RUN: llvm-mc -triple=x86_64-apple-macosx10.9 -filetype=obj -o %t %S/Inputs/x86-64_self_relocation.s
2+
# RUN: llvm-jitlink -noexec -show-graph %t 2>&1 | \
3+
# RUN: FileCheck -check-prefix=WITHOUT %s
4+
# RUN: llvm-jitlink -noexec -show-graph -add-self-relocations %t 2>&1 | \
5+
# RUN: FileCheck -check-prefix=CHECK %s
6+
7+
# WITHOUT: block
8+
# WITHOUT-NEXT: symbols
9+
# WITHOUT-NEXT: _form_func_ptr
10+
# WITHOUT-NEXT: edges
11+
# WITHOUT-NEXT: (block + 0x0000000a), addend = -0x00000004, kind = Delta32, target = _other
12+
# WITHOUT-NOT: kind =
13+
14+
# CHECK: block
15+
# CHECK-NEXT: symbols
16+
# CHECK-NEXT: _form_func_ptr
17+
# CHECK-NEXT: edges
18+
# CHECK-NEXT: (block + 0x00000003), addend = -0x00000004, kind = Delta32, target = _form_func_ptr
19+
# CHECK-NEXT: (block + 0x0000000a), addend = -0x00000004, kind = Delta32, target = _other
20+
# CHECK-NEXT: (block + 0x00000011), addend = -0x00000004, kind = Delta32, target = _form_func_ptr
21+
# CHECK-NEXT: (block + 0x00000019), addend = -0x00000004, kind = Delta32, target = _form_func_ptr
22+
# CHECK-NOT: kind =
23+
24+
# WITHOUT: block
25+
# WITHOUT-NEXT: symbols
26+
# WITHOUT-NEXT: _other
27+
# WITHOUT-NEXT: edges
28+
# WITHOUT-NEXT: kind = Delta32, target = _form_func_ptr
29+
30+
# CHECK: block
31+
# CHECK-NEXT: symbols
32+
# CHECK-NEXT: _other
33+
# CHECK-NEXT: edges
34+
# CHECK-NEXT: kind = Delta32, target = _form_func_ptr
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# RUN: llvm-mc -triple=x86_64-apple-macosx10.9 -filetype=obj -o %t %S/Inputs/x86-64_self_relocation.s
2+
# RUN: llvm-jitlink -show-graph %t
3+
# RUN: llvm-jitlink -show-graph -add-self-relocations %t
4+
5+
# Ensure that the added relocation does not create an incorrect pointer.
6+
7+
# Execution test
8+
# REQUIRES: system-darwin && native && target-x86_64

0 commit comments

Comments
 (0)