Skip to content

Commit bac2171

Browse files
authored
[BOLT] Pass unfiltered relocations to disassembler. NFCI (#131202)
Instead of filtering and modifying relocations in readRelocations(), preserve the relocation info and use it in the symbolizing disassembler. This change mostly affects AArch64, where we need to look at original linker relocations in order to properly symbolize instruction operands.
1 parent d52ec1e commit bac2171

File tree

8 files changed

+113
-133
lines changed

8 files changed

+113
-133
lines changed

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,16 @@ class MCPlusBuilder {
637637
return false;
638638
}
639639

640+
/// Target hook: return true if \p Inst is the instruction the target
/// identifies as AddXri (the AArch64 override in this change compares the
/// opcode against AArch64::ADDXri). The default implementation is a stub
/// marked llvm_unreachable("not implemented"), so a target that relies on this
/// query must override it.
virtual bool isAddXri(const MCInst &Inst) const {
641+
llvm_unreachable("not implemented");
642+
return false;
643+
}
644+
645+
/// Target hook: return true if \p Inst is a MOVW-class instruction (the
/// AArch64 override in this change matches opcodes such as MOVKWi, MOVKXi and
/// MOVNWi). The default implementation is a stub marked
/// llvm_unreachable("not implemented"), so a target that relies on this query
/// must override it.
virtual bool isMOVW(const MCInst &Inst) const {
646+
llvm_unreachable("not implemented");
647+
return false;
648+
}
649+
640650
virtual bool isMoveMem2Reg(const MCInst &Inst) const { return false; }
641651

642652
virtual bool mayLoad(const MCInst &Inst) const {

bolt/include/bolt/Core/Relocation.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,6 @@ struct Relocation {
6464
/// Skip relocations that we don't want to handle in BOLT
6565
static bool skipRelocationType(uint32_t Type);
6666

67-
/// Handle special cases when relocation should not be processed by BOLT or
68-
/// change relocation \p Type to proper one before continuing if \p Contents
69-
/// and \p Type mismatch occurred.
70-
static bool skipRelocationProcess(uint32_t &Type, uint64_t Contents);
71-
7267
/// Adjust value depending on relocation type (make it PC relative or not).
7368
static uint64_t encodeValue(uint32_t Type, uint64_t Value, uint64_t PC);
7469

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1473,10 +1473,19 @@ Error BinaryFunction::disassemble() {
14731473
}
14741474
}
14751475

1476+
uint64_t Addend = Relocation.Addend;
1477+
1478+
// For GOT relocations, create a reference against GOT entry ignoring
1479+
// the relocation symbol.
1480+
if (Relocation::isGOT(Relocation.Type)) {
1481+
assert(Relocation::isPCRelative(Relocation.Type) &&
1482+
"GOT relocation must be PC-relative on RISC-V");
1483+
Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
1484+
Addend = Relocation.Value + Relocation.Offset + getAddress();
1485+
}
14761486
int64_t Value = Relocation.Value;
14771487
const bool Result = BC.MIB->replaceImmWithSymbolRef(
1478-
Instruction, Symbol, Relocation.Addend, Ctx.get(), Value,
1479-
Relocation.Type);
1488+
Instruction, Symbol, Addend, Ctx.get(), Value, Relocation.Type);
14801489
(void)Result;
14811490
assert(Result && "cannot replace immediate with relocation");
14821491
}

bolt/lib/Core/Relocation.cpp

Lines changed: 0 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -257,78 +257,6 @@ static bool skipRelocationTypeRISCV(uint32_t Type) {
257257
}
258258
}
259259

260-
static bool skipRelocationProcessX86(uint32_t &Type, uint64_t Contents) {
261-
return false;
262-
}
263-
264-
static bool skipRelocationProcessAArch64(uint32_t &Type, uint64_t Contents) {
265-
auto IsMov = [](uint64_t Contents) -> bool {
266-
// The bits 28-23 are 0b100101
267-
return (Contents & 0x1f800000) == 0x12800000;
268-
};
269-
270-
auto IsB = [](uint64_t Contents) -> bool {
271-
// The bits 31-26 are 0b000101
272-
return (Contents & 0xfc000000) == 0x14000000;
273-
};
274-
275-
auto IsAddImm = [](uint64_t Contents) -> bool {
276-
// The bits 30-23 are 0b00100010
277-
return (Contents & 0x7F800000) == 0x11000000;
278-
};
279-
280-
// The linker might relax ADRP+LDR instruction sequence for loading symbol
281-
// address from GOT table to ADRP+ADD sequence that would point to the
282-
// binary-local symbol. Change relocation type in order to process it right.
283-
if (Type == ELF::R_AARCH64_LD64_GOT_LO12_NC && IsAddImm(Contents)) {
284-
Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
285-
return false;
286-
}
287-
288-
// The linker might perform TLS relocations relaxations, such as
289-
// changed TLS access model (e.g. changed global dynamic model
290-
// to initial exec), thus changing the instructions. The static
291-
// relocations might be invalid at this point and we might not
292-
// need to process these relocations anymore.
293-
// More information could be found by searching
294-
// elfNN_aarch64_tls_relax in bfd
295-
switch (Type) {
296-
default:
297-
break;
298-
case ELF::R_AARCH64_TLSDESC_LD64_LO12:
299-
case ELF::R_AARCH64_TLSDESC_ADR_PAGE21:
300-
case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
301-
case ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: {
302-
if (IsMov(Contents))
303-
return true;
304-
}
305-
}
306-
307-
// The linker might replace load/store instruction with jump and
308-
// veneer due to errata 843419
309-
// https://documentation-service.arm.com/static/5fa29fddb209f547eebd361d
310-
// Thus load/store relocations for these instructions must be ignored
311-
// NOTE: We only process GOT and TLS relocations this way since the
312-
// addend used in load/store instructions won't change after bolt
313-
// (it is important since the instruction in veneer won't have relocation)
314-
switch (Type) {
315-
default:
316-
break;
317-
case ELF::R_AARCH64_LD64_GOT_LO12_NC:
318-
case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
319-
case ELF::R_AARCH64_TLSDESC_LD64_LO12: {
320-
if (IsB(Contents))
321-
return true;
322-
}
323-
}
324-
325-
return false;
326-
}
327-
328-
static bool skipRelocationProcessRISCV(uint32_t &Type, uint64_t Contents) {
329-
return false;
330-
}
331-
332260
static uint64_t encodeValueX86(uint32_t Type, uint64_t Value, uint64_t PC) {
333261
switch (Type) {
334262
default:
@@ -798,19 +726,6 @@ bool Relocation::skipRelocationType(uint32_t Type) {
798726
}
799727
}
800728

801-
bool Relocation::skipRelocationProcess(uint32_t &Type, uint64_t Contents) {
802-
switch (Arch) {
803-
default:
804-
llvm_unreachable("Unsupported architecture");
805-
case Triple::aarch64:
806-
return skipRelocationProcessAArch64(Type, Contents);
807-
case Triple::riscv64:
808-
return skipRelocationProcessRISCV(Type, Contents);
809-
case Triple::x86_64:
810-
return skipRelocationProcessX86(Type, Contents);
811-
}
812-
}
813-
814729
uint64_t Relocation::encodeValue(uint32_t Type, uint64_t Value, uint64_t PC) {
815730
switch (Arch) {
816731
default:

bolt/lib/Rewrite/RewriteInstance.cpp

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2229,8 +2229,6 @@ bool RewriteInstance::analyzeRelocation(
22292229
ErrorOr<uint64_t> Value =
22302230
BC->getUnsignedValueAtAddress(Rel.getOffset(), RelSize);
22312231
assert(Value && "failed to extract relocated value");
2232-
if ((Skip = Relocation::skipRelocationProcess(RType, *Value)))
2233-
return true;
22342232

22352233
ExtractedValue = Relocation::extractValue(RType, *Value, Rel.getOffset());
22362234
Addend = getRelocationAddend(InputFile, Rel);
@@ -2283,17 +2281,14 @@ bool RewriteInstance::analyzeRelocation(
22832281
}
22842282
}
22852283

2286-
// If no symbol has been found or if it is a relocation requiring the
2287-
// creation of a GOT entry, do not link against the symbol but against
2288-
// whatever address was extracted from the instruction itself. We are
2289-
// not creating a GOT entry as this was already processed by the linker.
2290-
// For GOT relocs, do not subtract addend as the addend does not refer
2291-
// to this instruction's target, but it refers to the target in the GOT
2292-
// entry.
2293-
if (Relocation::isGOT(RType)) {
2294-
Addend = 0;
2295-
SymbolAddress = ExtractedValue + PCRelOffset;
2296-
} else if (Relocation::isTLS(RType)) {
2284+
// GOT relocation can cause the underlying instruction to be modified by the
2285+
// linker, resulting in the extracted value being different from the actual
2286+
// symbol. It's also possible to have a GOT entry for a symbol defined in the
2287+
// binary. In the latter case, the instruction can be using the GOT version
2288+
// causing the extracted value mismatch. Similar cases can happen for TLS.
2289+
// Pass the relocation information as is to the disassembler and let it decide
2290+
// how to use it for the operand symbolization.
2291+
if (Relocation::isGOT(RType) || Relocation::isTLS(RType)) {
22972292
SkipVerification = true;
22982293
} else if (!SymbolAddress) {
22992294
assert(!IsSectionRelocation);
@@ -2666,11 +2661,14 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
26662661

26672662
MCSymbol *ReferencedSymbol = nullptr;
26682663
if (!IsSectionRelocation) {
2669-
if (BinaryData *BD = BC->getBinaryDataByName(SymbolName))
2664+
if (BinaryData *BD = BC->getBinaryDataByName(SymbolName)) {
26702665
ReferencedSymbol = BD->getSymbol();
2671-
else if (BC->isGOTSymbol(SymbolName))
2666+
} else if (BC->isGOTSymbol(SymbolName)) {
26722667
if (BinaryData *BD = BC->getGOTSymbol())
26732668
ReferencedSymbol = BD->getSymbol();
2669+
} else if (BinaryData *BD = BC->getBinaryDataAtAddress(SymbolAddress)) {
2670+
ReferencedSymbol = BD->getSymbol();
2671+
}
26742672
}
26752673

26762674
ErrorOr<BinarySection &> ReferencedSection{std::errc::bad_address};
@@ -2798,15 +2796,14 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
27982796
}
27992797
}
28002798

2801-
if (ForceRelocation) {
2802-
std::string Name =
2803-
Relocation::isGOT(RType) ? "__BOLT_got_zero" : SymbolName;
2804-
ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0);
2805-
SymbolAddress = 0;
2806-
if (Relocation::isGOT(RType))
2807-
Addend = Address;
2799+
if (ForceRelocation && !ReferencedBF) {
2800+
// Create the relocation symbol if it's not defined in the binary.
2801+
if (SymbolAddress == 0)
2802+
ReferencedSymbol = BC->registerNameAtAddress(SymbolName, 0, 0, 0);
2803+
28082804
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: forcing relocation against symbol "
2809-
<< SymbolName << " with addend " << Addend << '\n');
2805+
<< ReferencedSymbol->getName() << " with addend "
2806+
<< Addend << '\n');
28102807
} else if (ReferencedBF) {
28112808
ReferencedSymbol = ReferencedBF->getSymbol();
28122809
uint64_t RefFunctionOffset = 0;

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
281281
return Inst.getOpcode() == AArch64::ADR;
282282
}
283283

284-
bool isAddXri(const MCInst &Inst) const {
284+
bool isAddXri(const MCInst &Inst) const override {
285285
return Inst.getOpcode() == AArch64::ADDXri;
286286
}
287287

@@ -318,7 +318,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
318318
Inst.getOpcode() == AArch64::CBZX);
319319
}
320320

321-
bool isMOVW(const MCInst &Inst) const {
321+
bool isMOVW(const MCInst &Inst) const override {
322322
return (Inst.getOpcode() == AArch64::MOVKWi ||
323323
Inst.getOpcode() == AArch64::MOVKXi ||
324324
Inst.getOpcode() == AArch64::MOVNWi ||

bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp

Lines changed: 62 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -45,24 +45,15 @@ bool AArch64MCSymbolizer::tryAddingSymbolicOperand(
4545
BC.MIB->getTargetExprFor(Inst, Expr, *Ctx, RelType)));
4646
};
4747

48-
// The linker can convert ADRP+ADD and ADRP+LDR instruction sequences into
49-
// NOP+ADR. After the conversion, the linker might keep the relocations and
50-
// if we try to symbolize ADR's operand using outdated relocations, we might
51-
// get unexpected results. Hence, we check for the conversion/relaxation, and
52-
// ignore the relocation. The symbolization is done based on the PC-relative
53-
// value of the operand instead.
54-
if (Relocation && BC.MIB->isADR(Inst)) {
55-
if (Relocation->Type == ELF::R_AARCH64_ADD_ABS_LO12_NC ||
56-
Relocation->Type == ELF::R_AARCH64_LD64_GOT_LO12_NC) {
57-
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation at 0x"
58-
<< Twine::utohexstr(InstAddress) << '\n');
59-
Relocation = nullptr;
48+
if (Relocation) {
49+
auto AdjustedRel = adjustRelocation(*Relocation, Inst);
50+
if (AdjustedRel) {
51+
addOperand(AdjustedRel->Symbol, AdjustedRel->Addend, AdjustedRel->Type);
52+
return true;
6053
}
61-
}
6254

63-
if (Relocation) {
64-
addOperand(Relocation->Symbol, Relocation->Addend, Relocation->Type);
65-
return true;
55+
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation at 0x"
56+
<< Twine::utohexstr(InstAddress) << '\n');
6657
}
6758

6859
if (!BC.MIB->hasPCRelOperand(Inst))
@@ -88,6 +79,61 @@ bool AArch64MCSymbolizer::tryAddingSymbolicOperand(
8879
return true;
8980
}
9081

82+
std::optional<Relocation>
83+
AArch64MCSymbolizer::adjustRelocation(const Relocation &Rel,
84+
const MCInst &Inst) const {
85+
BinaryContext &BC = Function.getBinaryContext();
86+
87+
// The linker can convert ADRP+ADD and ADRP+LDR instruction sequences into
88+
// NOP+ADR. After the conversion, the linker might keep the relocations and
89+
// if we try to symbolize ADR's operand using outdated relocations, we might
90+
// get unexpected results. Hence, we check for the conversion/relaxation, and
91+
// ignore the relocation. The symbolization is done based on the PC-relative
92+
// value of the operand instead.
93+
if (BC.MIB->isADR(Inst) && (Rel.Type == ELF::R_AARCH64_ADD_ABS_LO12_NC ||
94+
Rel.Type == ELF::R_AARCH64_LD64_GOT_LO12_NC))
95+
return std::nullopt;
96+
97+
// The linker might perform TLS relocations relaxations, such as changed TLS
98+
// access model (e.g. changed global dynamic model to initial exec), thus
99+
// changing the instructions. The static relocations might be invalid at this
100+
// point and we don't have to process these relocations anymore. More
101+
// information could be found by searching elfNN_aarch64_tls_relax in bfd.
102+
if (BC.MIB->isMOVW(Inst)) {
103+
switch (Rel.Type) {
104+
default:
105+
break;
106+
case ELF::R_AARCH64_TLSDESC_LD64_LO12:
107+
case ELF::R_AARCH64_TLSDESC_ADR_PAGE21:
108+
case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
109+
case ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
110+
return std::nullopt;
111+
}
112+
}
113+
114+
if (!Relocation::isGOT(Rel.Type))
115+
return Rel;
116+
117+
Relocation AdjustedRel = Rel;
118+
if (Rel.Type == ELF::R_AARCH64_LD64_GOT_LO12_NC && BC.MIB->isAddXri(Inst)) {
119+
// The ADRP+LDR sequence was converted into ADRP+ADD. We are looking at the
120+
// second instruction and have to use the relocation type for ADD.
121+
AdjustedRel.Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
122+
} else {
123+
// For instructions that reference GOT, ignore the referenced symbol and
124+
// use value at the relocation site. FixRelaxationPass will look at
125+
// instruction pairs and will perform necessary adjustments.
126+
ErrorOr<uint64_t> SymbolValue = BC.getSymbolValue(*Rel.Symbol);
127+
assert(SymbolValue && "Symbol value should be set");
128+
const uint64_t SymbolPageAddr = *SymbolValue & ~0xfffULL;
129+
130+
AdjustedRel.Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
131+
AdjustedRel.Addend = Rel.Value;
132+
}
133+
134+
return AdjustedRel;
135+
}
136+
91137
void AArch64MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
92138
int64_t Value,
93139
uint64_t Address) {}

bolt/lib/Target/AArch64/AArch64MCSymbolizer.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include "bolt/Core/BinaryFunction.h"
1313
#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
14+
#include <optional>
1415

1516
namespace llvm {
1617
namespace bolt {
@@ -20,6 +21,13 @@ class AArch64MCSymbolizer : public MCSymbolizer {
2021
BinaryFunction &Function;
2122
bool CreateNewSymbols{true};
2223

24+
/// Modify relocation \p Rel based on type of the relocation and the
25+
/// instruction it was applied to. Return the new relocation info, or
26+
/// std::nullopt if the relocation should be ignored, e.g. in the case the
27+
/// instruction was modified by the linker.
28+
std::optional<Relocation> adjustRelocation(const Relocation &Rel,
29+
const MCInst &Inst) const;
30+
2331
public:
2432
AArch64MCSymbolizer(BinaryFunction &Function, bool CreateNewSymbols = true)
2533
: MCSymbolizer(*Function.getBinaryContext().Ctx.get(), nullptr),

0 commit comments

Comments
 (0)