Skip to content

Commit 16cd5cd

Browse files
authored
[BOLT] Ignore AArch64 markers outside their sections. (#74106)
AArch64 uses $d and $x symbols to delimit data embedded in code. However, sometimes we see $d symbols, typically in .eh_frame, with addresses that belong to different sections. These occasionally fall inside .text functions and cause BOLT to stop disassembling, which in turn causes DWARF CFA processing to fail. As a workaround, we just ignore symbols with addresses outside the section they belong to. This behaviour is consistent with objdump and similar tools.
1 parent 3d0b283 commit 16cd5cd

File tree

4 files changed

+155
-3
lines changed

4 files changed

+155
-3
lines changed

bolt/lib/Rewrite/RewriteInstance.cpp

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -789,9 +789,44 @@ void RewriteInstance::discoverFileObjects() {
789789
BinarySection Section(*BC, *cantFail(Sym.getSection()));
790790
return Section.isAllocatable();
791791
};
792+
auto checkSymbolInSection = [this](const SymbolInfo &S) {
  // Sometimes, we encounter symbols with addresses outside their section. If
  // such symbols happen to fall into another section, they can interfere with
  // disassembly. Notably, this occurs with AArch64 marker symbols ($d and $x)
  // that belong to .eh_frame, but end up pointing into .text.
  // As a workaround, we ignore all symbols that lie outside their sections.
  //
  // Returns true if the symbol should be kept and false if it must be
  // ignored. A warning is printed for ignored symbols, except for ST_Other
  // symbols when verbosity is below 1.
  auto Section = cantFail(S.Symbol.getSection());

  // Accept all absolute symbols.
  if (Section == InputFile->section_end())
    return true;

  const uint64_t SecStart = Section->getAddress();
  const uint64_t SecEnd = SecStart + Section->getSize();
  const uint64_t SymSize = ELFSymbolRef(S.Symbol).getSize();
  // Compare the symbol size against the space left in the section instead of
  // computing Address + Size: that sum can wrap around for a bogus size and
  // make an out-of-range symbol look as if it were contained.
  if (S.Address >= SecStart && S.Address <= SecEnd &&
      SymSize <= SecEnd - S.Address)
    return true;

  auto SymType = cantFail(S.Symbol.getType());
  // Skip warnings for common benign cases.
  if (opts::Verbosity < 1 && SymType == SymbolRef::ST_Other)
    return false; // E.g. ELF::STT_TLS.

  // Resolve a name for the diagnostic. A failed lookup must have its error
  // consumed explicitly; merely testing the Expected leaves it unchecked.
  auto NameOrPlaceholder = [](auto NameOrErr) -> StringRef {
    if (NameOrErr)
      return *NameOrErr;
    consumeError(NameOrErr.takeError());
    return "[unnamed]";
  };

  // Reuse the section resolved above rather than looking it up again.
  BC->errs() << "BOLT-WARNING: ignoring symbol "
             << NameOrPlaceholder(S.Symbol.getName()) << " at 0x"
             << Twine::utohexstr(S.Address) << ", which lies outside "
             << NameOrPlaceholder(Section->getName()) << "\n";

  return false;
};
792824
for (const SymbolRef &Symbol : InputFile->symbols())
793-
if (isSymbolInMemory(Symbol))
794-
SortedSymbols.push_back({cantFail(Symbol.getAddress()), Symbol});
825+
if (isSymbolInMemory(Symbol)) {
826+
SymbolInfo SymInfo{cantFail(Symbol.getAddress()), Symbol};
827+
if (checkSymbolInSection(SymInfo))
828+
SortedSymbols.push_back(SymInfo);
829+
}
795830

796831
auto CompareSymbols = [this](const SymbolInfo &A, const SymbolInfo &B) {
797832
if (A.Address != B.Address)
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
--- !ELF
2+
FileHeader:
3+
Class: ELFCLASS64
4+
Data: ELFDATA2LSB
5+
Type: ET_EXEC
6+
Machine: EM_AARCH64
7+
Entry: 0x2a0000
8+
ProgramHeaders:
9+
- Type: PT_PHDR
10+
Flags: [ PF_R ]
11+
VAddr: 0x40
12+
Align: 0x8
13+
FileSize: 0xa8
14+
MemSize: 0xa8
15+
Offset: 0x40
16+
- Type: PT_LOAD
17+
Flags: [ PF_R ]
18+
VAddr: 0x0
19+
Align: 0x10000
20+
FileSize: 0xf8
21+
MemSize: 0xf8
22+
Offset: 0x0
23+
- Type: PT_LOAD
24+
Flags: [ PF_X, PF_R ]
25+
VAddr: 0x2a0000
26+
Align: 0x10000
27+
FirstSec: .text
28+
LastSec: .ignored
29+
Sections:
30+
- Name: .text
31+
Type: SHT_PROGBITS
32+
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
33+
Address: 0x2a0000
34+
AddressAlign: 0x4
35+
Content: 400580d2c0035fd6
36+
- Name: .ignored
37+
Type: SHT_PROGBITS
38+
Flags: [ SHF_ALLOC ]
39+
Address: 0x2a0008
40+
AddressAlign: 0x8
41+
Size: 0x8
42+
- Name: .eh_frame
43+
Type: SHT_PROGBITS
44+
Flags: [ SHF_ALLOC ]
45+
Address: 0x2a0010
46+
AddressAlign: 0x8
47+
Content: 1000000000000000017a520004781e010b0c1f00140000001800000000002a0008000000000e01410e010000
48+
Symbols:
49+
- Name: func
50+
Section: .text
51+
Value: 0x2a0000
52+
Size: 0x8
53+
- Name: '$d.42'
54+
Section: .ignored
55+
Value: 0x2a0004
56+
...
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
// Check that marker symbols ($d, $x) denoting data embedded in code are ignored
2+
// if they fall outside their respective sections.
3+
4+
// RUN: yaml2obj %S/Inputs/spurious-marker-symbol.yaml -o %t.exe
5+
// RUN: llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s
6+
// CHECK: 1 out of 1 functions were overwritten
7+
// RUN: llvm-objdump -j .text -d %t.bolt | FileCheck %s -check-prefix=CHECK-DISASM
8+
// CHECK-DISASM: func
9+
// CHECK-DISASM: 2a0000: d2800540 mov
10+
// CHECK-DISASM: 2a0004: d65f03c0 ret
11+
12+
// The YAML encodes the following assembly and debug information:
13+
14+
.text
15+
.globl func
16+
.type func, %function
17+
func:
18+
mov x0, #42
19+
// $d.42: (symbol in .ignored, with an address in .text)
20+
ret
21+
22+
// .eh_frame contains minimal DWARF with a CFA operation on the `ret`. BOLT
23+
// should ignore the spurious `$d.42`. If it doesn't, then it will stop
24+
// disassembling after the `mov` and will fail to process the second
25+
// DW_CFA_def_cfa_offset.
26+
//
27+
// CIE
28+
// length: 00000010
29+
// CIE_id: 00000000
30+
// version: 01
31+
// augmentation:
32+
// "zR" 7a 52 00
33+
// - read augmentation data
34+
// - read FDE pointer encoding
35+
// code_alignment_factor: 04
36+
// data_alignment_factor: 78 (-8)
37+
// return_address_register: 1e (r30 / lr)
38+
//
39+
// augmentation data:
40+
// length: 01
41+
// FDE pointers are absptr+sdata4 0b
42+
//
43+
// initial_instructions:
44+
// DW_CFA_def_cfa (31, 0): 0c 1f 00
45+
//
46+
// Encoding: 10000000'00000000'01'7a5200'04'78'1e'01'0b'0c1f00
47+
//
48+
// FDE
49+
// length: 00000014
50+
// CIE_pointer: 00000018 (backwards offset from here to CIE)
51+
// initial_location: 002a0000 (`func` as absptr+sdata4)
52+
// address_range: 00000008
53+
// augmentation data:
54+
// length: 00
55+
// instructions:
56+
// DW_CFA_def_cfa_offset (1) 0e 01
57+
// DW_CFA_advance_loc (1) 41 (`ret` at 0x2a0004)
58+
// DW_CFA_def_cfa_offset (1) 0e 01 Fails unless $d.42 is ignored.
59+
// DW_CFA_nop 00 00
60+
//
61+
// Encoding: 14000000'18000000'00002a00'08000000'000e0141'0e010000

llvm/include/llvm/Object/ObjectFile.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ class SymbolRef : public BasicSymbolRef {
199199
Expected<SymbolRef::Type> getType() const;
200200

201201
/// Get section this symbol is defined in reference to. Result is
202-
/// end_sections() if it is undefined or is an absolute symbol.
202+
/// section_end() if it is undefined or is an absolute symbol.
203203
Expected<section_iterator> getSection() const;
204204

205205
const ObjectFile *getObject() const;

0 commit comments

Comments
 (0)