Skip to content

Commit 7f43b6b

Browse files
committed
[BOLT] Ignore AArch64 markers outside their sections.
AArch64 uses $d and $x symbols to delimit data embedded in code. However, sometimes we see $d symbols, typically in .eh_frame, with addresses that belong to different sections. These occasionally fall inside .text functions and cause BOLT to stop disassembling, which in turn causes DWARF CFA processing to fail. As a workaround, we just ignore symbols with addresses outside the section they belong to. This behaviour is consistent with objdump and similar tools.
1 parent 2b0b0ad commit 7f43b6b

File tree

3 files changed

+132
-1
lines changed

3 files changed

+132
-1
lines changed

bolt/lib/Rewrite/RewriteInstance.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -865,14 +865,28 @@ void RewriteInstance::discoverFileObjects() {
865865

866866
std::vector<MarkerSym> SortedMarkerSymbols;
867867
auto addExtraDataMarkerPerSymbol = [&]() {
868+
// Some ELFs have marker symbols with addresses outside their section.
869+
// This occurs, for example, with some `.eh_frame` symbols, and sometimes
870+
// (if rarely) they interfere with the disassembly of `.text` functions. As
871+
// a workaround, we ignore all symbols that lie outside their sections.
872+
// Returns true if the symbol should be considered as a marker candidate,
// i.e. its address lies inside the section it claims to belong to, or its
// section cannot be determined (in which case it is kept, as it was
// before this check existed).
auto considerSymbol = [](const SymbolInfo &S) {
  auto SectionOrError = S.Symbol.getSection();
  if (!SectionOrError) {
    // An Expected in the error state must have its error taken before it
    // is destroyed; merely testing it with operator bool is not enough and
    // aborts in builds with ABI-breaking checks enabled.
    llvm::consumeError(SectionOrError.takeError());
    return true;
  }

  // getSection() yields section_end() for symbols that are not defined
  // relative to a section (undefined or absolute symbols). That iterator
  // must not be dereferenced, and there is no range to validate against.
  const auto Section = *SectionOrError;
  if (Section == S.Symbol.getObject()->section_end())
    return true;

  // Compare the offset against the size rather than computing
  // SecStart + Size, which could wrap around for malformed headers.
  const uint64_t SecStart = Section->getAddress();
  const uint64_t SecSize = Section->getSize();
  return S.Address >= SecStart && S.Address - SecStart < SecSize;
};
881+
868882
bool IsData = false;
869883
uint64_t LastAddr = 0;
870884
for (const auto &SymInfo : SortedSymbols) {
871885
if (LastAddr == SymInfo.Address) // don't repeat markers
872886
continue;
873887

874888
MarkerSymType MarkerType = BC->getMarkerType(SymInfo.Symbol);
875-
if (MarkerType != MarkerSymType::NONE) {
889+
if (considerSymbol(SymInfo) && MarkerType != MarkerSymType::NONE) {
876890
SortedMarkerSymbols.push_back(MarkerSym{SymInfo.Address, MarkerType});
877891
LastAddr = SymInfo.Address;
878892
IsData = MarkerType == MarkerSymType::DATA;
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
--- !ELF
2+
FileHeader:
3+
Class: ELFCLASS64
4+
Data: ELFDATA2LSB
5+
Type: ET_EXEC
6+
Machine: EM_AARCH64
7+
Entry: 0x2a0000
8+
ProgramHeaders:
9+
- Type: PT_PHDR
10+
Flags: [ PF_R ]
11+
VAddr: 0x40
12+
Align: 0x8
13+
FileSize: 0xa8
14+
MemSize: 0xa8
15+
Offset: 0x40
16+
- Type: PT_LOAD
17+
Flags: [ PF_R ]
18+
VAddr: 0x0
19+
Align: 0x10000
20+
FileSize: 0xf8
21+
MemSize: 0xf8
22+
Offset: 0x0
23+
- Type: PT_LOAD
24+
Flags: [ PF_X, PF_R ]
25+
VAddr: 0x2a0000
26+
Align: 0x10000
27+
FirstSec: .text
28+
LastSec: .ignored
29+
Sections:
30+
- Name: .text
31+
Type: SHT_PROGBITS
32+
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
33+
Address: 0x2a0000
34+
AddressAlign: 0x4
35+
Content: 400580d2c0035fd6
36+
- Name: .ignored
37+
Type: SHT_PROGBITS
38+
Flags: [ SHF_ALLOC ]
39+
Address: 0x2a0008
40+
AddressAlign: 0x8
41+
Size: 0x8
42+
- Name: .eh_frame
43+
Type: SHT_PROGBITS
44+
Flags: [ SHF_ALLOC ]
45+
Address: 0x2a0010
46+
AddressAlign: 0x8
47+
Content: 1000000000000000017a520004781e010b0c1f00140000001800000000002a0008000000000e01410e010000
48+
Symbols:
49+
- Name: func
50+
Section: .text
51+
Value: 0x2a0000
52+
Size: 0x8
53+
- Name: '$d.42'
54+
Section: .ignored
55+
Value: 0x2a0004
56+
...
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
// Check that marker symbols ($d, $x) denoting data embedded in code are ignored
2+
// if they fall outside their respective sections.
3+
4+
// RUN: yaml2obj %S/Inputs/spurious-marker-symbol.yaml -o %t.exe
5+
// RUN: llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s
6+
// CHECK: 1 out of 1 functions were overwritten
7+
// RUN: llvm-objdump -j .text -d %t.bolt | FileCheck %s -check-prefix=CHECK-DISASM
8+
// CHECK-DISASM: func
9+
// CHECK-DISASM: 2a0000: d2800540 mov
10+
// CHECK-DISASM: 2a0004: d65f03c0 ret
11+
12+
// The YAML encodes the following assembly and debug information:
13+
14+
.text
15+
.globl func
16+
.type func, %function
17+
func:
18+
mov x0, #42
19+
// $d.42: (symbol in .ignored, with an address in .text)
20+
ret
21+
22+
// .eh_frame contains minimal DWARF with a CFA operation on the `ret`. BOLT
23+
// should ignore the spurious `$d.42`. If it doesn't, then it will stop
24+
// disassembling after the `mov` and will fail to process the second
25+
// DW_CFA_def_cfa_offset.
26+
//
27+
// CIE
28+
// length: 00000010
29+
// CIE_id: 00000000
30+
// version: 01
31+
// augmentation:
32+
// "zR" 7a 52 00
33+
// - read augmentation data
34+
// - read FDE pointer encoding
35+
// code_alignment_factor: 04
36+
// data_alignment_factor: 78 (-8)
37+
// return_address_register: 1e (r30 / lr)
38+
//
39+
// augmentation data:
40+
// length: 01
41+
// FDE pointers are absptr+sdata4 0b
42+
//
43+
// initial_instructions:
44+
// DW_CFA_def_cfa (31, 0): 0c 1f 00
45+
//
46+
// Encoding: 10000000'00000000'01'7a5200'04'78'1e'01'0b'0c1f00
47+
//
48+
// FDE
49+
// length: 00000014
50+
// CIE_pointer: 00000018 (backwards offset from here to CIE)
51+
// initial_location: 002a0000 (`func` as absptr+sdata4)
52+
// address_range: 00000008
53+
// augmentation data:
54+
// length: 00
55+
// instructions:
56+
// DW_CFA_def_cfa_offset (1) 0e 01
57+
// DW_CFA_advance_loc (1) 41 (`ret` at 0x2a0004)
58+
// DW_CFA_def_cfa_offset (1) 0e 01 Fails unless $d.42 is ignored.
59+
// DW_CFA_nop 00 00
60+
//
61+
// Encoding: 14000000'18000000'00002a00'08000000'000e0141'0e010000

0 commit comments

Comments
 (0)