Skip to content

Commit 8e8ab6b

Browse files
committed
Skip label symbols when inferring symbol sizes
COFF objects in particular don't contain the size of symbols. We infer the size of these symbols by extending them to the next symbol. If a tool emits symbols for branch targets, this causes the inferred size to be too small. This checks if a symbol starts with a certain prefix (right now, just .L or LAB_), and skips over it during symbol size inference. Resolves #174
1 parent e865f3d commit 8e8ab6b

File tree

6 files changed

+204
-22
lines changed

6 files changed

+204
-22
lines changed

objdiff-core/src/arch/arm.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ impl Arch for ArchArm {
225225

226226
let mut address = start_addr;
227227
while address < end_addr {
228-
while let Some(next) = next_mapping.take_if(|x| address >= x.address) {
228+
while let Some(next) = next_mapping.filter(|x| address >= x.address) {
229229
// Change mapping
230230
mode = next.mapping;
231231
next_mapping = mappings_iter.next();

objdiff-core/src/obj/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ impl Section {
118118
Err(i) => self
119119
.relocations
120120
.get(i)
121-
.take_if(|r| r.address < ins_ref.address + ins_ref.size as u64),
121+
.filter(|r| r.address < ins_ref.address + ins_ref.size as u64),
122122
}
123123
.and_then(|relocation| {
124124
let symbol = obj.symbols.get(relocation.target_symbol)?;

objdiff-core/src/obj/read.rs

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,15 @@ fn map_symbols(
121121
Ok((symbols, symbol_indices))
122122
}
123123

124+
/// When inferring a symbol's size, we ignore symbols that start with specific prefixes. They are
125+
/// usually emitted as branch targets and do not represent the start of a function or object.
126+
fn is_local_label(symbol: &Symbol) -> bool {
127+
const LABEL_PREFIXES: &[&str] = &[".L", "LAB_"];
128+
symbol.size == 0
129+
&& symbol.flags.contains(SymbolFlag::Local)
130+
&& LABEL_PREFIXES.iter().any(|p| symbol.name.starts_with(p))
131+
}
132+
124133
fn infer_symbol_sizes(symbols: &mut [Symbol], sections: &[Section]) {
125134
// Create a sorted list of symbol indices by section
126135
let mut symbols_with_section = Vec::<usize>::with_capacity(symbols.len());
@@ -167,27 +176,28 @@ fn infer_symbol_sizes(symbols: &mut [Symbol], sections: &[Section]) {
167176
if last_end.0 == section_idx && last_end.1 > symbol.address {
168177
continue;
169178
}
170-
let next_symbol = match symbol.kind {
171-
// For function/object symbols, find the next function/object symbol (in other words:
172-
// skip over labels)
173-
SymbolKind::Function | SymbolKind::Object => loop {
174-
if iter_idx >= symbols_with_section.len() {
175-
break None;
176-
}
177-
let next_symbol = &symbols[symbols_with_section[iter_idx]];
178-
if next_symbol.section != Some(section_idx) {
179-
break None;
179+
let next_symbol = loop {
180+
if iter_idx >= symbols_with_section.len() {
181+
break None;
182+
}
183+
let next_symbol = &symbols[symbols_with_section[iter_idx]];
184+
if next_symbol.section != Some(section_idx) {
185+
break None;
186+
}
187+
if match symbol.kind {
188+
SymbolKind::Function | SymbolKind::Object => {
189+
// For function/object symbols, find the next function/object
190+
matches!(next_symbol.kind, SymbolKind::Function | SymbolKind::Object)
180191
}
181-
if let SymbolKind::Function | SymbolKind::Object = next_symbol.kind {
182-
break Some(next_symbol);
192+
SymbolKind::Unknown | SymbolKind::Section => {
193+
// For labels (or anything else), stop at any symbol
194+
true
183195
}
184-
iter_idx += 1;
185-
},
186-
// For labels (or anything else), simply use the next symbol's address
187-
SymbolKind::Unknown | SymbolKind::Section => symbols_with_section
188-
.get(iter_idx)
189-
.map(|&i| &symbols[i])
190-
.take_if(|s| s.section == Some(section_idx)),
196+
} && !is_local_label(next_symbol)
197+
{
198+
break Some(next_symbol);
199+
}
200+
iter_idx += 1;
191201
};
192202
let next_address = next_symbol.map(|s| s.address).unwrap_or_else(|| {
193203
let section = &sections[section_idx];
@@ -341,7 +351,7 @@ fn map_section_relocations(
341351
let idx = if let Some(section_symbol) = obj_file
342352
.symbol_by_index(idx)
343353
.ok()
344-
.take_if(|s| s.kind() == object::SymbolKind::Section)
354+
.filter(|s| s.kind() == object::SymbolKind::Section)
345355
{
346356
let section_index =
347357
section_symbol.section_index().context("Section symbol without section")?;

objdiff-core/tests/arch_x86.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,12 @@ fn read_x86_jumptable() {
6868
let output = common::display_diff(&obj, &diff, symbol_idx, &diff_config);
6969
insta::assert_snapshot!(output);
7070
}
71+
72+
// Inferred size of functions should ignore symbols with specific prefixes
73+
#[test]
74+
#[cfg(feature = "x86")]
75+
fn read_x86_local_labels() {
76+
let diff_config = diff::DiffObjConfig::default();
77+
let obj = obj::read::parse(include_object!("data/x86/local_labels.obj"), &diff_config).unwrap();
78+
insta::assert_debug_snapshot!(obj);
79+
}
526 Bytes
Binary file not shown.
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
---
2+
source: objdiff-core/tests/arch_x86.rs
3+
expression: obj
4+
---
5+
Object {
6+
arch: ArchX86 {
7+
arch: X86,
8+
endianness: Little,
9+
},
10+
endianness: Little,
11+
symbols: [
12+
Symbol {
13+
name: "42b830_convertToUppercaseShiftJIS.obj",
14+
demangled_name: None,
15+
address: 0,
16+
size: 0,
17+
kind: Unknown,
18+
section: None,
19+
flags: FlagSet(Local),
20+
align: None,
21+
virtual_address: None,
22+
},
23+
Symbol {
24+
name: "[.text]",
25+
demangled_name: None,
26+
address: 0,
27+
size: 0,
28+
kind: Section,
29+
section: Some(
30+
0,
31+
),
32+
flags: FlagSet(Local),
33+
align: None,
34+
virtual_address: None,
35+
},
36+
Symbol {
37+
name: "LAB_0042b850",
38+
demangled_name: None,
39+
address: 32,
40+
size: 0,
41+
kind: Object,
42+
section: Some(
43+
0,
44+
),
45+
flags: FlagSet(Local),
46+
align: None,
47+
virtual_address: None,
48+
},
49+
Symbol {
50+
name: "LAB_0042b883",
51+
demangled_name: None,
52+
address: 83,
53+
size: 0,
54+
kind: Object,
55+
section: Some(
56+
0,
57+
),
58+
flags: FlagSet(Local),
59+
align: None,
60+
virtual_address: None,
61+
},
62+
Symbol {
63+
name: "LAB_0042b87c",
64+
demangled_name: None,
65+
address: 76,
66+
size: 0,
67+
kind: Object,
68+
section: Some(
69+
0,
70+
),
71+
flags: FlagSet(Local),
72+
align: None,
73+
virtual_address: None,
74+
},
75+
Symbol {
76+
name: "LAB_0042b884",
77+
demangled_name: None,
78+
address: 84,
79+
size: 0,
80+
kind: Object,
81+
section: Some(
82+
0,
83+
),
84+
flags: FlagSet(Local),
85+
align: None,
86+
virtual_address: None,
87+
},
88+
Symbol {
89+
name: "LAB_0042b889",
90+
demangled_name: None,
91+
address: 89,
92+
size: 0,
93+
kind: Object,
94+
section: Some(
95+
0,
96+
),
97+
flags: FlagSet(Local),
98+
align: None,
99+
virtual_address: None,
100+
},
101+
Symbol {
102+
name: "LAB_0042b845",
103+
demangled_name: None,
104+
address: 21,
105+
size: 0,
106+
kind: Object,
107+
section: Some(
108+
0,
109+
),
110+
flags: FlagSet(Local),
111+
align: None,
112+
virtual_address: None,
113+
},
114+
Symbol {
115+
name: "LAB_0042b869",
116+
demangled_name: None,
117+
address: 57,
118+
size: 0,
119+
kind: Object,
120+
section: Some(
121+
0,
122+
),
123+
flags: FlagSet(Local),
124+
align: None,
125+
virtual_address: None,
126+
},
127+
Symbol {
128+
name: "ConvertToUppercaseShiftJIS",
129+
demangled_name: None,
130+
address: 0,
131+
size: 92,
132+
kind: Function,
133+
section: Some(
134+
0,
135+
),
136+
flags: FlagSet(Global | SizeInferred),
137+
align: None,
138+
virtual_address: None,
139+
},
140+
],
141+
sections: [
142+
Section {
143+
id: ".text-0",
144+
name: ".text",
145+
address: 0,
146+
size: 92,
147+
kind: Code,
148+
data: SectionData(
149+
92,
150+
),
151+
flags: FlagSet(),
152+
align: Some(
153+
16,
154+
),
155+
relocations: [],
156+
line_info: {},
157+
virtual_address: None,
158+
},
159+
],
160+
split_meta: None,
161+
path: None,
162+
timestamp: None,
163+
}

0 commit comments

Comments
 (0)