Skip to content

Commit eaba239

Browse files
committed
arm: Fix thumb & data decoding, add tests
1 parent bd8f5ed commit eaba239

File tree

9 files changed

+5550
-54
lines changed

9 files changed

+5550
-54
lines changed

objdiff-core/src/arch/arm.rs

Lines changed: 128 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -84,18 +84,11 @@ impl ArchArm {
8484
.filter_map(|s| DisasmMode::from_symbol(&s))
8585
.collect();
8686
mapping_symbols.sort_unstable_by_key(|x| x.address);
87-
(s.index().0, mapping_symbols)
87+
(s.index().0 - 1, mapping_symbols)
8888
})
8989
.collect()
9090
}
9191

92-
fn endian(&self) -> unarm::Endian {
93-
match self.endianness {
94-
object::Endianness::Little => unarm::Endian::Little,
95-
object::Endianness::Big => unarm::Endian::Big,
96-
}
97-
}
98-
9992
fn parse_flags(&self, diff_config: &DiffObjConfig) -> unarm::ParseFlags {
10093
unarm::ParseFlags {
10194
ual: diff_config.arm_unified_syntax,
@@ -130,6 +123,31 @@ impl ArchArm {
130123
code: &[u8],
131124
diff_config: &DiffObjConfig,
132125
) -> Result<(unarm::Ins, unarm::ParsedIns)> {
126+
if ins_ref.opcode == thumb::Opcode::BlH as u16 && ins_ref.size == 4 {
127+
// Special case: combined thumb BL instruction
128+
let parse_flags = self.parse_flags(diff_config);
129+
let first_ins = thumb::Ins {
130+
code: match self.endianness {
131+
object::Endianness::Little => u16::from_le_bytes([code[0], code[1]]),
132+
object::Endianness::Big => u16::from_be_bytes([code[0], code[1]]),
133+
} as u32,
134+
op: thumb::Opcode::BlH,
135+
};
136+
let second_ins = thumb::Ins::new(
137+
match self.endianness {
138+
object::Endianness::Little => u16::from_le_bytes([code[2], code[3]]),
139+
object::Endianness::Big => u16::from_be_bytes([code[2], code[3]]),
140+
} as u32,
141+
&parse_flags,
142+
);
143+
let first_parsed = first_ins.parse(&parse_flags);
144+
let second_parsed = second_ins.parse(&parse_flags);
145+
return Ok((
146+
unarm::Ins::Thumb(first_ins),
147+
first_parsed.combine_thumb_bl(&second_parsed),
148+
));
149+
}
150+
133151
let code = match (self.endianness, ins_ref.size) {
134152
(object::Endianness::Little, 2) => u16::from_le_bytes([code[0], code[1]]) as u32,
135153
(object::Endianness::Little, 4) => {
@@ -153,11 +171,7 @@ impl ArchArm {
153171
} else {
154172
let ins = thumb::Ins { code, op: thumb::Opcode::from(ins_ref.opcode as u8) };
155173
let parsed = ins.parse(&self.parse_flags(diff_config));
156-
if ins.is_half_bl() {
157-
todo!("Combine thumb BL instructions");
158-
} else {
159-
(unarm::Ins::Thumb(ins), parsed)
160-
}
174+
(unarm::Ins::Thumb(ins), parsed)
161175
};
162176
Ok((ins, parsed_ins))
163177
}
@@ -185,60 +199,127 @@ impl Arch for ArchArm {
185199
let first_mapping_idx = mapping_symbols
186200
.binary_search_by_key(&start_addr, |x| x.address)
187201
.unwrap_or_else(|idx| idx - 1);
188-
let first_mapping = mapping_symbols[first_mapping_idx].mapping;
202+
let mut mode = mapping_symbols[first_mapping_idx].mapping;
189203

190-
let mut mappings_iter =
191-
mapping_symbols.iter().skip(first_mapping_idx + 1).take_while(|x| x.address < end_addr);
204+
let mut mappings_iter = mapping_symbols
205+
.iter()
206+
.copied()
207+
.skip(first_mapping_idx + 1)
208+
.take_while(|x| x.address < end_addr);
192209
let mut next_mapping = mappings_iter.next();
193210

194-
let ins_count = code.len() / first_mapping.instruction_size(start_addr);
211+
let ins_count = code.len() / mode.instruction_size(start_addr);
195212
let mut ops = Vec::<ScannedInstruction>::with_capacity(ins_count);
196213

197-
let endian = self.endian();
198214
let parse_flags = self.parse_flags(diff_config);
199-
let mut parser = unarm::Parser::new(first_mapping, start_addr, endian, parse_flags, code);
200-
201-
while let Some((address, ins, _parsed_ins)) = parser.next() {
202-
let size = parser.mode.instruction_size(address);
203-
if let Some(next) = next_mapping {
204-
let next_address = parser.address;
205-
if next_address >= next.address {
206-
// Change mapping
207-
parser.mode = next.mapping;
208-
next_mapping = mappings_iter.next();
209-
}
215+
216+
let mut address = start_addr;
217+
while address < end_addr {
218+
while let Some(next) = next_mapping.take_if(|x| address >= x.address) {
219+
// Change mapping
220+
mode = next.mapping;
221+
next_mapping = mappings_iter.next();
210222
}
211-
let (opcode, branch_dest) = match ins {
212-
unarm::Ins::Arm(x) => {
213-
let opcode = x.op as u16 | (1 << 15);
214-
let branch_dest = match x.op {
223+
224+
let mut ins_size = mode.instruction_size(address);
225+
let data = &code[(address - start_addr) as usize..];
226+
if data.len() < ins_size {
227+
// Push the remainder as data
228+
ops.push(ScannedInstruction {
229+
ins_ref: InstructionRef {
230+
address: address as u64,
231+
size: data.len() as u8,
232+
opcode: u16::MAX,
233+
},
234+
branch_dest: None,
235+
});
236+
break;
237+
}
238+
let code = match (self.endianness, ins_size) {
239+
(object::Endianness::Little, 2) => u16::from_le_bytes([data[0], data[1]]) as u32,
240+
(object::Endianness::Little, 4) => {
241+
u32::from_le_bytes([data[0], data[1], data[2], data[3]])
242+
}
243+
(object::Endianness::Big, 2) => u16::from_be_bytes([data[0], data[1]]) as u32,
244+
(object::Endianness::Big, 4) => {
245+
u32::from_be_bytes([data[0], data[1], data[2], data[3]])
246+
}
247+
_ => {
248+
// Invalid instruction size
249+
ops.push(ScannedInstruction {
250+
ins_ref: InstructionRef {
251+
address: address as u64,
252+
size: ins_size as u8,
253+
opcode: u16::MAX,
254+
},
255+
branch_dest: None,
256+
});
257+
address += ins_size as u32;
258+
continue;
259+
}
260+
};
261+
262+
let (opcode, branch_dest) = match mode {
263+
unarm::ParseMode::Arm => {
264+
let ins = arm::Ins::new(code, &parse_flags);
265+
let opcode = ins.op as u16 | (1 << 15);
266+
let branch_dest = match ins.op {
215267
arm::Opcode::B | arm::Opcode::Bl => {
216-
address.checked_add_signed(x.field_branch_offset())
268+
address.checked_add_signed(ins.field_branch_offset())
217269
}
218-
arm::Opcode::BlxI => address.checked_add_signed(x.field_blx_offset()),
270+
arm::Opcode::BlxI => address.checked_add_signed(ins.field_blx_offset()),
219271
_ => None,
220272
};
221273
(opcode, branch_dest)
222274
}
223-
unarm::Ins::Thumb(x) => {
224-
let opcode = x.op as u16;
225-
let branch_dest = match x.op {
275+
unarm::ParseMode::Thumb => {
276+
let ins = thumb::Ins::new(code, &parse_flags);
277+
let opcode = ins.op as u16;
278+
let branch_dest = match ins.op {
226279
thumb::Opcode::B | thumb::Opcode::Bl => {
227-
address.checked_add_signed(x.field_branch_offset_8())
280+
address.checked_add_signed(ins.field_branch_offset_8())
281+
}
282+
thumb::Opcode::BlH if data.len() >= 4 => {
283+
// Combine BL instructions
284+
let second_ins = thumb::Ins::new(
285+
match self.endianness {
286+
object::Endianness::Little => {
287+
u16::from_le_bytes([data[2], data[3]]) as u32
288+
}
289+
object::Endianness::Big => {
290+
u16::from_be_bytes([data[2], data[3]]) as u32
291+
}
292+
},
293+
&parse_flags,
294+
);
295+
if let Some(low) = match second_ins.op {
296+
thumb::Opcode::Bl => Some(second_ins.field_low_branch_offset_11()),
297+
thumb::Opcode::BlxI => Some(second_ins.field_low_blx_offset_11()),
298+
_ => None,
299+
} {
300+
ins_size = 4;
301+
address.checked_add_signed(
302+
(ins.field_high_branch_offset_11() + (low as i32)) << 9 >> 9,
303+
)
304+
} else {
305+
None
306+
}
228307
}
229308
thumb::Opcode::BLong => {
230-
address.checked_add_signed(x.field_branch_offset_11())
309+
address.checked_add_signed(ins.field_branch_offset_11())
231310
}
232311
_ => None,
233312
};
234313
(opcode, branch_dest)
235314
}
236-
unarm::Ins::Data => (u16::MAX, None),
315+
unarm::ParseMode::Data => (u16::MAX, None),
237316
};
317+
238318
ops.push(ScannedInstruction {
239-
ins_ref: InstructionRef { address: address as u64, size: size as u8, opcode },
319+
ins_ref: InstructionRef { address: address as u64, size: ins_size as u8, opcode },
240320
branch_dest: branch_dest.map(|x| x as u64),
241321
});
322+
address += ins_size as u32;
242323
}
243324

244325
Ok(ops)
@@ -391,7 +472,7 @@ fn push_args(
391472
let reloc_arg = find_reloc_arg(parsed_ins, relocation);
392473
let mut writeback = false;
393474
let mut deref = false;
394-
for (i, arg) in parsed_ins.args_iter().enumerate() {
475+
for (i, &arg) in parsed_ins.args_iter().enumerate() {
395476
// Emit punctuation before separator
396477
if deref {
397478
match arg {
@@ -463,19 +544,19 @@ fn push_args(
463544
| args::Argument::CoOpcode(value)
464545
| args::Argument::SatImm(value) => {
465546
arg_cb(InstructionPart::basic("#"))?;
466-
arg_cb(InstructionPart::unsigned(*value))?;
547+
arg_cb(InstructionPart::unsigned(value))?;
467548
}
468549
args::Argument::SImm(value)
469550
| args::Argument::OffsetImm(args::OffsetImm { post_indexed: _, value }) => {
470551
arg_cb(InstructionPart::basic("#"))?;
471-
arg_cb(InstructionPart::signed(*value))?;
552+
arg_cb(InstructionPart::signed(value))?;
472553
}
473554
args::Argument::BranchDest(value) => {
474-
arg_cb(InstructionPart::branch_dest(cur_addr.wrapping_add_signed(*value)))?;
555+
arg_cb(InstructionPart::branch_dest(cur_addr.wrapping_add_signed(value)))?;
475556
}
476557
args::Argument::CoOption(value) => {
477558
arg_cb(InstructionPart::basic("{"))?;
478-
arg_cb(InstructionPart::unsigned(*value))?;
559+
arg_cb(InstructionPart::unsigned(value))?;
479560
arg_cb(InstructionPart::basic("}"))?;
480561
}
481562
args::Argument::CoprocNum(value) => {

objdiff-core/tests/arch_arm.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,20 @@ fn read_arm() {
1515
let output = common::display_diff(&obj, &diff, symbol_idx, &diff_config);
1616
insta::assert_snapshot!(output);
1717
}
18+
19+
#[test]
20+
#[cfg(feature = "arm")]
21+
fn read_thumb() {
22+
let diff_config = diff::DiffObjConfig { mips_register_prefix: true, ..Default::default() };
23+
let obj = obj::read::parse(include_object!("data/arm/thumb.o"), &diff_config).unwrap();
24+
insta::assert_debug_snapshot!(obj);
25+
let symbol_idx = obj
26+
.symbols
27+
.iter()
28+
.position(|s| s.name == "THUMB_BRANCH_ServerDisplay_UncategorizedMove")
29+
.unwrap();
30+
let diff = diff::code::no_diff_code(&obj, symbol_idx, &diff_config).unwrap();
31+
insta::assert_debug_snapshot!(diff.instruction_rows);
32+
let output = common::display_diff(&obj, &diff, symbol_idx, &diff_config);
33+
insta::assert_snapshot!(output);
34+
}

objdiff-core/tests/data/arm/thumb.o

31.8 KB
Binary file not shown.

objdiff-core/tests/snapshots/arch_arm__read_arm-2.snap

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1660,7 +1660,7 @@ expression: diff.instruction_rows
16601660
InstructionRef {
16611661
address: 460,
16621662
size: 4,
1663-
opcode: 32771,
1663+
opcode: 65535,
16641664
},
16651665
),
16661666
kind: None,
@@ -1673,7 +1673,7 @@ expression: diff.instruction_rows
16731673
InstructionRef {
16741674
address: 464,
16751675
size: 4,
1676-
opcode: 32771,
1676+
opcode: 65535,
16771677
},
16781678
),
16791679
kind: None,
@@ -1693,7 +1693,7 @@ expression: diff.instruction_rows
16931693
InstructionRef {
16941694
address: 468,
16951695
size: 4,
1696-
opcode: 32771,
1696+
opcode: 65535,
16971697
},
16981698
),
16991699
kind: None,

objdiff-core/tests/snapshots/arch_arm__read_arm-3.snap

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,6 @@ expression: output
107107
[(Address(408), Normal, 5), (Basic(" ~> "), Rotating(16), 0), (Opcode("mov", 32818), Normal, 10), (Argument(Opaque("r0")), Normal, 0), (Basic(", "), Normal, 0), (Basic("#"), Normal, 0), (Argument(Unsigned(0)), Normal, 0), (Eol, Normal, 0)]
108108
[(Address(412), Normal, 5), (Spacing(4), Normal, 0), (Opcode("strb", 32899), Normal, 10), (Argument(Opaque("r0")), Normal, 0), (Basic(", "), Normal, 0), (Basic("["), Normal, 0), (Argument(Opaque("r5")), Normal, 0), (Basic(", "), Normal, 0), (Basic("#"), Normal, 0), (Argument(Signed(38)), Normal, 0), (Basic("]"), Normal, 0), (Eol, Normal, 0)]
109109
[(Address(416), Normal, 5), (Spacing(4), Normal, 0), (Opcode("ldmia", 32793), Normal, 10), (Argument(Opaque("sp")), Normal, 0), (Argument(Opaque("!")), Normal, 0), (Basic(", "), Normal, 0), (Basic("{"), Normal, 0), (Argument(Opaque("r4")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("r5")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("r6")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("pc")), Normal, 0), (Basic("}"), Normal, 0), (Eol, Normal, 0)]
110-
[(Address(420), Normal, 5), (Spacing(4), Normal, 0), (Opcode("andeq", 32771), Normal, 10), (Argument(Opaque("r0")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("r0")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("r0")), Normal, 0), (Basic(" <"), Normal, 0), (Symbol(Symbol { name: "data_027e103c", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global | Weak), align: None, virtual_address: None }), Bright, 0), (Basic(">"), Normal, 0), (Eol, Normal, 0)]
111-
[(Address(424), Normal, 5), (Basic(" ~> "), Rotating(8), 0), (Opcode("andeq", 32771), Normal, 10), (Argument(Opaque("r0")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("r0")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("r0")), Normal, 0), (Basic(" <"), Normal, 0), (Symbol(Symbol { name: "data_027e1098", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global | Weak), align: None, virtual_address: None }), Bright, 0), (Basic(">"), Normal, 0), (Eol, Normal, 0)]
112-
[(Address(428), Normal, 5), (Spacing(4), Normal, 0), (Opcode("andeq", 32771), Normal, 10), (Argument(Opaque("r0")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("r0")), Normal, 0), (Basic(", "), Normal, 0), (Argument(Opaque("r0")), Normal, 0), (Basic(" <"), Normal, 0), (Symbol(Symbol { name: "gPlayerControl", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global | Weak), align: None, virtual_address: None }), Bright, 0), (Basic(">"), Normal, 0), (Eol, Normal, 0)]
110+
[(Address(420), Normal, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Symbol(Symbol { name: "data_027e103c", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global | Weak), align: None, virtual_address: None }), Bright, 0), (Eol, Normal, 0)]
111+
[(Address(424), Normal, 5), (Basic(" ~> "), Rotating(8), 0), (Opcode(".word", 65535), Normal, 10), (Symbol(Symbol { name: "data_027e1098", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global | Weak), align: None, virtual_address: None }), Bright, 0), (Eol, Normal, 0)]
112+
[(Address(428), Normal, 5), (Spacing(4), Normal, 0), (Opcode(".word", 65535), Normal, 10), (Symbol(Symbol { name: "gPlayerControl", demangled_name: None, address: 0, size: 0, kind: Unknown, section: None, flags: FlagSet(Global | Weak), align: None, virtual_address: None }), Bright, 0), (Eol, Normal, 0)]

objdiff-core/tests/snapshots/arch_arm__read_arm.snap

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ expression: obj
55
Object {
66
arch: ArchArm {
77
disasm_modes: {
8-
1: [
8+
0: [
99
DisasmMode {
1010
address: 0,
1111
mapping: Thumb,

0 commit comments

Comments
 (0)