Skip to content

Commit cd953d9

Browse files
committed
Symbolicate aarch64 adrp+add pc-relative addr in disass
On aarch64, a two-instruction sequence (ADRP followed by ADD) is used to calculate a pc-relative address. Add some state to the DisassemblerLLVMC symbolicator so it can track the necessary data across the two instructions and compute the address being calculated. Differential Revision: https://reviews.llvm.org/D107213 rdar://49119253 (cherry picked from commit 7150b56)
1 parent 6d79e1a commit cd953d9

File tree

6 files changed

+1009
-1
lines changed

6 files changed

+1009
-1
lines changed

lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1030,7 +1030,8 @@ bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst &mc_inst) const {
10301030
DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch,
10311031
const char *flavor_string)
10321032
: Disassembler(arch, flavor_string), m_exe_ctx(nullptr), m_inst(nullptr),
1033-
m_data_from_file(false) {
1033+
m_data_from_file(false), m_adrp_address(LLDB_INVALID_ADDRESS),
1034+
m_adrp_insn() {
10341035
if (!FlavorValidForArchSpec(arch, m_flavor.c_str())) {
10351036
m_flavor.assign("default");
10361037
}
@@ -1310,6 +1311,46 @@ const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr,
13101311
Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : nullptr;
13111312
Address value_so_addr;
13121313
Address pc_so_addr;
1314+
if (target->GetArchitecture().GetMachine() == llvm::Triple::aarch64 ||
1315+
target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_be ||
1316+
target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_32) {
1317+
if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADRP) {
1318+
m_adrp_address = pc;
1319+
m_adrp_insn = value;
1320+
*name = nullptr;
1321+
*type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
1322+
return nullptr;
1323+
}
1324+
// If this instruction is an ADD and
1325+
// the previous instruction was an ADRP and
1326+
// the ADRP's register and this ADD's register are the same,
1327+
// then this is a pc-relative address calculation.
1328+
if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri &&
1329+
m_adrp_insn.hasValue() && m_adrp_address == pc - 4 &&
1330+
(m_adrp_insn.getValue() & 0x1f) == ((value >> 5) & 0x1f)) {
1331+
uint32_t addxri_inst;
1332+
uint64_t adrp_imm, addxri_imm;
1333+
// Get immlo and immhi bits, OR them together to get the ADRP imm
1334+
// value.
1335+
adrp_imm = ((m_adrp_insn.getValue() & 0x00ffffe0) >> 3) |
1336+
((m_adrp_insn.getValue() >> 29) & 0x3);
1337+
// if high bit of immhi after right-shifting set, sign extend
1338+
if (adrp_imm & (1ULL << 20))
1339+
adrp_imm |= ~((1ULL << 21) - 1);
1340+
1341+
addxri_inst = value;
1342+
addxri_imm = (addxri_inst >> 10) & 0xfff;
1343+
// check if 'sh' bit is set, shift imm value up if so
1344+
// (this would make no sense, ADRP already gave us this part)
1345+
if ((addxri_inst >> (12 + 5 + 5)) & 1)
1346+
addxri_imm <<= 12;
1347+
value = (m_adrp_address & 0xfffffffffffff000LL) + (adrp_imm << 12) +
1348+
addxri_imm;
1349+
}
1350+
m_adrp_address = LLDB_INVALID_ADDRESS;
1351+
m_adrp_insn.reset();
1352+
}
1353+
13131354
if (m_inst->UsingFileAddress()) {
13141355
ModuleSP module_sp(m_inst->GetAddress().GetModule());
13151356
if (module_sp) {
@@ -1371,6 +1412,12 @@ const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr,
13711412
}
13721413
}
13731414

1415+
// TODO: llvm-objdump sets the type_ptr to the
1416+
// LLVMDisassembler_ReferenceType_Out_* values
1417+
// based on where value_so_addr is pointing, with
1418+
// Mach-O specific augmentations in MachODump.cpp. e.g.
1419+
// see what AArch64ExternalSymbolizer::tryAddingSymbolicOperand
1420+
// handles.
13741421
*type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
13751422
*name = nullptr;
13761423
return nullptr;

lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "lldb/Core/Address.h"
1717
#include "lldb/Core/Disassembler.h"
1818
#include "lldb/Core/PluginManager.h"
19+
#include "llvm/ADT/Optional.h"
1920

2021
class InstructionLLVMC;
2122

@@ -73,6 +74,12 @@ class DisassemblerLLVMC : public lldb_private::Disassembler {
7374
InstructionLLVMC *m_inst;
7475
std::mutex m_mutex;
7576
bool m_data_from_file;
77+
// Save the AArch64 ADRP instruction word and address it was at,
78+
// in case the next instruction is an ADD to the same register;
79+
// this is a pc-relative address calculation and we need both
80+
// parts to calculate the symbolication.
81+
lldb::addr_t m_adrp_address;
82+
llvm::Optional<uint32_t> m_adrp_insn;
7683

7784
// Since we need to make two actual MC Disassemblers for ARM (ARM & THUMB),
7885
// and there's a bit of goo to set up and own in the MC disassembler world,
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
"""
2+
Test that the arm64 ADRP + ADD pc-relative addressing pair is symbolicated.
3+
"""
4+
5+
from lldbsuite.test.decorators import *
6+
from lldbsuite.test.lldbtest import *
7+
from lldbsuite.test import lldbutil
8+
9+
class TestAArch64AdrpAdd(TestBase):
    """Check that the AArch64 ADRP + ADD address pair is symbolicated.

    Each test builds a binary from a YAML description with yaml2obj,
    disassembles its main() function, and looks for the expected
    symbolication text in the instruction comments.
    """

    mydir = TestBase.compute_mydir(__file__)

    @no_debug_info_test
    def test_arm64(self):
        """Check the disassembly comments of main() in the arm64 binary."""
        self._build_and_check("a.out-arm64")

    @no_debug_info_test
    def test_arm64_32(self):
        """Check the disassembly comments of main() in the arm64_32 binary."""
        self._build_and_check("a.out-arm64_32")

    def _build_and_check(self, binaryname):
        """Build `binaryname` from its YAML file and check every main() in it.

        The YAML input is `<binaryname>.yaml` in the test source directory.
        Fails if no `main` symbol belonging to the built binary is found,
        so the test cannot pass without inspecting any disassembly.
        """
        yaml_path = os.path.join(self.getSourceDir(), binaryname + ".yaml")
        obj_path = self.getBuildArtifact(binaryname)
        self.yaml2obj(yaml_path, obj_path)

        target = self.dbg.CreateTarget(obj_path)
        self.assertTrue(target, VALID_TARGET)

        checked = 0
        for f in target.FindFunctions("main").symbols:
            # Only look at symbols whose module is the binary we just built.
            module_name = f.GetStartAddress().GetModule().GetFileSpec().GetFilename()
            if module_name == binaryname:
                self.disassemble_check_for_hi_and_foo(target, f, binaryname)
                checked += 1
        self.assertGreater(
            checked, 0, 'Did not find a "main" symbol in %s' % binaryname)

    def disassemble_check_for_hi_and_foo(self, target, func, binaryname):
        """Assert that the comments of `func`'s instructions mention "HI" and "foo".

        target:     the SBTarget the function was found in.
        func:       the function whose instructions are disassembled.
        binaryname: name of the binary, used only in diagnostic output.
        """
        insns = func.GetInstructions(target)
        found_hi_string = False
        found_foo = False

        # The binary has ADRP + ADD instruction pairs which load the
        # pc-relative address of a c-string, and load the address of a
        # function into a function pointer.  lldb should show that
        # c-string and the name of that function in the disassembly
        # comment field.
        for insn in insns:
            # Guard against a missing comment: an empty comment may surface
            # as None through the Python bindings (TODO confirm).
            comment = insn.GetComment(target) or ""
            if "HI" in comment:
                found_hi_string = True
            if "foo" in comment:
                found_foo = True

        if not found_hi_string or not found_foo:
            print('Did not find "HI" string or "foo" in disassembly symbolication in %s' % binaryname)
            if self.TraceOn():
                strm = lldb.SBStream()
                insns.GetDescription(strm)
                print('Disassembly of main(), looking for "HI" and "foo" in comments:')
                print(strm.GetData())

        self.assertTrue(
            found_hi_string,
            'Did not find "HI" string in disassembly symbolication in %s' % binaryname)
        self.assertTrue(
            found_foo,
            'Did not find "foo" in disassembly symbolication in %s' % binaryname)

0 commit comments

Comments
 (0)