Skip to content

Commit a0570e7

Browse files
committed
[HWASan] allow symbolizer script to index binaries by build id.
Tested on an example callstack with misplaced binaries from Android. Tested Regex against callstack without Build ID to confirm it still works. Reviewed By: eugenis Differential Revision: https://reviews.llvm.org/D123437
1 parent 06285fc commit a0570e7

File tree

1 file changed

+90
-5
lines changed

1 file changed

+90
-5
lines changed

compiler-rt/lib/hwasan/scripts/hwasan_symbolize

Lines changed: 90 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ import sys
2121
import string
2222
import subprocess
2323
import argparse
24+
import mmap
25+
import struct
26+
import os
2427

2528
if sys.version_info.major < 3:
2629
# Simulate Python 3.x behaviour of defaulting to UTF-8 for print. This is
@@ -31,6 +34,71 @@ if sys.version_info.major < 3:
3134
last_access_address = None
3235
last_access_tag = None
3336

37+
# Minimal ELF parser used to look up GNU build ids. Only 64-bit little-endian
# files are supported, and only the fields needed to reach the build-id note
# are decoded. All reads go through a memoryview over an mmap, so nothing is
# copied.

# ELF file header: total size, then the byte offsets of e_shoff and e_shnum.
Ehdr_size = 64
e_shoff_offset = 40
e_shnum_offset = 60

# Section header: total size, the byte offsets of sh_type, sh_offset and
# sh_size, and the sh_type value that marks a note section.
Shdr_size = 64
sh_type_offset = 4
sh_offset_offset = 24
sh_size_offset = 32
SHT_NOTE = 7

# Note header: three 32-bit words (n_namesz, n_descsz, n_type), and the n_type
# value that marks a GNU build-id note.
Nhdr_size = 12
NT_GNU_BUILD_ID = 3
52+
53+
def align_up(size, alignment):
  """Round `size` up to the nearest multiple of `alignment`.

  The masking trick is only correct when `alignment` is a power of two; the
  note parser in this file always passes 4.
  """
  low_bits = alignment - 1
  return (size + low_bits) & ~low_bits
55+
56+
def handle_Nhdr(mv, sh_size):
  """Scan the contents of one note section for the GNU build-id note.

  Args:
    mv: buffer holding the section contents, starting at the first note.
    sh_size: size of the section in bytes, as recorded in its section header.

  Returns:
    The build id as a hex string, or None if the section holds no complete
    GNU build-id note. A note whose header or descriptor would extend past
    the available bytes is treated as absent instead of raising struct.error
    or yielding a truncated id.
  """
  # Never read past the bytes actually available: a truncated or corrupt file
  # can advertise a section larger than the buffer we were handed.
  limit = min(sh_size, len(mv))
  offset = 0
  while offset + Nhdr_size <= limit:
    n_namesz, n_descsz, n_type = struct.unpack_from('<III', mv, offset)
    name_start = offset + Nhdr_size
    # Name and descriptor are each padded to a 4-byte boundary.
    desc_start = name_start + align_up(n_namesz, 4)
    desc_end = desc_start + n_descsz
    if (n_type == NT_GNU_BUILD_ID and n_namesz == 4 and desc_end <= limit and
        mv[name_start: name_start + 4] == b"GNU\x00"):
      return mv[desc_start:desc_end].hex()
    offset = desc_start + align_up(n_descsz, 4)
  return None
67+
68+
def handle_Shdr(mv):
  """Decode one section header and report where a note section lives.

  Args:
    mv: buffer positioned at the start of a single section header.

  Returns:
    (sh_offset, sh_size) when the header's sh_type is SHT_NOTE, otherwise
    (None, None).
  """
  (section_type,) = struct.unpack_from('<I', mv, sh_type_offset)
  if section_type == SHT_NOTE:
    (file_offset,) = struct.unpack_from('<Q', mv, sh_offset_offset)
    (byte_count,) = struct.unpack_from('<Q', mv, sh_size_offset)
    return file_offset, byte_count
  # Not a note section: nothing for the caller to scan.
  return None, None
75+
76+
def handle_elf(mv):
  """Return the GNU build id of the ELF image in `mv`, or None.

  Args:
    mv: buffer over the entire file contents.

  Returns:
    The build id as a hex string. None if `mv` is too short to hold an ELF
    header, is not a 64-bit little-endian ELF file, or no note section that
    lies completely inside the buffer contains a build-id note.
  """
  file_size = len(mv)
  if file_size < Ehdr_size:
    return None
  # \x02 is ELFCLASS64, \x01 is ELFDATA2LSB. HWASan currently only works on
  # 64-bit little endian platforms (x86_64 and ARM64). If this changes, we will
  # have to extend the parsing code.
  if mv[:6] != b'\x7fELF\x02\x01':
    return None
  e_shnum, = struct.unpack_from('<H', mv, e_shnum_offset)
  e_shoff, = struct.unpack_from('<Q', mv, e_shoff_offset)
  for i in range(e_shnum):
    start = e_shoff + i * Shdr_size
    end = start + Shdr_size
    if end > file_size:
      # The section header table runs off the end of the file (truncated or
      # corrupt); every later entry would be even further out.
      break
    sh_offset, sh_size = handle_Shdr(mv[start:end])
    if sh_offset is None:
      continue
    if sh_offset + sh_size > file_size:
      # Section contents are not fully present; do not parse partial notes.
      continue
    result = handle_Nhdr(mv[sh_offset: sh_offset + sh_size], sh_size)
    if result is not None:
      return result
  return None
93+
94+
def get_buildid(filename):
  """Return the GNU build id of the file at `filename`, or None.

  Args:
    filename: path of the file to inspect.

  Returns:
    Whatever handle_elf() extracts from the mapped file contents, or None
    when the file is smaller than an ELF header.

  Raises:
    OSError: if the file cannot be opened or inspected.
  """
  # Binary mode: the contents are parsed as raw ELF bytes, never as text.
  with open(filename, "rb") as fd:
    # Too small to contain an ELF header, so it cannot be a usable binary.
    if os.fstat(fd.fileno()).st_size < Ehdr_size:
      return None
    with mmap.mmap(fd.fileno(), 0, access=mmap.ACCESS_READ) as m:
      with memoryview(m) as mv:
        return handle_elf(mv)
101+
34102
class Symbolizer:
35103
def __init__(self, path, binary_prefixes, paths_to_cut):
36104
self.__pipe = None
@@ -39,6 +107,7 @@ class Symbolizer:
39107
self.__paths_to_cut = paths_to_cut
40108
self.__log = False
41109
self.__warnings = set()
110+
self.__index = {}
42111

43112
def enable_logging(self, enable):
44113
self.__log = enable
@@ -77,9 +146,12 @@ class Symbolizer:
77146
file_name = re.sub(".*crtstuff.c:0", "???:0", file_name)
78147
return file_name
79148

80-
def __process_binary_name(self, name):
149+
def __process_binary_name(self, name, buildid=None):
81150
if name.startswith('/'):
82151
name = name[1:]
152+
if buildid is not None and buildid in self.__index:
153+
return self.__index[buildid]
154+
83155
for p in self.__binary_prefixes:
84156
full_path = os.path.join(p, name)
85157
if os.path.exists(full_path):
@@ -121,10 +193,10 @@ class Symbolizer:
121193
except Symbolizer.__EOF:
122194
pass
123195

124-
def iter_call_stack(self, binary, addr):
196+
def iter_call_stack(self, binary, buildid, addr):
125197
self.__open_pipe()
126198
p = self.__pipe
127-
binary = self.__process_binary_name(binary)
199+
binary = self.__process_binary_name(binary, buildid)
128200
if not binary:
129201
return
130202
self.__write("CODE %s %s" % (binary, addr))
@@ -137,15 +209,25 @@ class Symbolizer:
137209
except Symbolizer.__EOF:
138210
pass
139211

212+
def build_index(self):
  """Populate the build-id index from every file under the binary prefixes.

  Walks each directory in the binary prefixes recursively and records
  build id -> file path for every file that get_buildid() recognises. If two
  files share a build id, the one visited last wins. Files that cannot be
  opened are skipped so that one bad entry does not abort the whole index.
  """
  for p in self.__binary_prefixes:
    for dname, _, fnames in os.walk(p):
      for fn in fnames:
        filename = os.path.join(dname, fn)
        try:
          bid = get_buildid(filename)
        except OSError:
          # For example a dangling symlink or a file we lack permission to
          # read; it cannot be symbolized from anyway.
          continue
        if bid is not None:
          self.__index[bid] = filename
220+
140221
def symbolize_line(line, symbolizer_path):
141222
#0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
142-
match = re.match(r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)', line, re.UNICODE)
223+
match = re.match(r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
143224
if match:
144225
frameno = match.group(2)
145226
binary = match.group(5)
146227
addr = int(match.group(6), 16)
228+
buildid = match.group(7)
147229

148-
frames = list(symbolizer.iter_call_stack(binary, addr))
230+
frames = list(symbolizer.iter_call_stack(binary, buildid, addr))
149231

150232
if len(frames) > 0:
151233
print("%s#%s%s%s in %s" % (match.group(1), match.group(2),
@@ -210,6 +292,7 @@ parser.add_argument('-v', action='store_true')
210292
parser.add_argument('--ignore-tags', action='store_true')
211293
parser.add_argument('--symbols', action='append')
212294
parser.add_argument('--source', action='append')
295+
parser.add_argument('--index', action='store_true')
213296
parser.add_argument('--symbolizer')
214297
parser.add_argument('args', nargs=argparse.REMAINDER)
215298
args = parser.parse_args()
@@ -297,6 +380,8 @@ if args.v:
297380

298381
symbolizer = Symbolizer(symbolizer_path, binary_prefixes, paths_to_cut)
299382
symbolizer.enable_logging(args.d)
383+
if args.index:
384+
symbolizer.build_index()
300385

301386
for line in sys.stdin:
302387
if sys.version_info.major < 3:

0 commit comments

Comments
 (0)