Skip to content

Commit 68cd47e

Browse files
committed
[HWASan] Clean up hwasan_symbolize.
The globals are better expressed as members of the Symbolizer, and all functions operating on it should be methods instead. Also using the standard idiom of wrapping the main code in `if __name__ == '__main__'`. Reviewed By: eugenis Differential Revision: https://reviews.llvm.org/D125032
1 parent d8564dc commit 68cd47e

File tree

1 file changed

+186
-183
lines changed

1 file changed

+186
-183
lines changed

compiler-rt/lib/hwasan/scripts/hwasan_symbolize

Lines changed: 186 additions & 183 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,6 @@ if sys.version_info.major < 3:
3131
import codecs
3232
sys.stdout = codecs.getwriter("utf-8")(sys.stdout)
3333

34-
last_access_address = None
35-
last_access_tag = None
36-
3734
# Below, a parser for a subset of ELF. It only supports 64 bit, little-endian,
3835
# and only parses what is necessary to find the build ids. It uses a memoryview
3936
# into an mmap to avoid copying.
@@ -110,6 +107,8 @@ class Symbolizer:
110107
self.__index = {}
111108
self.__link_prefixes = []
112109
self.__html = False
110+
self.__last_access_address = None
111+
self.__last_access_tag = None
113112

114113
def enable_html(self, enable):
115114
self.__html = enable
@@ -268,147 +267,81 @@ class Symbolizer:
268267
if bid is not None:
269268
self.__index[bid] = filename
270269

271-
def symbolize_line(line, symbolizer_path):
272-
#0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
273-
match = re.match(r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)'
274-
r'(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
275-
if match:
276-
frameno = match.group(2)
277-
binary = match.group(5)
278-
addr = int(match.group(6), 16)
279-
buildid = match.group(7)
280-
281-
frames = list(symbolizer.iter_call_stack(binary, buildid, addr))
282-
283-
if len(frames) > 0:
284-
symbolizer.print(
285-
symbolizer.maybe_escape(
286-
"%s#%s%s%s in " % (match.group(1), match.group(2), match.group(3),
287-
frames[0][0])
288-
) + symbolizer.maybe_linkify(frames[0][1]),
289-
escape=False)
290-
for i in range(1, len(frames)):
291-
space1 = ' ' * match.end(1)
292-
space2 = ' ' * (match.start(4) - match.end(1) - 2)
293-
symbolizer.print(
294-
symbolizer.maybe_escape("%s->%s%s in " % (space1, space2, frames[i][0]))
295-
+ symbolizer.maybe_linkify(frames[i][1]), escape=False)
270+
def symbolize_line(self, line):
271+
#0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
272+
match = re.match(r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)'
273+
r'(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
274+
if match:
275+
frameno = match.group(2)
276+
binary = match.group(5)
277+
addr = int(match.group(6), 16)
278+
buildid = match.group(7)
279+
280+
frames = list(self.iter_call_stack(binary, buildid, addr))
281+
282+
if len(frames) > 0:
283+
self.print(
284+
self.maybe_escape(
285+
"%s#%s%s%s in " % (match.group(1), match.group(2), match.group(3),
286+
frames[0][0])
287+
) + self.maybe_linkify(frames[0][1]),
288+
escape=False)
289+
for i in range(1, len(frames)):
290+
space1 = ' ' * match.end(1)
291+
space2 = ' ' * (match.start(4) - match.end(1) - 2)
292+
self.print(
293+
self.maybe_escape("%s->%s%s in " % (space1, space2, frames[i][0]))
294+
+ self.maybe_linkify(frames[i][1]), escape=False)
295+
else:
296+
self.print(line.rstrip())
296297
else:
297-
symbolizer.print(line.rstrip())
298-
else:
299-
symbolizer.print(line.rstrip())
300-
301-
def save_access_address(line):
302-
global last_access_address, last_access_tag
303-
match = re.match(r'^(.*?)HWAddressSanitizer: tag-mismatch on address (0x[0-9a-f]+) ', line, re.UNICODE)
304-
if match:
305-
last_access_address = int(match.group(2), 16)
306-
match = re.match(r'^(.*?) of size [0-9]+ at 0x[0-9a-f]* tags: ([0-9a-f]+)/[0-9a-f]+ \(ptr/mem\)', line, re.UNICODE)
307-
if match:
308-
last_access_tag = int(match.group(2), 16)
309-
310-
def process_stack_history(line, symbolizer, ignore_tags=False):
311-
if last_access_address is None or last_access_tag is None:
312-
return
313-
if re.match(r'Previously allocated frames:', line, re.UNICODE):
314-
return True
315-
pc_mask = (1 << 48) - 1
316-
fp_mask = (1 << 20) - 1
317-
# record_addr:0x1234ABCD record:0x1234ABCD (/path/to/binary+0x1234ABCD) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
318-
match = re.match(r'^(.*?)record_addr:(0x[0-9a-f]+) +record:(0x[0-9a-f]+) +\((.*)\+(0x[0-9a-f]+)\)'
319-
r'(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
320-
if match:
321-
record_addr = int(match.group(2), 16)
322-
record = int(match.group(3), 16)
323-
binary = match.group(4)
324-
addr = int(match.group(5), 16)
325-
buildid = match.group(6)
326-
base_tag = (record_addr >> 3) & 0xFF
327-
fp = (record >> 48) << 4
328-
pc = record & pc_mask
329-
330-
for local in symbolizer.iter_locals(binary, addr, buildid):
331-
frame_offset = local[3]
332-
size = local[4]
333-
if frame_offset is None or size is None:
334-
continue
335-
obj_offset = (last_access_address - fp - frame_offset) & fp_mask
336-
if obj_offset >= size:
337-
continue
338-
tag_offset = local[5]
339-
if not ignore_tags and (tag_offset is None or base_tag ^ tag_offset != last_access_tag):
340-
continue
341-
symbolizer.print('')
342-
symbolizer.print('Potentially referenced stack object:')
343-
symbolizer.print(' %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset, local[2], local[0]))
344-
symbolizer.print(' at %s' % (local[1],))
345-
return True
346-
return False
347-
348-
parser = argparse.ArgumentParser()
349-
parser.add_argument('-d', action='store_true')
350-
parser.add_argument('-v', action='store_true')
351-
parser.add_argument('--ignore-tags', action='store_true')
352-
parser.add_argument('--symbols', action='append')
353-
parser.add_argument('--source', action='append')
354-
parser.add_argument('--index', action='store_true')
355-
parser.add_argument('--symbolizer')
356-
parser.add_argument('--linkify', type=str)
357-
parser.add_argument('--html', action='store_true')
358-
parser.add_argument('args', nargs=argparse.REMAINDER)
359-
args = parser.parse_args()
360-
361-
# Unstripped binaries location.
362-
binary_prefixes = args.symbols or []
363-
if not binary_prefixes:
364-
if 'ANDROID_PRODUCT_OUT' in os.environ:
365-
product_out = os.path.join(os.environ['ANDROID_PRODUCT_OUT'], 'symbols')
366-
binary_prefixes.append(product_out)
367-
binary_prefixes.append('/')
368-
369-
for p in binary_prefixes:
370-
if not os.path.isdir(p):
371-
print("Symbols path does not exist or is not a directory:", p, file=sys.stderr)
372-
sys.exit(1)
373-
374-
# Source location.
375-
paths_to_cut = args.source or []
376-
if not paths_to_cut:
377-
paths_to_cut.append(os.getcwd() + '/')
378-
if 'ANDROID_BUILD_TOP' in os.environ:
379-
paths_to_cut.append(os.environ['ANDROID_BUILD_TOP'] + '/')
380-
381-
# llvm-symbolizer binary.
382-
# 1. --symbolizer flag
383-
# 2. environment variable
384-
# 3. unsuffixed binary in the current directory
385-
# 4. if inside Android platform, prebuilt binary at a known path
386-
# 5. first "llvm-symbolizer", then "llvm-symbolizer-$VER" with the
387-
# highest available version in $PATH
388-
symbolizer_path = args.symbolizer
389-
if not symbolizer_path:
390-
if 'LLVM_SYMBOLIZER_PATH' in os.environ:
391-
symbolizer_path = os.environ['LLVM_SYMBOLIZER_PATH']
392-
elif 'HWASAN_SYMBOLIZER_PATH' in os.environ:
393-
symbolizer_path = os.environ['HWASAN_SYMBOLIZER_PATH']
394-
395-
if not symbolizer_path:
396-
s = os.path.join(os.path.dirname(sys.argv[0]), 'llvm-symbolizer')
397-
if os.path.exists(s):
398-
symbolizer_path = s
399-
400-
if not symbolizer_path:
401-
if 'ANDROID_BUILD_TOP' in os.environ:
402-
s = os.path.join(os.environ['ANDROID_BUILD_TOP'], 'prebuilts/clang/host/linux-x86/llvm-binutils-stable/llvm-symbolizer')
403-
if os.path.exists(s):
404-
symbolizer_path = s
405-
406-
if not symbolizer_path:
407-
for path in os.environ["PATH"].split(os.pathsep):
408-
p = os.path.join(path, 'llvm-symbolizer')
409-
if os.path.exists(p):
410-
symbolizer_path = p
411-
break
298+
self.print(line.rstrip())
299+
300+
def save_access_address(self, line):
301+
match = re.match(r'^(.*?)HWAddressSanitizer: tag-mismatch on address (0x[0-9a-f]+) ', line, re.UNICODE)
302+
if match:
303+
self.__last_access_address = int(match.group(2), 16)
304+
match = re.match(r'^(.*?) of size [0-9]+ at 0x[0-9a-f]* tags: ([0-9a-f]+)/[0-9a-f]+ \(ptr/mem\)', line, re.UNICODE)
305+
if match:
306+
self.__last_access_tag = int(match.group(2), 16)
307+
308+
def process_stack_history(self, line, ignore_tags=False):
309+
if self.__last_access_address is None or self.__last_access_tag is None:
310+
return
311+
if re.match(r'Previously allocated frames:', line, re.UNICODE):
312+
return True
313+
pc_mask = (1 << 48) - 1
314+
fp_mask = (1 << 20) - 1
315+
# record_addr:0x1234ABCD record:0x1234ABCD (/path/to/binary+0x1234ABCD) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
316+
match = re.match(r'^(.*?)record_addr:(0x[0-9a-f]+) +record:(0x[0-9a-f]+) +\((.*)\+(0x[0-9a-f]+)\)'
317+
r'(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
318+
if match:
319+
record_addr = int(match.group(2), 16)
320+
record = int(match.group(3), 16)
321+
binary = match.group(4)
322+
addr = int(match.group(5), 16)
323+
buildid = match.group(6)
324+
base_tag = (record_addr >> 3) & 0xFF
325+
fp = (record >> 48) << 4
326+
pc = record & pc_mask
327+
328+
for local in self.iter_locals(binary, addr, buildid):
329+
frame_offset = local[3]
330+
size = local[4]
331+
if frame_offset is None or size is None:
332+
continue
333+
obj_offset = (self.__last_access_address - fp - frame_offset) & fp_mask
334+
if obj_offset >= size:
335+
continue
336+
tag_offset = local[5]
337+
if not ignore_tags and (tag_offset is None or base_tag ^ tag_offset != self.__last_access_tag):
338+
continue
339+
self.print('')
340+
self.print('Potentially referenced stack object:')
341+
self.print(' %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset, local[2], local[0]))
342+
self.print(' at %s' % (local[1],))
343+
return True
344+
return False
412345

413346
def extract_version(s):
414347
idx = s.rfind('-')
@@ -417,44 +350,114 @@ def extract_version(s):
417350
x = float(s[idx + 1:])
418351
return x
419352

420-
if not symbolizer_path:
421-
for path in os.environ["PATH"].split(os.pathsep):
422-
candidates = glob.glob(os.path.join(path, 'llvm-symbolizer-*'))
423-
if len(candidates) > 0:
424-
candidates.sort(key = extract_version, reverse = True)
425-
symbolizer_path = candidates[0]
426-
break
427-
428-
if not os.path.exists(symbolizer_path):
429-
print("Symbolizer path does not exist:", symbolizer_path, file=sys.stderr)
430-
sys.exit(1)
431-
432-
if args.v:
433-
print("Looking for symbols in:")
434-
for s in binary_prefixes:
435-
print(" %s" % (s,))
436-
print("Stripping source path prefixes:")
437-
for s in paths_to_cut:
438-
print(" %s" % (s,))
439-
print("Using llvm-symbolizer binary in:\n %s" % (symbolizer_path,))
440-
print()
441-
442-
symbolizer = Symbolizer(symbolizer_path, binary_prefixes, paths_to_cut)
443-
symbolizer.enable_html(args.html)
444-
symbolizer.enable_logging(args.d)
445-
if args.index:
446-
symbolizer.build_index()
447-
448-
if args.linkify:
449-
if not args.html:
450-
print('Need --html to --linkify', file=sys.stderr)
353+
def main():
354+
parser = argparse.ArgumentParser()
355+
parser.add_argument('-d', action='store_true')
356+
parser.add_argument('-v', action='store_true')
357+
parser.add_argument('--ignore-tags', action='store_true')
358+
parser.add_argument('--symbols', action='append')
359+
parser.add_argument('--source', action='append')
360+
parser.add_argument('--index', action='store_true')
361+
parser.add_argument('--symbolizer')
362+
parser.add_argument('--linkify', type=str)
363+
parser.add_argument('--html', action='store_true')
364+
parser.add_argument('args', nargs=argparse.REMAINDER)
365+
args = parser.parse_args()
366+
367+
# Unstripped binaries location.
368+
binary_prefixes = args.symbols or []
369+
if not binary_prefixes:
370+
if 'ANDROID_PRODUCT_OUT' in os.environ:
371+
product_out = os.path.join(os.environ['ANDROID_PRODUCT_OUT'], 'symbols')
372+
binary_prefixes.append(product_out)
373+
binary_prefixes.append('/')
374+
375+
for p in binary_prefixes:
376+
if not os.path.isdir(p):
377+
print("Symbols path does not exist or is not a directory:", p, file=sys.stderr)
378+
sys.exit(1)
379+
380+
# Source location.
381+
paths_to_cut = args.source or []
382+
if not paths_to_cut:
383+
paths_to_cut.append(os.getcwd() + '/')
384+
if 'ANDROID_BUILD_TOP' in os.environ:
385+
paths_to_cut.append(os.environ['ANDROID_BUILD_TOP'] + '/')
386+
387+
# llvm-symbolizer binary.
388+
# 1. --symbolizer flag
389+
# 2. environment variable
390+
# 3. unsuffixed binary in the current directory
391+
# 4. if inside Android platform, prebuilt binary at a known path
392+
# 5. first "llvm-symbolizer", then "llvm-symbolizer-$VER" with the
393+
# highest available version in $PATH
394+
symbolizer_path = args.symbolizer
395+
if not symbolizer_path:
396+
if 'LLVM_SYMBOLIZER_PATH' in os.environ:
397+
symbolizer_path = os.environ['LLVM_SYMBOLIZER_PATH']
398+
elif 'HWASAN_SYMBOLIZER_PATH' in os.environ:
399+
symbolizer_path = os.environ['HWASAN_SYMBOLIZER_PATH']
400+
401+
if not symbolizer_path:
402+
s = os.path.join(os.path.dirname(sys.argv[0]), 'llvm-symbolizer')
403+
if os.path.exists(s):
404+
symbolizer_path = s
405+
406+
if not symbolizer_path:
407+
if 'ANDROID_BUILD_TOP' in os.environ:
408+
s = os.path.join(os.environ['ANDROID_BUILD_TOP'], 'prebuilts/clang/host/linux-x86/llvm-binutils-stable/llvm-symbolizer')
409+
if os.path.exists(s):
410+
symbolizer_path = s
411+
412+
if not symbolizer_path:
413+
for path in os.environ["PATH"].split(os.pathsep):
414+
p = os.path.join(path, 'llvm-symbolizer')
415+
if os.path.exists(p):
416+
symbolizer_path = p
417+
break
418+
419+
if not symbolizer_path:
420+
for path in os.environ["PATH"].split(os.pathsep):
421+
candidates = glob.glob(os.path.join(path, 'llvm-symbolizer-*'))
422+
if len(candidates) > 0:
423+
candidates.sort(key = extract_version, reverse = True)
424+
symbolizer_path = candidates[0]
425+
break
426+
427+
if not os.path.exists(symbolizer_path):
428+
print("Symbolizer path does not exist:", symbolizer_path, file=sys.stderr)
451429
sys.exit(1)
452-
symbolizer.read_linkify(args.linkify)
453-
454-
for line in sys.stdin:
455-
if sys.version_info.major < 3:
456-
line = line.decode('utf-8')
457-
save_access_address(line)
458-
if process_stack_history(line, symbolizer, ignore_tags=args.ignore_tags):
459-
continue
460-
symbolize_line(line, symbolizer_path)
430+
431+
if args.v:
432+
print("Looking for symbols in:")
433+
for s in binary_prefixes:
434+
print(" %s" % (s,))
435+
print("Stripping source path prefixes:")
436+
for s in paths_to_cut:
437+
print(" %s" % (s,))
438+
print("Using llvm-symbolizer binary in:\n %s" % (symbolizer_path,))
439+
print()
440+
441+
symbolizer = Symbolizer(symbolizer_path, binary_prefixes, paths_to_cut)
442+
symbolizer.enable_html(args.html)
443+
symbolizer.enable_logging(args.d)
444+
if args.index:
445+
symbolizer.build_index()
446+
447+
if args.linkify:
448+
if not args.html:
449+
print('Need --html to --linkify', file=sys.stderr)
450+
sys.exit(1)
451+
symbolizer.read_linkify(args.linkify)
452+
453+
for line in sys.stdin:
454+
if sys.version_info.major < 3:
455+
line = line.decode('utf-8')
456+
symbolizer.save_access_address(line)
457+
if symbolizer.process_stack_history(line, ignore_tags=args.ignore_tags):
458+
continue
459+
symbolizer.symbolize_line(line)
460+
461+
462+
if __name__ == '__main__':
463+
main()

0 commit comments

Comments
 (0)