@@ -31,9 +31,6 @@ if sys.version_info.major < 3:
31
31
import codecs
32
32
sys .stdout = codecs .getwriter ("utf-8" )(sys .stdout )
33
33
34
- last_access_address = None
35
- last_access_tag = None
36
-
37
34
# Below, a parser for a subset of ELF. It only supports 64 bit, little-endian,
38
35
# and only parses what is necessary to find the build ids. It uses a memoryview
39
36
# into an mmap to avoid copying.
@@ -110,6 +107,8 @@ class Symbolizer:
110
107
self .__index = {}
111
108
self .__link_prefixes = []
112
109
self .__html = False
110
+ self .__last_access_address = None
111
+ self .__last_access_tag = None
113
112
114
113
def enable_html (self , enable ):
115
114
self .__html = enable
@@ -268,147 +267,81 @@ class Symbolizer:
268
267
if bid is not None :
269
268
self .__index [bid ] = filename
270
269
271
- def symbolize_line (line , symbolizer_path ):
272
- #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
273
- match = re .match (r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)'
274
- r'(?:\s*\(BuildId: ([0-9a-f]+)\))?' , line , re .UNICODE )
275
- if match :
276
- frameno = match .group (2 )
277
- binary = match .group (5 )
278
- addr = int (match .group (6 ), 16 )
279
- buildid = match .group (7 )
280
-
281
- frames = list (symbolizer .iter_call_stack (binary , buildid , addr ))
282
-
283
- if len (frames ) > 0 :
284
- symbolizer .print (
285
- symbolizer .maybe_escape (
286
- "%s#%s%s%s in " % (match .group (1 ), match .group (2 ), match .group (3 ),
287
- frames [0 ][0 ])
288
- ) + symbolizer .maybe_linkify (frames [0 ][1 ]),
289
- escape = False )
290
- for i in range (1 , len (frames )):
291
- space1 = ' ' * match .end (1 )
292
- space2 = ' ' * (match .start (4 ) - match .end (1 ) - 2 )
293
- symbolizer .print (
294
- symbolizer .maybe_escape ("%s->%s%s in " % (space1 , space2 , frames [i ][0 ]))
295
- + symbolizer .maybe_linkify (frames [i ][1 ]), escape = False )
270
+ def symbolize_line (self , line ):
271
+ #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
272
+ match = re .match (r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)'
273
+ r'(?:\s*\(BuildId: ([0-9a-f]+)\))?' , line , re .UNICODE )
274
+ if match :
275
+ frameno = match .group (2 )
276
+ binary = match .group (5 )
277
+ addr = int (match .group (6 ), 16 )
278
+ buildid = match .group (7 )
279
+
280
+ frames = list (self .iter_call_stack (binary , buildid , addr ))
281
+
282
+ if len (frames ) > 0 :
283
+ self .print (
284
+ self .maybe_escape (
285
+ "%s#%s%s%s in " % (match .group (1 ), match .group (2 ), match .group (3 ),
286
+ frames [0 ][0 ])
287
+ ) + self .maybe_linkify (frames [0 ][1 ]),
288
+ escape = False )
289
+ for i in range (1 , len (frames )):
290
+ space1 = ' ' * match .end (1 )
291
+ space2 = ' ' * (match .start (4 ) - match .end (1 ) - 2 )
292
+ self .print (
293
+ self .maybe_escape ("%s->%s%s in " % (space1 , space2 , frames [i ][0 ]))
294
+ + self .maybe_linkify (frames [i ][1 ]), escape = False )
295
+ else :
296
+ self .print (line .rstrip ())
296
297
else :
297
- symbolizer .print (line .rstrip ())
298
- else :
299
- symbolizer .print (line .rstrip ())
300
-
301
- def save_access_address (line ):
302
- global last_access_address , last_access_tag
303
- match = re .match (r'^(.*?)HWAddressSanitizer: tag-mismatch on address (0x[0-9a-f]+) ' , line , re .UNICODE )
304
- if match :
305
- last_access_address = int (match .group (2 ), 16 )
306
- match = re .match (r'^(.*?) of size [0-9]+ at 0x[0-9a-f]* tags: ([0-9a-f]+)/[0-9a-f]+ \(ptr/mem\)' , line , re .UNICODE )
307
- if match :
308
- last_access_tag = int (match .group (2 ), 16 )
309
-
310
- def process_stack_history (line , symbolizer , ignore_tags = False ):
311
- if last_access_address is None or last_access_tag is None :
312
- return
313
- if re .match (r'Previously allocated frames:' , line , re .UNICODE ):
314
- return True
315
- pc_mask = (1 << 48 ) - 1
316
- fp_mask = (1 << 20 ) - 1
317
- # record_addr:0x1234ABCD record:0x1234ABCD (/path/to/binary+0x1234ABCD) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
318
- match = re .match (r'^(.*?)record_addr:(0x[0-9a-f]+) +record:(0x[0-9a-f]+) +\((.*)\+(0x[0-9a-f]+)\)'
319
- r'(?:\s*\(BuildId: ([0-9a-f]+)\))?' , line , re .UNICODE )
320
- if match :
321
- record_addr = int (match .group (2 ), 16 )
322
- record = int (match .group (3 ), 16 )
323
- binary = match .group (4 )
324
- addr = int (match .group (5 ), 16 )
325
- buildid = match .group (6 )
326
- base_tag = (record_addr >> 3 ) & 0xFF
327
- fp = (record >> 48 ) << 4
328
- pc = record & pc_mask
329
-
330
- for local in symbolizer .iter_locals (binary , addr , buildid ):
331
- frame_offset = local [3 ]
332
- size = local [4 ]
333
- if frame_offset is None or size is None :
334
- continue
335
- obj_offset = (last_access_address - fp - frame_offset ) & fp_mask
336
- if obj_offset >= size :
337
- continue
338
- tag_offset = local [5 ]
339
- if not ignore_tags and (tag_offset is None or base_tag ^ tag_offset != last_access_tag ):
340
- continue
341
- symbolizer .print ('' )
342
- symbolizer .print ('Potentially referenced stack object:' )
343
- symbolizer .print (' %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset , local [2 ], local [0 ]))
344
- symbolizer .print (' at %s' % (local [1 ],))
345
- return True
346
- return False
347
-
348
- parser = argparse .ArgumentParser ()
349
- parser .add_argument ('-d' , action = 'store_true' )
350
- parser .add_argument ('-v' , action = 'store_true' )
351
- parser .add_argument ('--ignore-tags' , action = 'store_true' )
352
- parser .add_argument ('--symbols' , action = 'append' )
353
- parser .add_argument ('--source' , action = 'append' )
354
- parser .add_argument ('--index' , action = 'store_true' )
355
- parser .add_argument ('--symbolizer' )
356
- parser .add_argument ('--linkify' , type = str )
357
- parser .add_argument ('--html' , action = 'store_true' )
358
- parser .add_argument ('args' , nargs = argparse .REMAINDER )
359
- args = parser .parse_args ()
360
-
361
- # Unstripped binaries location.
362
- binary_prefixes = args .symbols or []
363
- if not binary_prefixes :
364
- if 'ANDROID_PRODUCT_OUT' in os .environ :
365
- product_out = os .path .join (os .environ ['ANDROID_PRODUCT_OUT' ], 'symbols' )
366
- binary_prefixes .append (product_out )
367
- binary_prefixes .append ('/' )
368
-
369
- for p in binary_prefixes :
370
- if not os .path .isdir (p ):
371
- print ("Symbols path does not exist or is not a directory:" , p , file = sys .stderr )
372
- sys .exit (1 )
373
-
374
- # Source location.
375
- paths_to_cut = args .source or []
376
- if not paths_to_cut :
377
- paths_to_cut .append (os .getcwd () + '/' )
378
- if 'ANDROID_BUILD_TOP' in os .environ :
379
- paths_to_cut .append (os .environ ['ANDROID_BUILD_TOP' ] + '/' )
380
-
381
- # llvm-symbolizer binary.
382
- # 1. --symbolizer flag
383
- # 2. environment variable
384
- # 3. unsuffixed binary in the current directory
385
- # 4. if inside Android platform, prebuilt binary at a known path
386
- # 5. first "llvm-symbolizer", then "llvm-symbolizer-$VER" with the
387
- # highest available version in $PATH
388
- symbolizer_path = args .symbolizer
389
- if not symbolizer_path :
390
- if 'LLVM_SYMBOLIZER_PATH' in os .environ :
391
- symbolizer_path = os .environ ['LLVM_SYMBOLIZER_PATH' ]
392
- elif 'HWASAN_SYMBOLIZER_PATH' in os .environ :
393
- symbolizer_path = os .environ ['HWASAN_SYMBOLIZER_PATH' ]
394
-
395
- if not symbolizer_path :
396
- s = os .path .join (os .path .dirname (sys .argv [0 ]), 'llvm-symbolizer' )
397
- if os .path .exists (s ):
398
- symbolizer_path = s
399
-
400
- if not symbolizer_path :
401
- if 'ANDROID_BUILD_TOP' in os .environ :
402
- s = os .path .join (os .environ ['ANDROID_BUILD_TOP' ], 'prebuilts/clang/host/linux-x86/llvm-binutils-stable/llvm-symbolizer' )
403
- if os .path .exists (s ):
404
- symbolizer_path = s
405
-
406
- if not symbolizer_path :
407
- for path in os .environ ["PATH" ].split (os .pathsep ):
408
- p = os .path .join (path , 'llvm-symbolizer' )
409
- if os .path .exists (p ):
410
- symbolizer_path = p
411
- break
298
+ self .print (line .rstrip ())
299
+
300
def save_access_address(self, line):
  """Remember the faulting address and pointer tag seen in a report line.

  Two kinds of lines are recognised; either, both or neither may match:
    * "...HWAddressSanitizer: tag-mismatch on address 0x... " stores the
      address in self.__last_access_address.
    * "... of size N at 0x... tags: PP/MM (ptr/mem)" stores the pointer
      tag (PP, parsed as hex) in self.__last_access_tag.
  A line that matches neither leaves the saved values untouched.
  """
  address_hit = re.match(
      r'^(.*?)HWAddressSanitizer: tag-mismatch on address (0x[0-9a-f]+) ',
      line, re.UNICODE)
  if address_hit is not None:
    self.__last_access_address = int(address_hit.group(2), 16)

  tag_hit = re.match(
      r'^(.*?) of size [0-9]+ at 0x[0-9a-f]* tags: ([0-9a-f]+)/[0-9a-f]+ \(ptr/mem\)',
      line, re.UNICODE)
  if tag_hit is not None:
    self.__last_access_tag = int(tag_hit.group(2), 16)
307
+
308
def process_stack_history(self, line, ignore_tags=False):
  """Try to interpret `line` as part of the stack-history section.

  Args:
    line: one line of the report.
    ignore_tags: when true, report every local whose frame range covers the
      faulting address, even if its tag does not match the pointer tag.

  Returns:
    True if the line was consumed (the "Previously allocated frames:"
    header, or a "record_addr:... record:... (binary+offset)" entry), so
    the caller should not symbolize it as an ordinary frame. False
    otherwise, including when no access address/tag has been saved yet.
  """
  # Without a recorded faulting access there is nothing to match locals
  # against. Return an explicit False so every path yields a bool.
  if self.__last_access_address is None or self.__last_access_tag is None:
    return False
  if re.match(r'Previously allocated frames:', line, re.UNICODE):
    return True

  # record_addr:0x1234ABCD record:0x1234ABCD (/path/to/binary+0x1234ABCD) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
  match = re.match(r'^(.*?)record_addr:(0x[0-9a-f]+) +record:(0x[0-9a-f]+) +\((.*)\+(0x[0-9a-f]+)\)'
                   r'(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
  if not match:
    return False

  record_addr = int(match.group(2), 16)
  record = int(match.group(3), 16)
  binary = match.group(4)
  addr = int(match.group(5), 16)
  buildid = match.group(6)

  # The base tag is taken from bits 3..10 of the record's own address.
  base_tag = (record_addr >> 3) & 0xFF
  # The bits above bit 48 of the record hold the frame pointer in 16-byte
  # units; only its low 20 bits are compared (fp_mask).
  fp = (record >> 48) << 4
  fp_mask = (1 << 20) - 1

  for local in self.iter_locals(binary, addr, buildid):
    frame_offset = local[3]
    size = local[4]
    if frame_offset is None or size is None:
      continue
    # Offset of the faulting address inside this local, modulo the bits of
    # the frame pointer that the record preserves.
    obj_offset = (self.__last_access_address - fp - frame_offset) & fp_mask
    if obj_offset >= size:
      continue
    tag_offset = local[5]
    if not ignore_tags and (tag_offset is None or base_tag ^ tag_offset != self.__last_access_tag):
      continue
    self.print('')
    self.print('Potentially referenced stack object:')
    self.print(' %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset, local[2], local[0]))
    self.print(' at %s' % (local[1],))
  return True
412
345
413
346
def extract_version (s ):
414
347
idx = s .rfind ('-' )
@@ -417,44 +350,114 @@ def extract_version(s):
417
350
x = float (s [idx + 1 :])
418
351
return x
419
352
420
- if not symbolizer_path :
421
- for path in os .environ ["PATH" ].split (os .pathsep ):
422
- candidates = glob .glob (os .path .join (path , 'llvm-symbolizer-*' ))
423
- if len (candidates ) > 0 :
424
- candidates .sort (key = extract_version , reverse = True )
425
- symbolizer_path = candidates [0 ]
426
- break
427
-
428
- if not os .path .exists (symbolizer_path ):
429
- print ("Symbolizer path does not exist:" , symbolizer_path , file = sys .stderr )
430
- sys .exit (1 )
431
-
432
- if args .v :
433
- print ("Looking for symbols in:" )
434
- for s in binary_prefixes :
435
- print (" %s" % (s ,))
436
- print ("Stripping source path prefixes:" )
437
- for s in paths_to_cut :
438
- print (" %s" % (s ,))
439
- print ("Using llvm-symbolizer binary in:\n %s" % (symbolizer_path ,))
440
- print ()
441
-
442
- symbolizer = Symbolizer (symbolizer_path , binary_prefixes , paths_to_cut )
443
- symbolizer .enable_html (args .html )
444
- symbolizer .enable_logging (args .d )
445
- if args .index :
446
- symbolizer .build_index ()
447
-
448
- if args .linkify :
449
- if not args .html :
450
- print ('Need --html to --linkify' , file = sys .stderr )
353
def main():
  """Command-line entry point: symbolize a HWASan report read from stdin.

  Parses the command line, works out where to find unstripped binaries,
  which source prefixes to strip and which llvm-symbolizer to run, then
  feeds every stdin line through a Symbolizer. Exits with status 1 when a
  symbols directory or the symbolizer binary cannot be found, or when
  --linkify is given without --html.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('-d', action='store_true')
  parser.add_argument('-v', action='store_true')
  parser.add_argument('--ignore-tags', action='store_true')
  parser.add_argument('--symbols', action='append')
  parser.add_argument('--source', action='append')
  parser.add_argument('--index', action='store_true')
  parser.add_argument('--symbolizer')
  parser.add_argument('--linkify', type=str)
  parser.add_argument('--html', action='store_true')
  parser.add_argument('args', nargs=argparse.REMAINDER)
  args = parser.parse_args()

  # Unstripped binaries location.
  binary_prefixes = args.symbols or []
  if not binary_prefixes:
    if 'ANDROID_PRODUCT_OUT' in os.environ:
      product_out = os.path.join(os.environ['ANDROID_PRODUCT_OUT'], 'symbols')
      binary_prefixes.append(product_out)
    binary_prefixes.append('/')

  for p in binary_prefixes:
    if not os.path.isdir(p):
      print("Symbols path does not exist or is not a directory:", p, file=sys.stderr)
      sys.exit(1)

  # Source location.
  paths_to_cut = args.source or []
  if not paths_to_cut:
    paths_to_cut.append(os.getcwd() + '/')
    if 'ANDROID_BUILD_TOP' in os.environ:
      paths_to_cut.append(os.environ['ANDROID_BUILD_TOP'] + '/')

  # llvm-symbolizer binary.
  # 1. --symbolizer flag
  # 2. environment variable
  # 3. unsuffixed binary in the directory of this script (sys.argv[0])
  # 4. if inside Android platform, prebuilt binary at a known path
  # 5. first "llvm-symbolizer", then "llvm-symbolizer-$VER" with the
  #    highest available version in $PATH
  symbolizer_path = args.symbolizer
  if not symbolizer_path:
    if 'LLVM_SYMBOLIZER_PATH' in os.environ:
      symbolizer_path = os.environ['LLVM_SYMBOLIZER_PATH']
    elif 'HWASAN_SYMBOLIZER_PATH' in os.environ:
      symbolizer_path = os.environ['HWASAN_SYMBOLIZER_PATH']

  if not symbolizer_path:
    s = os.path.join(os.path.dirname(sys.argv[0]), 'llvm-symbolizer')
    if os.path.exists(s):
      symbolizer_path = s

  if not symbolizer_path:
    if 'ANDROID_BUILD_TOP' in os.environ:
      s = os.path.join(os.environ['ANDROID_BUILD_TOP'], 'prebuilts/clang/host/linux-x86/llvm-binutils-stable/llvm-symbolizer')
      if os.path.exists(s):
        symbolizer_path = s

  # An unset PATH is treated as empty instead of raising KeyError.
  search_dirs = os.environ.get("PATH", "").split(os.pathsep)

  if not symbolizer_path:
    for path in search_dirs:
      p = os.path.join(path, 'llvm-symbolizer')
      if os.path.exists(p):
        symbolizer_path = p
        break

  if not symbolizer_path:
    for path in search_dirs:
      candidates = glob.glob(os.path.join(path, 'llvm-symbolizer-*'))
      if candidates:
        candidates.sort(key=extract_version, reverse=True)
        symbolizer_path = candidates[0]
        break

  # Every lookup above may have come up empty; os.path.exists(None) would
  # raise TypeError, so report that case explicitly.
  if not symbolizer_path:
    print("Could not find llvm-symbolizer; use --symbolizer or set LLVM_SYMBOLIZER_PATH",
          file=sys.stderr)
    sys.exit(1)
  if not os.path.exists(symbolizer_path):
    print("Symbolizer path does not exist:", symbolizer_path, file=sys.stderr)
    sys.exit(1)

  if args.v:
    print("Looking for symbols in:")
    for s in binary_prefixes:
      print(" %s" % (s,))
    print("Stripping source path prefixes:")
    for s in paths_to_cut:
      print(" %s" % (s,))
    print("Using llvm-symbolizer binary in:\n %s" % (symbolizer_path,))
    print()

  symbolizer = Symbolizer(symbolizer_path, binary_prefixes, paths_to_cut)
  symbolizer.enable_html(args.html)
  symbolizer.enable_logging(args.d)
  if args.index:
    symbolizer.build_index()

  if args.linkify:
    if not args.html:
      print('Need --html to --linkify', file=sys.stderr)
      sys.exit(1)
    symbolizer.read_linkify(args.linkify)

  for line in sys.stdin:
    # Python 2 yields byte strings from stdin; the regexes expect text.
    if sys.version_info.major < 3:
      line = line.decode('utf-8')
    symbolizer.save_access_address(line)
    # Stack-history lines are fully handled there and must not also be
    # treated as ordinary frames.
    if symbolizer.process_stack_history(line, ignore_tags=args.ignore_tags):
      continue
    symbolizer.symbolize_line(line)


if __name__ == '__main__':
  main()
0 commit comments