@@ -21,6 +21,9 @@ import sys
21
21
import string
22
22
import subprocess
23
23
import argparse
24
+ import mmap
25
+ import struct
26
+ import os
24
27
25
28
if sys .version_info .major < 3 :
26
29
# Simulate Python 3.x behaviour of defaulting to UTF-8 for print. This is
@@ -31,6 +34,71 @@ if sys.version_info.major < 3:
31
34
last_access_address = None
32
35
last_access_tag = None
33
36
37
# Minimal ELF parser used to look up GNU build ids. Only 64-bit,
# little-endian ELF files are handled, and only the fields needed to locate
# the build id note are decoded. Parsing runs over a memoryview of an mmap so
# that file contents are not copied.

# ELF file header: total size and byte offsets of the fields that are read.
Ehdr_size = 64
e_shoff_offset = 40  # e_shoff: file offset of the section header table
e_shnum_offset = 60  # e_shnum: number of section header entries

# Section header: entry size and byte offsets of the fields that are read.
Shdr_size = 64
sh_type_offset = 4
sh_offset_offset = 24
sh_size_offset = 32
SHT_NOTE = 7  # sh_type value identifying a note section

# Note header: three 32-bit words (n_namesz, n_descsz, n_type).
Nhdr_size = 12
NT_GNU_BUILD_ID = 3  # n_type value of the GNU build id note
52
+
53
def align_up(size, alignment):
    """Return size rounded up to the next multiple of alignment.

    The rounding is done with a bit mask, so the result is only a multiple
    of alignment when alignment is a power of two.
    """
    mask = alignment - 1
    return (size + mask) & ~mask
55
+
56
def handle_Nhdr(mv, sh_size):
    """Scan the contents of a note section for the GNU build id.

    Args:
      mv: buffer (bytes-like, e.g. a memoryview) over one note section.
      sh_size: size of the section in bytes, as given by its section header.

    Returns:
      The build id as a hex string, or None if the section contains no
      complete GNU build id note.
    """
    # Never read past the buffer, even if the section header claims more data
    # than is actually available.
    end = min(sh_size, len(mv))
    offset = 0
    # Require room for a whole note header; trailing bytes of a truncated
    # section end the scan instead of raising struct.error.
    while offset + Nhdr_size <= end:
        n_namesz, n_descsz, n_type = struct.unpack_from('<III', mv, offset)
        name_start = offset + Nhdr_size
        # The owner name of the build id note is exactly the four bytes
        # "GNU\0".
        if (n_type == NT_GNU_BUILD_ID and n_namesz == 4 and
                mv[name_start:name_start + 4] == b"GNU\x00"):
            desc_start = name_start + 4
            desc_end = desc_start + n_descsz
            if desc_end > end:
                # Descriptor is cut off; a partial build id would be a wrong
                # lookup key, so report nothing.
                return None
            return mv[desc_start:desc_end].hex()
        # Name and descriptor are each padded to a 4-byte boundary.
        offset = name_start + align_up(n_namesz, 4) + align_up(n_descsz, 4)
    return None
67
+
68
def handle_Shdr(mv):
    """Decode one section header entry.

    Args:
      mv: buffer over a single section header.

    Returns:
      (sh_offset, sh_size) when the entry describes a note section
      (sh_type == SHT_NOTE); (None, None) for any other section type.
    """
    (section_type,) = struct.unpack_from('<I', mv, sh_type_offset)
    if section_type == SHT_NOTE:
        (file_offset,) = struct.unpack_from('<Q', mv, sh_offset_offset)
        (byte_count,) = struct.unpack_from('<Q', mv, sh_size_offset)
        return file_offset, byte_count
    return None, None
75
+
76
def handle_elf(mv):
    """Return the GNU build id of the ELF image in mv, if one is found.

    Args:
      mv: buffer (bytes-like, e.g. a memoryview) over the entire file.

    Returns:
      The build id as a hex string, or None when the buffer is not a 64-bit
      little-endian ELF image or no note section yields a build id.
    """
    # \x02 is ELFCLASS64, \x01 is ELFDATA2LSB. HWASan currently only works on
    # 64-bit little endian platforms (x86_64 and ARM64). If this changes, we
    # will have to extend the parsing code.
    if mv[:6] != b'\x7fELF\x02\x01':
        return None
    file_size = len(mv)
    if file_size < Ehdr_size:
        # Too short to hold the header fields read below.
        return None
    e_shnum, = struct.unpack_from('<H', mv, e_shnum_offset)
    e_shoff, = struct.unpack_from('<Q', mv, e_shoff_offset)
    for i in range(e_shnum):
        start = e_shoff + i * Shdr_size
        if start + Shdr_size > file_size:
            # The section header table runs past the end of the file; all
            # later entries lie even further out, so stop here.
            break
        sh_offset, sh_size = handle_Shdr(mv[start:start + Shdr_size])
        if sh_offset is None:
            continue
        if sh_offset + sh_size > file_size:
            # Note section is not fully contained in the file; skip it rather
            # than parse a short slice.
            continue
        result = handle_Nhdr(mv[sh_offset:sh_offset + sh_size], sh_size)
        if result is not None:
            return result
    return None
93
+
94
def get_buildid(filename):
    """Return the GNU build id of the file at filename.

    Args:
      filename: path of the file to inspect.

    Returns:
      The value returned by handle_elf() for the file's contents, or None if
      the file is smaller than an ELF header.

    Raises:
      OSError: if the file cannot be opened or mapped.
    """
    # Binary mode: the file holds raw ELF data and is only accessed through
    # its descriptor, so no text decoding layer is wanted.
    with open(filename, "rb") as fd:
        # A file shorter than the ELF header cannot be parsed; this check also
        # keeps empty files away from mmap, which rejects them.
        if os.fstat(fd.fileno()).st_size < Ehdr_size:
            return None
        with mmap.mmap(fd.fileno(), 0, access=mmap.ACCESS_READ) as m:
            with memoryview(m) as mv:
                return handle_elf(mv)
101
+
34
102
class Symbolizer :
35
103
def __init__ (self , path , binary_prefixes , paths_to_cut ):
36
104
self .__pipe = None
@@ -39,6 +107,7 @@ class Symbolizer:
39
107
self .__paths_to_cut = paths_to_cut
40
108
self .__log = False
41
109
self .__warnings = set ()
110
+ self .__index = {}
42
111
43
112
def enable_logging (self , enable ):
44
113
self .__log = enable
@@ -77,9 +146,12 @@ class Symbolizer:
77
146
file_name = re .sub (".*crtstuff.c:0" , "???:0" , file_name )
78
147
return file_name
79
148
80
- def __process_binary_name (self , name ):
149
+ def __process_binary_name (self , name , buildid = None ):
81
150
if name .startswith ('/' ):
82
151
name = name [1 :]
152
+ if buildid is not None and buildid in self .__index :
153
+ return self .__index [buildid ]
154
+
83
155
for p in self .__binary_prefixes :
84
156
full_path = os .path .join (p , name )
85
157
if os .path .exists (full_path ):
@@ -121,10 +193,10 @@ class Symbolizer:
121
193
except Symbolizer .__EOF :
122
194
pass
123
195
124
- def iter_call_stack (self , binary , addr ):
196
+ def iter_call_stack (self , binary , buildid , addr ):
125
197
self .__open_pipe ()
126
198
p = self .__pipe
127
- binary = self .__process_binary_name (binary )
199
+ binary = self .__process_binary_name (binary , buildid )
128
200
if not binary :
129
201
return
130
202
self .__write ("CODE %s %s" % (binary , addr ))
@@ -137,15 +209,25 @@ class Symbolizer:
137
209
except Symbolizer .__EOF :
138
210
pass
139
211
212
def build_index(self):
    """Index the files below every binary prefix by their build id.

    Walks each directory in the configured binary prefixes and, for every
    file for which get_buildid() reports a build id, records a mapping from
    that build id to the file's path. If several files share a build id, the
    one visited last wins. Files that cannot be opened or read are skipped.
    """
    for prefix in self.__binary_prefixes:
        for dname, _, fnames in os.walk(prefix):
            for fn in fnames:
                filename = os.path.join(dname, fn)
                try:
                    bid = get_buildid(filename)
                except (IOError, OSError):
                    # E.g. a dangling symlink or a file without read
                    # permission; one bad entry must not abort indexing.
                    continue
                if bid is not None:
                    self.__index[bid] = filename
220
+
140
221
def symbolize_line (line , symbolizer_path ):
141
222
#0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
142
- match = re .match (r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)' , line , re .UNICODE )
223
+ match = re .match (r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)(?:\s*\(BuildId: ([0-9a-f]+)\))? ' , line , re .UNICODE )
143
224
if match :
144
225
frameno = match .group (2 )
145
226
binary = match .group (5 )
146
227
addr = int (match .group (6 ), 16 )
228
+ buildid = match .group (7 )
147
229
148
- frames = list (symbolizer .iter_call_stack (binary , addr ))
230
+ frames = list (symbolizer .iter_call_stack (binary , buildid , addr ))
149
231
150
232
if len (frames ) > 0 :
151
233
print ("%s#%s%s%s in %s" % (match .group (1 ), match .group (2 ),
@@ -210,6 +292,7 @@ parser.add_argument('-v', action='store_true')
210
292
parser .add_argument ('--ignore-tags' , action = 'store_true' )
211
293
parser .add_argument ('--symbols' , action = 'append' )
212
294
parser .add_argument ('--source' , action = 'append' )
295
+ parser .add_argument ('--index' , action = 'store_true' )
213
296
parser .add_argument ('--symbolizer' )
214
297
parser .add_argument ('args' , nargs = argparse .REMAINDER )
215
298
args = parser .parse_args ()
@@ -297,6 +380,8 @@ if args.v:
297
380
298
381
symbolizer = Symbolizer (symbolizer_path , binary_prefixes , paths_to_cut )
299
382
symbolizer .enable_logging (args .d )
383
+ if args .index :
384
+ symbolizer .build_index ()
300
385
301
386
for line in sys .stdin :
302
387
if sys .version_info .major < 3 :
0 commit comments