21
21
import threading
22
22
import time
23
23
import uuid
24
- from collections import deque , namedtuple
24
+ from collections import deque
25
25
from contextlib import contextmanager
26
26
27
27
import sentry_sdk
35
35
nanosecond_time ,
36
36
)
37
37
38
- RawFrameData = namedtuple (
39
- "RawFrameData" , ["abs_path" , "filename" , "function" , "lineno" , "module" ]
40
- )
41
-
42
38
if MYPY :
43
39
from types import FrameType
44
40
from typing import Any
54
50
import sentry_sdk .scope
55
51
import sentry_sdk .tracing
56
52
57
- RawStack = Tuple [RawFrameData , ...]
58
- RawSample = Sequence [Tuple [str , RawStack ]]
59
- RawSampleWithId = Sequence [Tuple [str , int , RawStack ]]
53
+ StackId = int
54
+
55
+ RawFrame = Tuple [
56
+ str , # abs_path
57
+ Optional [str ], # module
58
+ Optional [str ], # filename
59
+ str , # function
60
+ int , # lineno
61
+ ]
62
+ RawStack = Tuple [RawFrame , ...]
63
+ RawSample = Sequence [Tuple [str , Tuple [StackId , RawStack ]]]
60
64
61
65
ProcessedStack = Tuple [int , ...]
62
66
@@ -155,8 +159,13 @@ def teardown_profiler():
155
159
MAX_STACK_DEPTH = 128


def extract_stack(
    frame,  # type: Optional[FrameType]
    cwd,  # type: str
    prev_cache=None,  # type: Optional[Tuple[StackId, RawStack, Deque[FrameType]]]
    max_stack_depth=MAX_STACK_DEPTH,  # type: int
):
    # type: (...) -> Tuple[StackId, RawStack, Deque[FrameType]]
    """
    Extracts the stack starting at the specified frame. The extracted stack
    assumes the specified frame is the top of the stack, and works back
    through `f_back` until there is no caller left.

    :param frame: the inner most frame to start from, or `None` which
        yields an empty stack.
    :param cwd: the working directory, forwarded to `extract_frame`.
    :param prev_cache: the value returned by the previous call for the same
        thread, if any. Frames that are unchanged since then reuse the
        already extracted frame data.
    :param max_stack_depth: the maximum number of frames kept. The frames
        are collected in a `deque` bounded by this value, so on deeper
        stacks the frames appended first (the inner most ones) are dropped.
    :returns: a tuple of the hash of the stack, the stack itself (ordered
        from the inner most frame to the outer most frame) and the frame
        objects the stack was extracted from.
    """

    frames = deque(maxlen=max_stack_depth)  # type: Deque[FrameType]

    while frame is not None:
        frames.append(frame)
        frame = frame.f_back

    if prev_cache is None:
        stack = tuple(extract_frame(raw_frame, cwd) for raw_frame in frames)
    else:
        _, prev_stack, prev_frames = prev_cache
        prev_depth = len(prev_frames)
        depth = len(frames)

        # We want to match the frame found in this sample to the frames found in the
        # previous sample. If they are the same (using the `is` operator), we can
        # skip the expensive work of extracting the frame information and reuse what
        # we extracted during the last sample.
        #
        # A frame object keeps its identity for as long as the call is running
        # while its `f_lineno` keeps advancing, so identity alone is not enough:
        # the cached entry is only reused if its line number (index 4 of the raw
        # frame tuple) still matches. Otherwise the sample would report a stale
        # line number and a stale stack id.
        #
        # Make sure to keep in mind that the stack is ordered from the inner most
        # frame to the outer most frame so be careful with the indexing. Both
        # stacks are aligned on their outer most frame, hence the offset range.
        stack = tuple(
            prev_stack[i]
            if i >= 0
            and raw_frame is prev_frames[i]
            and prev_stack[i][4] == raw_frame.f_lineno
            else extract_frame(raw_frame, cwd)
            for i, raw_frame in zip(range(prev_depth - depth, prev_depth), frames)
        )

    # Instead of mapping the stack into frame ids and hashing
    # that as a tuple, we can directly hash the stack.
    # This saves us from having to generate yet another list.
    # Additionally, using the stack as the key directly is
    # costly because the stack can be large, so we pre-hash
    # the stack, and use the hash as the key as this will be
    # needed a few times to improve performance.
    stack_id = hash(stack)

    return stack_id, stack, frames
177
215
178
def extract_frame(frame, cwd):
    # type: (FrameType, str) -> RawFrame
    """
    Extracts the raw data of a single frame.

    :param frame: the frame to extract the data from.
    :param cwd: the working directory that the code's filename is joined to.
    :returns: a plain tuple of
        `(abs_path, module, filename, function, lineno)`.
    """
    abs_path = frame.f_code.co_filename

    # Best effort: any failure to read the module name results in `None`.
    try:
        module = frame.f_globals["__name__"]
    except Exception:
        module = None

    # namedtuples can be many times slower when initializing
    # and accessing attributes so we opt to use a tuple here instead
    return (
        # This originally was `os.path.abspath(abs_path)` but that had
        # a large performance overhead.
        #
        # According to docs, this is equivalent to
        # `os.path.normpath(os.path.join(os.getcwd(), path))`.
        # The `os.getcwd()` call is slow here, so we precompute it.
        #
        # Additionally, since we are using normalized path already,
        # we skip calling `os.path.normpath` entirely.
        os.path.join(cwd, abs_path),
        module,
        filename_for_module(module, abs_path) or None,
        get_frame_name(frame),
        frame.f_lineno,
    )
194
244
195
245
@@ -200,6 +250,8 @@ def get_frame_name(frame):
200
250
# we should consider using instead where possible
201
251
202
252
f_code = frame .f_code
253
+ co_varnames = f_code .co_varnames
254
+
203
255
# co_name only contains the frame name. If the frame was a method,
204
256
# the class name will NOT be included.
205
257
name = f_code .co_name
@@ -210,8 +262,8 @@ def get_frame_name(frame):
210
262
if (
211
263
# the co_varnames start with the frame's positional arguments
212
264
# and we expect the first to be `self` if its an instance method
213
- f_code . co_varnames
214
- and f_code . co_varnames [0 ] == "self"
265
+ co_varnames
266
+ and co_varnames [0 ] == "self"
215
267
and "self" in frame .f_locals
216
268
):
217
269
for cls in frame .f_locals ["self" ].__class__ .__mro__ :
@@ -226,8 +278,8 @@ def get_frame_name(frame):
226
278
if (
227
279
# the co_varnames start with the frame's positional arguments
228
280
# and we expect the first to be `cls` if its a class method
229
- f_code . co_varnames
230
- and f_code . co_varnames [0 ] == "cls"
281
+ co_varnames
282
+ and co_varnames [0 ] == "cls"
231
283
and "cls" in frame .f_locals
232
284
):
233
285
for cls in frame .f_locals ["cls" ].__mro__ :
@@ -338,13 +390,11 @@ class SampleBuffer(object):
338
390
def __init__(self, capacity):
    # type: (int) -> None
    """
    Creates a buffer with `capacity` empty slots.

    :param capacity: the number of samples the buffer can hold.
    """

    # Every slot starts out as `None` until a sample is written into it.
    self.buffer = [None] * capacity  # type: List[Optional[Tuple[int, RawSample]]]
    self.capacity = capacity  # type: int
    # The index of the slot the next sample is written to.
    self.idx = 0  # type: int
346
396
347
def write(self, ts, sample):
    # type: (int, RawSample) -> None
    """
    Stores the sample together with its timestamp in the current slot of
    the buffer and advances to the next slot, wrapping around to the first
    slot once the capacity is reached.

    Writing to the buffer is not thread safe.

    :param ts: the timestamp the sample was taken at.
    :param sample: the sample, one `(thread id, (stack id, stack))` entry
        per thread.
    """
    idx = self.idx

    self.buffer[idx] = (ts, sample)
    self.idx = (idx + 1) % self.capacity
380
414
381
415
def slice_profile (self , start_ns , stop_ns ):
382
416
# type: (int, int) -> ProcessedProfile
383
417
samples = [] # type: List[ProcessedSample]
384
- stacks = dict () # type: Dict[int , int]
385
- stacks_list = list () # type: List[ProcessedStack]
386
- frames = dict () # type: Dict[RawFrameData , int]
387
- frames_list = list () # type: List[ProcessedFrame]
418
+ stacks = {} # type: Dict[StackId , int]
419
+ stacks_list = [] # type: List[ProcessedStack]
420
+ frames = {} # type: Dict[RawFrame , int]
421
+ frames_list = [] # type: List[ProcessedFrame]
388
422
389
423
for ts , sample in filter (None , self .buffer ):
390
424
if start_ns > ts or ts > stop_ns :
391
425
continue
392
426
393
427
elapsed_since_start_ns = str (ts - start_ns )
394
428
395
- for tid , hashed_stack , stack in sample :
429
+ for tid , ( hashed_stack , stack ) in sample :
396
430
# Check if the stack is indexed first, this lets us skip
397
431
# indexing frames if it's not necessary
398
432
if hashed_stack not in stacks :
@@ -401,11 +435,11 @@ def slice_profile(self, start_ns, stop_ns):
401
435
frames [frame ] = len (frames )
402
436
frames_list .append (
403
437
{
404
- "abs_path" : frame . abs_path ,
405
- "function " : frame . function or "<unknown>" ,
406
- "filename" : frame . filename ,
407
- "lineno " : frame . lineno ,
408
- "module " : frame . module ,
438
+ "abs_path" : frame [ 0 ] ,
439
+ "module " : frame [ 1 ] ,
440
+ "filename" : frame [ 2 ] ,
441
+ "function " : frame [ 3 ] ,
442
+ "lineno " : frame [ 4 ] ,
409
443
}
410
444
)
411
445
@@ -439,6 +473,14 @@ def slice_profile(self, start_ns, stop_ns):
439
473
440
474
def make_sampler(self):
    # type: () -> Callable[..., None]
    """
    Creates the function that takes a sample of the stacks of all threads
    and writes it to this buffer.

    :returns: a function accepting (and ignoring) any arguments.
    """
    # Computed once here so it does not have to be looked up for every frame.
    cwd = os.getcwd()

    # In Python3+, we can use the `nonlocal` keyword to rebind the value,
    # but this is not possible in Python2. To get around this, we wrap
    # the value in a list to allow updating this value each sample.
    #
    # Maps each thread id to what `extract_stack` returned for it during the
    # last sample. Note that this includes the frame objects themselves, so
    # they stay referenced until the next sample replaces them.
    last_sample = [
        {}
    ]  # type: List[Dict[int, Tuple[StackId, RawStack, Deque[FrameType]]]]

    def _sample_stack(*args, **kwargs):
        # type: (*Any, **Any) -> None
        """
        Takes a sample of the current stack of every thread.
        This should be called at a regular interval to collect samples.
        """

        now = nanosecond_time()
        raw_sample = {
            tid: extract_stack(frame, cwd, last_sample[0].get(tid))
            for tid, frame in sys._current_frames().items()
        }

        # Remember this sample so the next one can reuse unchanged frames.
        last_sample[0] = raw_sample

        # The frame objects are only needed for the cache, not the buffer.
        sample = [
            (str(tid), (stack_id, stack))
            for tid, (stack_id, stack, _) in raw_sample.items()
        ]

        self.write(now, sample)

    return _sample_stack
459
508
0 commit comments