|
6 | 6 | AnnotatedValue,
|
7 | 7 | capture_internal_exception,
|
8 | 8 | disable_capture_event,
|
| 9 | + format_timestamp, |
| 10 | + json_dumps, |
9 | 11 | safe_repr,
|
10 | 12 | strip_string,
|
11 |
| - format_timestamp, |
12 | 13 | )
|
13 | 14 |
|
| 15 | +import sentry_sdk.utils |
| 16 | + |
14 | 17 | from sentry_sdk._compat import text_type, PY2, string_types, number_types, iteritems
|
15 | 18 |
|
16 | 19 | from sentry_sdk._types import MYPY
|
17 | 20 |
|
18 | 21 | if MYPY:
|
| 22 | + from datetime import timedelta |
| 23 | + |
19 | 24 | from types import TracebackType
|
20 | 25 |
|
21 | 26 | from typing import Any
|
| 27 | + from typing import Callable |
| 28 | + from typing import ContextManager |
22 | 29 | from typing import Dict
|
23 | 30 | from typing import List
|
24 | 31 | from typing import Optional
|
25 |
| - from typing import Callable |
26 |
| - from typing import Union |
27 |
| - from typing import ContextManager |
| 32 | + from typing import Tuple |
28 | 33 | from typing import Type
|
| 34 | + from typing import Union |
29 | 35 |
|
30 | 36 | from sentry_sdk._types import NotImplementedType, Event
|
31 | 37 |
|
| 38 | + Span = Dict[str, Any] |
| 39 | + |
32 | 40 | ReprProcessor = Callable[[Any, Dict[str, Any]], Union[NotImplementedType, str]]
|
33 | 41 | Segment = Union[str, int]
|
34 | 42 |
|
|
48 | 56 | # Bytes are technically not strings in Python 3, but we can serialize them
|
49 | 57 | serializable_str_types = (str, bytes)
|
50 | 58 |
|
| 59 | + |
| 60 | +# Maximum length of JSON-serialized event payloads that can be safely sent |
| 61 | +# before the server may reject the event due to its size. This is not intended |
| 62 | +# to reflect actual values defined server-side, but rather only be an upper |
| 63 | +# bound for events sent by the SDK. |
| 64 | +# |
| 65 | +# Can be overridden to send more bytes, e.g. with a custom server. |
| 66 | +# When changing this, keep in mind that events may be a little bit larger than |
| 67 | +# this value due to attached metadata, so keep the number conservative. |
| 68 | +MAX_EVENT_BYTES = 10 ** 6 |
| 69 | + |
51 | 70 | MAX_DATABAG_DEPTH = 5
|
52 | 71 | MAX_DATABAG_BREADTH = 10
|
53 | 72 | CYCLE_MARKER = u"<cyclic>"
|
@@ -93,11 +112,12 @@ def __exit__(
|
93 | 112 | self._ids.pop(id(self._objs.pop()), None)
|
94 | 113 |
|
95 | 114 |
|
96 |
| -def serialize(event, **kwargs): |
97 |
| - # type: (Event, **Any) -> Event |
| 115 | +def serialize(event, smart_transaction_trimming=False, **kwargs): |
| 116 | + # type: (Event, bool, **Any) -> Event |
98 | 117 | memo = Memo()
|
99 | 118 | path = [] # type: List[Segment]
|
100 | 119 | meta_stack = [] # type: List[Dict[str, Any]]
|
| 120 | + span_description_bytes = [] # type: List[int] |
101 | 121 |
|
102 | 122 | def _annotate(**meta):
|
103 | 123 | # type: (**Any) -> None
|
@@ -325,14 +345,113 @@ def _serialize_node_impl(
|
325 | 345 | if not isinstance(obj, string_types):
|
326 | 346 | obj = safe_repr(obj)
|
327 | 347 |
|
| 348 | + # Allow span descriptions to be longer than other strings. |
| 349 | + # |
| 350 | + # For database auto-instrumented spans, the description contains |
| 351 | + # potentially long SQL queries that are most useful when not truncated. |
| 352 | + # Because arbitrarily large events may be discarded by the server as a |
| 353 | + # protection mechanism, we dynamically limit the description length |
| 354 | + # later in _truncate_span_descriptions. |
| 355 | + if ( |
| 356 | + smart_transaction_trimming |
| 357 | + and len(path) == 3 |
| 358 | + and path[0] == "spans" |
| 359 | + and path[-1] == "description" |
| 360 | + ): |
| 361 | + span_description_bytes.append(len(obj)) |
| 362 | + return obj |
328 | 363 | return _flatten_annotated(strip_string(obj))
|
329 | 364 |
|
| 365 | + def _truncate_span_descriptions(serialized_event, event, excess_bytes): |
| 366 | + # type: (Event, Event, int) -> None |
| 367 | + """ |
| 368 | + Modifies serialized_event in-place trying to remove excess_bytes from |
| 369 | + span descriptions. The original event is used read-only to access the |
| 370 | + span timestamps (represented as RFC 3339-formatted strings in |
| 371 | + serialized_event). |
| 372 | +
|
| 373 | + It uses heuristics to prioritize preserving the description of spans |
| 374 | + that might be the most interesting ones in terms of understanding and |
| 375 | + optimizing performance. |
| 376 | + """ |
| 377 | + # When truncating a description, preserve a small prefix. |
| 378 | + min_length = 10 |
| 379 | + |
| 380 | + def shortest_duration_longest_description_first(args): |
| 381 | + # type: (Tuple[int, Span]) -> Tuple[timedelta, int] |
| 382 | + i, serialized_span = args |
| 383 | + span = event["spans"][i] |
| 384 | + now = datetime.utcnow() |
| 385 | + start = span.get("start_timestamp") or now |
| 386 | + end = span.get("timestamp") or now |
| 387 | + duration = end - start |
| 388 | + description = serialized_span.get("description") or "" |
| 389 | + return (duration, -len(description)) |
| 390 | + |
| 391 | + # Note: for simplicity we sort spans by exact duration and description |
| 392 | + # length. If ever needed, we could have a more involved heuristic, e.g. |
| 393 | + # replacing exact durations with "buckets" and/or looking at other span |
| 394 | + # properties. |
| 395 | + path.append("spans") |
| 396 | + for i, span in sorted( |
| 397 | + enumerate(serialized_event.get("spans") or []), |
| 398 | + key=shortest_duration_longest_description_first, |
| 399 | + ): |
| 400 | + description = span.get("description") or "" |
| 401 | + if len(description) <= min_length: |
| 402 | + continue |
| 403 | + excess_bytes -= len(description) - min_length |
| 404 | + path.extend([i, "description"]) |
| 405 | + # Note: the last time we call strip_string we could preserve a few |
| 406 | + # more bytes up to a total length of MAX_EVENT_BYTES. Since that's |
| 407 | + # not strictly required, we leave it out for now for simplicity. |
| 408 | + span["description"] = _flatten_annotated( |
| 409 | + strip_string(description, max_length=min_length) |
| 410 | + ) |
| 411 | + del path[-2:] |
| 412 | + del meta_stack[len(path) + 1 :] |
| 413 | + |
| 414 | + if excess_bytes <= 0: |
| 415 | + break |
| 416 | + path.pop() |
| 417 | + del meta_stack[len(path) + 1 :] |
| 418 | + |
330 | 419 | disable_capture_event.set(True)
|
331 | 420 | try:
|
332 | 421 | rv = _serialize_node(event, **kwargs)
|
333 | 422 | if meta_stack and isinstance(rv, dict):
|
334 | 423 | rv["_meta"] = meta_stack[0]
|
335 | 424 |
|
| 425 | + sum_span_description_bytes = sum(span_description_bytes) |
| 426 | + if smart_transaction_trimming and sum_span_description_bytes > 0: |
| 427 | + span_count = len(event.get("spans") or []) |
| 428 | + # This is an upper bound of how many bytes all descriptions would |
| 429 | + # consume if the usual string truncation in _serialize_node_impl |
| 430 | + # had taken place, not accounting for the metadata attached |
| 431 | + # as event["_meta"]. |
| 432 | + descriptions_budget_bytes = span_count * sentry_sdk.utils.MAX_STRING_LENGTH |
| 433 | + |
| 434 | + # If leaving descriptions untruncated produced more bytes than the |
| 435 | + # usual string truncation would have, check if the event is too large |
| 436 | + # and we need to truncate some descriptions. |
| 437 | + # |
| 438 | + # This is guarded with an if statement to avoid JSON-encoding the |
| 439 | + # event unnecessarily. |
| 440 | + if sum_span_description_bytes > descriptions_budget_bytes: |
| 441 | + original_bytes = len(json_dumps(rv)) |
| 442 | + excess_bytes = original_bytes - MAX_EVENT_BYTES |
| 443 | + if excess_bytes > 0: |
| 444 | + # Event is too large, will likely be discarded by the |
| 445 | + # server. Trim it down before sending. |
| 446 | + _truncate_span_descriptions(rv, event, excess_bytes) |
| 447 | + |
| 448 | + # Span descriptions truncated, set or reset _meta. |
| 449 | + # |
| 450 | + # We run the same code earlier because we want to account |
| 451 | + # for _meta when calculating original_bytes, the number of |
| 452 | + # bytes in the JSON-encoded event. |
| 453 | + if meta_stack and isinstance(rv, dict): |
| 454 | + rv["_meta"] = meta_stack[0] |
336 | 455 | return rv
|
337 | 456 | finally:
|
338 | 457 | disable_capture_event.set(False)
|
0 commit comments