Skip to content

Use a deeper copy for frame vars #3392

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
212 changes: 212 additions & 0 deletions sentry_sdk/_copy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
"""
A modified version of Python 3.11's copy.deepcopy (found in Python's 'cpython/Lib/copy.py')
that falls back to repr for non-datastructure types that we use for extracting frame local variables
in a safe way without holding references to the original objects.

https://github.com/python/cpython/blob/v3.11.7/Lib/copy.py#L128-L241

Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 Python Software Foundation;

All Rights Reserved


PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
--------------------------------------------

1. This LICENSE AGREEMENT is between the Python Software Foundation
("PSF"), and the Individual or Organization ("Licensee") accessing and
otherwise using this software ("Python") in source or binary form and
its associated documentation.

2. Subject to the terms and conditions of this License Agreement, PSF hereby
grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce,
analyze, test, perform and/or display publicly, prepare derivative works,
distribute, and otherwise use Python alone or in any derivative version,
provided, however, that PSF's License Agreement and PSF's notice of copyright,
i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 Python Software Foundation;
All Rights Reserved" are retained in Python alone or in any derivative version
prepared by Licensee.

3. In the event Licensee prepares a derivative work that is based on
or incorporates Python or any part thereof, and wants to make
the derivative work available to others as provided herein, then
Licensee hereby agrees to include in any such work a brief summary of
the changes made to Python.

4. PSF is making Python available to Licensee on an "AS IS"
basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
INFRINGE ANY THIRD PARTY RIGHTS.

5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.

6. This License Agreement will automatically terminate upon a material
breach of its terms and conditions.

7. Nothing in this License Agreement shall be deemed to create any
relationship of agency, partnership, or joint venture between PSF and
Licensee. This License Agreement does not grant permission to use PSF
trademarks or trade name in a trademark sense to endorse or promote
products or services of Licensee, or any third party.

8. By copying, installing or otherwise using Python, Licensee
agrees to be bound by the terms and conditions of this License
Agreement.

"""

import types
import weakref
import sys
from collections.abc import Mapping, Sequence, Set

from sentry_sdk.utils import (
safe_repr,
serializable_str_types,
capture_internal_exception,
capture_event_disabled,
)
from sentry_sdk._types import TYPE_CHECKING

if TYPE_CHECKING:
from typing import Any, Optional, Union


# copying these over to avoid yet another circular dep
MAX_DATABAG_DEPTH = 5
MAX_DATABAG_BREADTH = 10


def deepcopy_fallback_repr(x, memo=None, _nil=[], stack_depth=0):  # noqa: B006
    # type: (Any, Optional[dict[int, Any]], Any, int) -> Any
    """Return a detached copy of ``x`` built only from plain data structures.

    This is a deep-copy-like operation on arbitrary Python objects:

    * objects whose exact type is registered in ``_deepcopy_dispatch`` are
      handled by the registered copier;
    * classes are returned as-is;
    * string-like values (``serializable_str_types``) and anything that is
      not a mapping, set or sequence are replaced by ``safe_repr(x)``;
    * mappings are copied into a ``dict``, sets and sequences into a ``list``.

    The container helpers return an empty container once ``stack_depth``
    reaches ``MAX_DATABAG_DEPTH`` and copy at most ``MAX_DATABAG_BREADTH``
    items, because more than that would be thrown away by the serializer
    anyway.

    :param x: The object to copy.
    :param memo: Maps ``id(obj)`` to the copy already made for ``obj`` so that
        shared and self-referencing objects are only copied once. A fresh
        dict is created when omitted.
    :param _nil: Internal "not found" sentinel for the memo lookup; callers
        should not pass it.
    :param stack_depth: Current nesting depth; each copier is invoked with
        ``stack_depth + 1``.
    :returns: The copy, or a fixed placeholder string if copying raised.
    """
    with capture_event_disabled():
        try:
            if memo is None:
                memo = {}

            d = id(x)
            y = memo.get(d, _nil)
            if y is not _nil:
                # Already copied (or currently being copied): reuse it.
                return y

            cls = type(x)
            child_depth = stack_depth + 1

            copier = _deepcopy_dispatch.get(cls)
            if copier is not None:
                y = copier(x, memo, stack_depth=child_depth)
            elif issubclass(cls, type):
                y = _deepcopy_atomic(x, memo, stack_depth=child_depth)
            elif isinstance(x, serializable_str_types):
                # Must come before the Sequence check below: string-like
                # values are sequences too, but we want their repr, not a
                # list of their elements.
                y = safe_repr(x)
            elif isinstance(x, Mapping):
                y = _deepcopy_dict(x, memo, stack_depth=child_depth)
            elif isinstance(x, (Set, Sequence)):
                y = _deepcopy_list(x, memo, stack_depth=child_depth)
            else:
                y = safe_repr(x)

            # If is its own copy, don't memoize.
            if y is not x:
                memo[d] = y
                _keep_alive(x, memo)  # Make sure x lives at least as long as d
            return y
        except Exception:
            # Only swallow regular errors (same policy as safe_repr);
            # KeyboardInterrupt / SystemExit must keep propagating.
            capture_internal_exception(sys.exc_info())
            return "<failed to serialize, use init(debug=True) to see error logs>"


# Dispatch table mapping an exact type to the copier function used for it.
# `d` is a short alias for the same dict, used while the table is filled in.
_deepcopy_dispatch = d = {}  # type: dict[Any, Any]


def _deepcopy_atomic(x, memo, stack_depth=0):
    # type: (Any, dict[int, Any], int) -> Any
    """Copier for values that are used as their own copy.

    ``memo`` and ``stack_depth`` are accepted only so that the signature
    matches the other copiers; neither is read.
    """
    return x


# Register every type whose instances are returned unchanged by
# _deepcopy_atomic. Insertion order matches the one-by-one registration.
d.update(
    dict.fromkeys(
        (
            type(None),
            type(Ellipsis),
            type(NotImplemented),
            int,
            float,
            bool,
            complex,
            bytes,
            str,
            types.CodeType,
            type,
            range,
            types.BuiltinFunctionType,
            types.FunctionType,
            weakref.ref,
            property,
        ),
        _deepcopy_atomic,
    )
)


def _deepcopy_list(x, memo, stack_depth=0):
    # type: (Union[Sequence[Any], Set[Any]], dict[int, Any], int) -> list[Any]
    """Copy a set or sequence into a new list of copied elements.

    The result is registered in ``memo`` before any element is visited so
    that a container which (indirectly) contains itself resolves to the list
    being built. An empty list is returned once ``stack_depth`` has reached
    ``MAX_DATABAG_DEPTH``; otherwise at most ``MAX_DATABAG_BREADTH`` elements
    are copied.
    """
    copied = []  # type: list[Any]
    memo[id(x)] = copied

    if stack_depth < MAX_DATABAG_DEPTH:
        child_depth = stack_depth + 1
        for index, element in enumerate(x):
            if index >= MAX_DATABAG_BREADTH:
                break
            copied.append(
                deepcopy_fallback_repr(element, memo, stack_depth=child_depth)
            )

    return copied


def _deepcopy_dict(x, memo, stack_depth=0):
    # type: (Mapping[Any, Any], dict[int, Any], int) -> dict[Any, Any]
    """Copy a mapping into a new dict of copied keys and values.

    The result is registered in ``memo`` before any item is visited so that a
    mapping which (indirectly) contains itself resolves to the dict being
    built. An empty dict is returned once ``stack_depth`` has reached
    ``MAX_DATABAG_DEPTH``; otherwise at most ``MAX_DATABAG_BREADTH`` items are
    copied.

    :param x: The mapping to copy.
    :param memo: Shared ``id(obj) -> copy`` table of the running copy.
    :param stack_depth: Current nesting depth; children are copied with
        ``stack_depth + 1`` so the depth limit also applies below mappings.
    :returns: The new dict.
    """
    y = {}  # type: dict[Any, Any]
    memo[id(x)] = y
    if stack_depth >= MAX_DATABAG_DEPTH:
        return y

    child_depth = stack_depth + 1
    for i, (key, value) in enumerate(x.items()):
        if i >= MAX_DATABAG_BREADTH:
            break

        value_copy = deepcopy_fallback_repr(value, memo, stack_depth=child_depth)
        key_copy = deepcopy_fallback_repr(key, memo, stack_depth=child_depth)
        try:
            hash(key_copy)
        except TypeError:
            # A tuple or frozenset key is copied into a list, which cannot be
            # used as a dict key; keep the entry under the key's repr instead
            # of failing the whole mapping.
            key_copy = safe_repr(key)

        y[key_copy] = value_copy
    return y


def _deepcopy_method(x, memo, stack_depth=0):  # Copy instance methods
    # type: (types.MethodType, dict[int, Any], int) -> types.MethodType
    """Rebuild a bound method around a copy of the object it is bound to.

    :param x: The bound method to copy.
    :param memo: Shared ``id(obj) -> copy`` table of the running copy.
    :param stack_depth: Current nesting depth. The dispatcher passes this as
        a keyword to every registered copier, so it has to be accepted here;
        the bound object is copied with ``stack_depth + 1``.
    :returns: A new method object of the same type, with the same underlying
        function and the copied bound object.
    """
    bound_copy = deepcopy_fallback_repr(
        x.__self__, memo, stack_depth=stack_depth + 1
    )
    return type(x)(x.__func__, bound_copy)


# Bound methods are the only registered type with a dedicated copier.
d[types.MethodType] = _deepcopy_method

# Remove the temporary alias; the table stays available as _deepcopy_dispatch.
del d


def _keep_alive(x, memo):
# type: (Any, dict[int, Any]) -> None
"""Keeps a reference to the object x in the memo.

Because we remember objects by their id, we have
to assure that possibly temporary objects are kept
alive by referencing them.
We store a reference at the id of the memo, which should
normally not be used unless someone tries to deepcopy
the memo itself...
"""
try:
memo[id(memo)].append(x)
except KeyError:
# aha, this is the first one :-)
memo[id(memo)] = [x]
4 changes: 0 additions & 4 deletions sentry_sdk/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from sentry_sdk.utils import (
capture_internal_exceptions,
current_stacktrace,
disable_capture_event,
format_timestamp,
get_sdk_name,
get_type_name,
Expand Down Expand Up @@ -726,9 +725,6 @@ def capture_event(

:returns: An event ID. May be `None` if there is no DSN set or if the SDK decided to discard the event for other reasons. In such situations setting `debug=True` on `init()` may help.
"""
if disable_capture_event.get(False):
return None

if hint is None:
hint = {}
event_id = event.get("event_id")
Expand Down
10 changes: 10 additions & 0 deletions sentry_sdk/scope.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
)
from sentry_sdk._types import TYPE_CHECKING
from sentry_sdk.utils import (
disable_capture_event,
capture_internal_exception,
capture_internal_exceptions,
ContextVar,
Expand Down Expand Up @@ -1130,6 +1131,9 @@ def capture_event(self, event, hint=None, scope=None, **scope_kwargs):

:returns: An `event_id` if the SDK decided to send the event (see :py:meth:`sentry_sdk.client._Client.capture_event`).
"""
if disable_capture_event.get(False):
return None

scope = self._merge_scopes(scope, scope_kwargs)

event_id = self.get_client().capture_event(event=event, hint=hint, scope=scope)
Expand Down Expand Up @@ -1157,6 +1161,9 @@ def capture_message(self, message, level=None, scope=None, **scope_kwargs):

:returns: An `event_id` if the SDK decided to send the event (see :py:meth:`sentry_sdk.client._Client.capture_event`).
"""
if disable_capture_event.get(False):
return None

if level is None:
level = "info"

Expand All @@ -1182,6 +1189,9 @@ def capture_exception(self, error=None, scope=None, **scope_kwargs):

:returns: An `event_id` if the SDK decided to send the event (see :py:meth:`sentry_sdk.client._Client.capture_event`).
"""
if disable_capture_event.get(False):
return None

if error is not None:
exc_info = exc_info_from_error(error)
else:
Expand Down
5 changes: 1 addition & 4 deletions sentry_sdk/serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
format_timestamp,
safe_repr,
strip_string,
serializable_str_types,
)
from sentry_sdk._types import TYPE_CHECKING

Expand All @@ -33,10 +34,6 @@
Segment = Union[str, int]


# Bytes are technically not strings in Python 3, but we can serialize them
serializable_str_types = (str, bytes, bytearray, memoryview)


# Maximum length of JSON-serialized event payloads that can be safely sent
# before the server may reject the event due to its size. This is not intended
# to reflect actual values defined server-side, but rather only be an upper
Expand Down
28 changes: 23 additions & 5 deletions sentry_sdk/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import sys
import threading
import time
from contextlib import contextmanager
from collections import namedtuple
from datetime import datetime
from decimal import Decimal
Expand Down Expand Up @@ -50,6 +51,7 @@
Type,
TypeVar,
Union,
Generator,
)

from gevent.hub import Hub
Expand All @@ -71,6 +73,9 @@

SENSITIVE_DATA_SUBSTITUTE = "[Filtered]"

# Bytes are technically not strings in Python 3, but we can serialize them
serializable_str_types = (str, bytes, bytearray, memoryview)


def json_dumps(data):
# type: (Any) -> bytes
Expand Down Expand Up @@ -549,10 +554,11 @@ def safe_str(value):

def safe_repr(value):
# type: (Any) -> str
try:
return repr(value)
except Exception:
return "<broken repr>"
with capture_event_disabled():
try:
return repr(value)
except Exception:
return "<broken repr>"


def filename_for_module(module, abs_path):
Expand Down Expand Up @@ -616,7 +622,9 @@ def serialize_frame(
)

if include_local_variables:
rv["vars"] = frame.f_locals.copy()
from sentry_sdk._copy import deepcopy_fallback_repr

rv["vars"] = deepcopy_fallback_repr(frame.f_locals)

return rv

Expand Down Expand Up @@ -1370,6 +1378,16 @@ def transaction_from_function(func):
disable_capture_event = ContextVar("disable_capture_event")


@contextmanager
def capture_event_disabled():
    # type: () -> Generator[None, None, None]
    """Context manager that sets ``disable_capture_event`` for its body.

    The value that was in effect on entry is restored on exit (rather than
    unconditionally resetting the flag to ``False``), so these blocks can be
    nested: leaving an inner block does not clear the flag while an outer
    block is still active.
    """
    previous = disable_capture_event.get(False)
    disable_capture_event.set(True)
    try:
        yield
    finally:
        disable_capture_event.set(previous)


class ServerlessTimeoutWarning(Exception): # noqa: N818
"""Raised when a serverless method is about to reach its timeout."""

Expand Down
17 changes: 17 additions & 0 deletions tests/test_scrubber.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,3 +187,20 @@ def test_recursive_event_scrubber(sentry_init, capture_events):

(event,) = events
assert event["extra"]["deep"]["deeper"][0]["deepest"]["password"] == "'[Filtered]'"


def test_recursive_scrubber_does_not_override_original(sentry_init, capture_events):
    """Scrubbing captured frame variables must leave the live local untouched."""
    sentry_init(event_scrubber=EventScrubber(recursive=True))
    events = capture_events()

    # `data` is a local of this frame when the exception is captured; the
    # assertions below look it up by that name in the frame's "vars".
    data = {"csrf": "secret"}
    try:
        raise RuntimeError("An error")
    except Exception:
        capture_exception()

    (event,) = events
    frames = event["exception"]["values"][0]["stacktrace"]["frames"]
    (frame,) = frames
    # The original dict still holds the real value ...
    assert data["csrf"] == "secret"
    # ... while the value recorded on the event has been filtered.
    assert frame["vars"]["data"]["csrf"] == "[Filtered]"
Loading