Skip to content

Commit 7d46709

Browse files
authored
Serialize vars early to avoid living references (#3409)
1 parent 5529c70 commit 7d46709

File tree

6 files changed

+67
-57
lines changed

6 files changed

+67
-57
lines changed

sentry_sdk/client.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@
55
from collections.abc import Mapping
66
from datetime import datetime, timezone
77
from importlib import import_module
8+
from typing import cast
89

910
from sentry_sdk._compat import PY37, check_uwsgi_thread_support
1011
from sentry_sdk.utils import (
1112
capture_internal_exceptions,
1213
current_stacktrace,
13-
disable_capture_event,
1414
format_timestamp,
1515
get_sdk_name,
1616
get_type_name,
@@ -525,10 +525,13 @@ def _prepare_event(
525525
# Postprocess the event here so that annotated types do
526526
# generally not surface in before_send
527527
if event is not None:
528-
event = serialize(
529-
event,
530-
max_request_body_size=self.options.get("max_request_body_size"),
531-
max_value_length=self.options.get("max_value_length"),
528+
event = cast(
529+
"Event",
530+
serialize(
531+
cast("Dict[str, Any]", event),
532+
max_request_body_size=self.options.get("max_request_body_size"),
533+
max_value_length=self.options.get("max_value_length"),
534+
),
532535
)
533536

534537
before_send = self.options["before_send"]
@@ -726,9 +729,6 @@ def capture_event(
726729
727730
:returns: An event ID. May be `None` if there is no DSN set or if the SDK decided to discard the event for other reasons. In such situations setting `debug=True` on `init()` may help.
728731
"""
729-
if disable_capture_event.get(False):
730-
return None
731-
732732
if hint is None:
733733
hint = {}
734734
event_id = event.get("event_id")

sentry_sdk/integrations/pure_eval.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,8 @@ def start(n):
131131
atok = source.asttokens()
132132

133133
expressions.sort(key=closeness, reverse=True)
134-
return {
134+
vars = {
135135
atok.get_text(nodes[0]): value
136136
for nodes, value in expressions[: serializer.MAX_DATABAG_BREADTH]
137137
}
138+
return serializer.serialize(vars, is_vars=True)

sentry_sdk/scope.py

+10
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
capture_internal_exception,
3232
capture_internal_exceptions,
3333
ContextVar,
34+
disable_capture_event,
3435
event_from_exception,
3536
exc_info_from_error,
3637
logger,
@@ -1130,6 +1131,9 @@ def capture_event(self, event, hint=None, scope=None, **scope_kwargs):
11301131
11311132
:returns: An `event_id` if the SDK decided to send the event (see :py:meth:`sentry_sdk.client._Client.capture_event`).
11321133
"""
1134+
if disable_capture_event.get(False):
1135+
return None
1136+
11331137
scope = self._merge_scopes(scope, scope_kwargs)
11341138

11351139
event_id = self.get_client().capture_event(event=event, hint=hint, scope=scope)
@@ -1157,6 +1161,9 @@ def capture_message(self, message, level=None, scope=None, **scope_kwargs):
11571161
11581162
:returns: An `event_id` if the SDK decided to send the event (see :py:meth:`sentry_sdk.client._Client.capture_event`).
11591163
"""
1164+
if disable_capture_event.get(False):
1165+
return None
1166+
11601167
if level is None:
11611168
level = "info"
11621169

@@ -1182,6 +1189,9 @@ def capture_exception(self, error=None, scope=None, **scope_kwargs):
11821189
11831190
:returns: An `event_id` if the SDK decided to send the event (see :py:meth:`sentry_sdk.client._Client.capture_event`).
11841191
"""
1192+
if disable_capture_event.get(False):
1193+
return None
1194+
11851195
if error is not None:
11861196
exc_info = exc_info_from_error(error)
11871197
else:

sentry_sdk/serializer.py

+27-47
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
from typing import Type
2626
from typing import Union
2727

28-
from sentry_sdk._types import NotImplementedType, Event
28+
from sentry_sdk._types import NotImplementedType
2929

3030
Span = Dict[str, Any]
3131

@@ -95,7 +95,25 @@ def __exit__(
9595

9696

9797
def serialize(event, **kwargs):
98-
# type: (Event, **Any) -> Event
98+
# type: (Dict[str, Any], **Any) -> Dict[str, Any]
99+
"""
100+
A very smart serializer that takes a dict and emits a json-friendly dict.
101+
Currently used for serializing the final Event and also ahead of time while fetching the stack
102+
local variables for each frame in a stacktrace.
103+
104+
It works internally with 'databags' which are arbitrary data structures like Mapping, Sequence and Set.
105+
The algorithm itself is a recursive graph walk down the data structures it encounters.
106+
107+
It has the following responsibilities:
108+
* Trimming databags and keeping them within MAX_DATABAG_BREADTH and MAX_DATABAG_DEPTH.
109+
* Calling safe_repr() on objects appropriately to keep them informative and readable in the final payload.
110+
* Annotating the payload with the _meta field whenever trimming happens.
111+
112+
:param max_request_body_size: If set to "always", will never trim request bodies.
113+
:param max_value_length: The max length to strip strings to, defaults to sentry_sdk.consts.DEFAULT_MAX_VALUE_LENGTH
114+
:param is_vars: If we're serializing vars early, we want to repr() things that are JSON-serializable to make their type more apparent. For example, it's useful to see the difference between a unicode-string and a bytestring when viewing a stacktrace.
115+
116+
"""
99117
memo = Memo()
100118
path = [] # type: List[Segment]
101119
meta_stack = [] # type: List[Dict[str, Any]]
@@ -104,6 +122,7 @@ def serialize(event, **kwargs):
104122
kwargs.pop("max_request_body_size", None) == "always"
105123
) # type: bool
106124
max_value_length = kwargs.pop("max_value_length", None) # type: Optional[int]
125+
is_vars = kwargs.pop("is_vars", False)
107126

108127
def _annotate(**meta):
109128
# type: (**Any) -> None
@@ -118,56 +137,17 @@ def _annotate(**meta):
118137

119138
meta_stack[-1].setdefault("", {}).update(meta)
120139

121-
def _should_repr_strings():
122-
# type: () -> Optional[bool]
123-
"""
124-
By default non-serializable objects are going through
125-
safe_repr(). For certain places in the event (local vars) we
126-
want to repr() even things that are JSON-serializable to
127-
make their type more apparent. For example, it's useful to
128-
see the difference between a unicode-string and a bytestring
129-
when viewing a stacktrace.
130-
131-
For container-types we still don't do anything different.
132-
Generally we just try to make the Sentry UI present exactly
133-
what a pretty-printed repr would look like.
134-
135-
:returns: `True` if we are somewhere in frame variables, and `False` if
136-
we are in a position where we will never encounter frame variables
137-
when recursing (for example, we're in `event.extra`). `None` if we
138-
are not (yet) in frame variables, but might encounter them when
139-
recursing (e.g. we're in `event.exception`)
140-
"""
141-
try:
142-
p0 = path[0]
143-
if p0 == "stacktrace" and path[1] == "frames" and path[3] == "vars":
144-
return True
145-
146-
if (
147-
p0 in ("threads", "exception")
148-
and path[1] == "values"
149-
and path[3] == "stacktrace"
150-
and path[4] == "frames"
151-
and path[6] == "vars"
152-
):
153-
return True
154-
except IndexError:
155-
return None
156-
157-
return False
158-
159140
def _is_databag():
160141
# type: () -> Optional[bool]
161142
"""
162143
A databag is any value that we need to trim.
144+
True for stuff like vars, request bodies, breadcrumbs and extra.
163145
164-
:returns: Works like `_should_repr_strings()`. `True` for "yes",
165-
`False` for :"no", `None` for "maybe soon".
146+
:returns: `True` for "yes", `False` for "no", `None` for "maybe soon".
166147
"""
167148
try:
168-
rv = _should_repr_strings()
169-
if rv in (True, None):
170-
return rv
149+
if is_vars:
150+
return True
171151

172152
is_request_body = _is_request_body()
173153
if is_request_body in (True, None):
@@ -253,7 +233,7 @@ def _serialize_node_impl(
253233
if isinstance(obj, AnnotatedValue):
254234
should_repr_strings = False
255235
if should_repr_strings is None:
256-
should_repr_strings = _should_repr_strings()
236+
should_repr_strings = is_vars
257237

258238
if is_databag is None:
259239
is_databag = _is_databag()
@@ -387,7 +367,7 @@ def _serialize_node_impl(
387367
disable_capture_event.set(True)
388368
try:
389369
serialized_event = _serialize_node(event, **kwargs)
390-
if meta_stack and isinstance(serialized_event, dict):
370+
if not is_vars and meta_stack and isinstance(serialized_event, dict):
391371
serialized_event["_meta"] = meta_stack[0]
392372

393373
return serialized_event

sentry_sdk/utils.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -616,7 +616,9 @@ def serialize_frame(
616616
)
617617

618618
if include_local_variables:
619-
rv["vars"] = frame.f_locals.copy()
619+
from sentry_sdk.serializer import serialize
620+
621+
rv["vars"] = serialize(dict(frame.f_locals), is_vars=True)
620622

621623
return rv
622624

tests/test_scrubber.py

+17
Original file line numberDiff line numberDiff line change
@@ -187,3 +187,20 @@ def test_recursive_event_scrubber(sentry_init, capture_events):
187187

188188
(event,) = events
189189
assert event["extra"]["deep"]["deeper"][0]["deepest"]["password"] == "'[Filtered]'"
190+
191+
192+
def test_recursive_scrubber_does_not_override_original(sentry_init, capture_events):
193+
sentry_init(event_scrubber=EventScrubber(recursive=True))
194+
events = capture_events()
195+
196+
data = {"csrf": "secret"}
197+
try:
198+
raise RuntimeError("An error")
199+
except Exception:
200+
capture_exception()
201+
202+
(event,) = events
203+
frames = event["exception"]["values"][0]["stacktrace"]["frames"]
204+
(frame,) = frames
205+
assert data["csrf"] == "secret"
206+
assert frame["vars"]["data"]["csrf"] == "[Filtered]"

0 commit comments

Comments
 (0)