From a847f339662b12f777bceb2fd6ae10c00a6aeeb6 Mon Sep 17 00:00:00 2001 From: Ivana Kellyerova Date: Tue, 9 May 2023 10:21:43 +0200 Subject: [PATCH 1/4] Do not truncate body if request_bodies is always --- sentry_sdk/client.py | 2 +- sentry_sdk/serializer.py | 50 ++++++++++++++++++---- tests/integrations/bottle/test_bottle.py | 32 ++++++++++++++ tests/integrations/flask/test_flask.py | 27 ++++++++++++ tests/integrations/pyramid/test_pyramid.py | 26 +++++++++++ tests/test_serializer.py | 42 ++++++++++++++++-- 6 files changed, 166 insertions(+), 13 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 1182922dd4..204b99ce0c 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -320,7 +320,7 @@ def _prepare_event( # Postprocess the event here so that annotated types do # generally not surface in before_send if event is not None: - event = serialize(event) + event = serialize(event, request_bodies=self.options.get("request_bodies")) before_send = self.options["before_send"] if ( diff --git a/sentry_sdk/serializer.py b/sentry_sdk/serializer.py index 22eec490ae..3d7701a0f8 100644 --- a/sentry_sdk/serializer.py +++ b/sentry_sdk/serializer.py @@ -67,6 +67,8 @@ # this value due to attached metadata, so keep the number conservative. MAX_EVENT_BYTES = 10**6 +# Maximum depth and breadth of databags. Excess data will be trimmed. If +# request_bodies is "always", request bodies won't be trimmed. MAX_DATABAG_DEPTH = 5 MAX_DATABAG_BREADTH = 10 CYCLE_MARKER = "" @@ -118,6 +120,10 @@ def serialize(event, **kwargs): path = [] # type: List[Segment] meta_stack = [] # type: List[Dict[str, Any]] + do_not_trim_request_bodies = ( + kwargs.pop("request_bodies", None) == "always" + ) # type: bool + def _annotate(**meta): # type: (**Any) -> None while len(meta_stack) <= len(path): @@ -182,10 +188,11 @@ def _is_databag(): if rv in (True, None): return rv - p0 = path[0] - if p0 == "request" and path[1] == "data": - return True + is_request_body = _is_request_body() + if is_request_body in (True, None): + return is_request_body + p0 = path[0] if p0 == "breadcrumbs" and path[1] == "values": path[2] return True @@ -198,9 +205,20 @@ def _is_databag(): return False + def _is_request_body(): + # type: () -> Optional[bool] + try: + if path[0] == "request" and path[1] == "data": + return True + except IndexError: + return None + + return False + def _serialize_node( obj, # type: Any is_databag=None, # type: Optional[bool] + is_request_body=None, # type: Optional[bool] should_repr_strings=None, # type: Optional[bool] segment=None, # type: Optional[Segment] remaining_breadth=None, # type: Optional[int] @@ -218,6 +236,7 @@ def _serialize_node( return _serialize_node_impl( obj, is_databag=is_databag, + is_request_body=is_request_body, should_repr_strings=should_repr_strings, remaining_depth=remaining_depth, remaining_breadth=remaining_breadth, @@ -242,9 +261,14 @@ def _flatten_annotated(obj): return obj def _serialize_node_impl( - obj, is_databag, should_repr_strings, remaining_depth, remaining_breadth + obj, + is_databag, + is_request_body, + should_repr_strings, + remaining_depth, + remaining_breadth, ): - # type: (Any, Optional[bool], Optional[bool], Optional[int], Optional[int]) -> Any + # type: (Any, Optional[bool], Optional[bool], Optional[bool], Optional[int], Optional[int]) -> Any if isinstance(obj, AnnotatedValue): should_repr_strings = False if should_repr_strings is None: @@ -253,10 +277,18 @@ def _serialize_node_impl( if is_databag is None: is_databag = _is_databag() - if is_databag and remaining_depth is None: - remaining_depth = MAX_DATABAG_DEPTH - if is_databag and remaining_breadth is None: - remaining_breadth = MAX_DATABAG_BREADTH + if is_request_body is None: + is_request_body = _is_request_body() + + if is_databag: + if is_request_body and do_not_trim_request_bodies: + remaining_depth = float("inf") + remaining_breadth = float("inf") + else: + if remaining_depth is None: + remaining_depth = MAX_DATABAG_DEPTH + if remaining_breadth is None: + remaining_breadth = MAX_DATABAG_BREADTH obj = _flatten_annotated(obj) diff --git a/tests/integrations/bottle/test_bottle.py b/tests/integrations/bottle/test_bottle.py index dfd6e52f80..206ba1cefd 100644 --- a/tests/integrations/bottle/test_bottle.py +++ b/tests/integrations/bottle/test_bottle.py @@ -8,6 +8,7 @@ from io import BytesIO from bottle import Bottle, debug as set_debug, abort, redirect from sentry_sdk import capture_message +from sentry_sdk.serializer import MAX_DATABAG_BREADTH from sentry_sdk.integrations.logging import LoggingIntegration from werkzeug.test import Client @@ -275,6 +276,37 @@ def index(): assert not event["request"]["data"]["file"] +def test_json_not_truncated_if_request_bodies_is_always( + sentry_init, capture_events, app, get_client +): + sentry_init( + integrations=[bottle_sentry.BottleIntegration()], request_bodies="always" + ) + + data = { + "key{}".format(i): "value{}".format(i) for i in range(MAX_DATABAG_BREADTH + 10) + } + + @app.route("/", method="POST") + def index(): + import bottle + + assert bottle.request.json == data + assert bottle.request.body.read() == json.dumps(data).encode("ascii") + capture_message("hi") + return "ok" + + events = capture_events() + + client = get_client() + + response = client.post("/", content_type="application/json", data=json.dumps(data)) + assert response[1] == "200 OK" + + (event,) = events + assert event["request"]["data"] == data + + @pytest.mark.parametrize( "integrations", [ diff --git a/tests/integrations/flask/test_flask.py b/tests/integrations/flask/test_flask.py index 8983c4e5ff..b5ac498dd6 100644 --- a/tests/integrations/flask/test_flask.py +++ b/tests/integrations/flask/test_flask.py @@ -28,6 +28,7 @@ ) from sentry_sdk.integrations.logging import LoggingIntegration import sentry_sdk.integrations.flask as flask_sentry +from sentry_sdk.serializer import MAX_DATABAG_BREADTH login_manager = LoginManager() @@ -447,6 +448,32 @@ def index(): assert not event["request"]["data"]["file"] +def test_json_not_truncated_if_request_bodies_is_always( + sentry_init, capture_events, app +): + sentry_init(integrations=[flask_sentry.FlaskIntegration()], request_bodies="always") + + data = { + "key{}".format(i): "value{}".format(i) for i in range(MAX_DATABAG_BREADTH + 10) + } + + @app.route("/", methods=["POST"]) + def index(): + assert request.get_json() == data + assert request.get_data() == json.dumps(data).encode("ascii") + capture_message("hi") + return "ok" + + events = capture_events() + + client = app.test_client() + response = client.post("/", content_type="application/json", data=json.dumps(data)) + assert response.status_code == 200 + + (event,) = events + assert event["request"]["data"] == data + + @pytest.mark.parametrize( "integrations", [ diff --git a/tests/integrations/pyramid/test_pyramid.py b/tests/integrations/pyramid/test_pyramid.py index 0f8755ac6b..01dd1c6a04 100644 --- a/tests/integrations/pyramid/test_pyramid.py +++ b/tests/integrations/pyramid/test_pyramid.py @@ -12,6 +12,7 @@ from sentry_sdk import capture_message, add_breadcrumb from sentry_sdk.integrations.pyramid import PyramidIntegration +from sentry_sdk.serializer import MAX_DATABAG_BREADTH from werkzeug.test import Client @@ -192,6 +193,31 @@ def index(request): assert event["request"]["data"] == data +def test_json_not_truncated_if_request_bodies_is_always( + sentry_init, capture_events, route, get_client +): + sentry_init(integrations=[PyramidIntegration()], request_bodies="always") + + data = { + "key{}".format(i): "value{}".format(i) for i in range(MAX_DATABAG_BREADTH + 10) + } + + @route("/") + def index(request): + assert request.json == data + assert request.text == json.dumps(data) + capture_message("hi") + return Response("ok") + + events = capture_events() + + client = get_client() + client.post("/", content_type="application/json", data=json.dumps(data)) + + (event,) = events + assert event["request"]["data"] == data + + def test_files_and_form(sentry_init, capture_events, route, get_client): sentry_init(integrations=[PyramidIntegration()], request_bodies="always") diff --git a/tests/test_serializer.py b/tests/test_serializer.py index 1e28daa2f1..e4272863b8 100644 --- a/tests/test_serializer.py +++ b/tests/test_serializer.py @@ -2,7 +2,7 @@ import sys import pytest -from sentry_sdk.serializer import serialize +from sentry_sdk.serializer import MAX_DATABAG_BREADTH, MAX_DATABAG_DEPTH, serialize try: from hypothesis import given @@ -40,14 +40,24 @@ def inner(message, **kwargs): @pytest.fixture def extra_normalizer(validate_event_schema): - def inner(message, **kwargs): - event = serialize({"extra": {"foo": message}}, **kwargs) + def inner(extra, **kwargs): + event = serialize({"extra": {"foo": extra}}, **kwargs) validate_event_schema(event) return event["extra"]["foo"] return inner +@pytest.fixture +def body_normalizer(validate_event_schema): + def inner(body, **kwargs): + event = serialize({"request": {"data": body}}, **kwargs) + validate_event_schema(event) + return event["request"]["data"] + + return inner + + def test_bytes_serialization_decode(message_normalizer): binary = b"abc123\x80\xf0\x9f\x8d\x95" result = message_normalizer(binary, should_repr_strings=False) @@ -106,3 +116,29 @@ def test_custom_mapping_doesnt_mess_with_mock(extra_normalizer): m = mock.Mock() extra_normalizer(m) assert len(m.mock_calls) == 0 + + +def test_trim_databag_breadth(body_normalizer): + data = { + "key{}".format(i): "value{}".format(i) for i in range(MAX_DATABAG_BREADTH + 10) + } + + result = body_normalizer(data) + + assert len(result) == MAX_DATABAG_BREADTH + for key, value in result.items(): + assert data.get(key) == value + + +def test_no_trimming_if_request_bodies_is_always(body_normalizer): + data = { + "key{}".format(i): "value{}".format(i) for i in range(MAX_DATABAG_BREADTH + 10) + } + curr = data + for _ in range(MAX_DATABAG_DEPTH + 5): + curr["nested"] = {} + curr = data["nested"] + + result = body_normalizer(data, request_bodies="always") + + assert result == data From 17eb786652fd44a1000d1f7c37b51a878e515a90 Mon Sep 17 00:00:00 2001 From: Ivana Kellyerova Date: Tue, 9 May 2023 15:18:46 +0200 Subject: [PATCH 2/4] Fix types --- sentry_sdk/serializer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/serializer.py b/sentry_sdk/serializer.py index 3d7701a0f8..08acc0415f 100644 --- a/sentry_sdk/serializer.py +++ b/sentry_sdk/serializer.py @@ -221,8 +221,8 @@ def _serialize_node( is_request_body=None, # type: Optional[bool] should_repr_strings=None, # type: Optional[bool] segment=None, # type: Optional[Segment] - remaining_breadth=None, # type: Optional[int] - remaining_depth=None, # type: Optional[int] + remaining_breadth=None, # type: Optional[Union[int, float]] + remaining_depth=None, # type: Optional[Union[int, float]] ): # type: (...) -> Any if segment is not None: @@ -268,7 +268,7 @@ def _serialize_node_impl( remaining_depth, remaining_breadth, ): - # type: (Any, Optional[bool], Optional[bool], Optional[bool], Optional[int], Optional[int]) -> Any + # type: (Any, Optional[bool], Optional[bool], Optional[bool], Optional[Union[float, int]], Optional[Union[float, int]]) -> Any if isinstance(obj, AnnotatedValue): should_repr_strings = False if should_repr_strings is None: From 26400df3f9c63fd12a38e29cd8c0a0d979ba3b5a Mon Sep 17 00:00:00 2001 From: Ivana Kellyerova Date: Tue, 9 May 2023 15:21:20 +0200 Subject: [PATCH 3/4] Fix test --- tests/test_serializer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_serializer.py b/tests/test_serializer.py index e4272863b8..5bb0579d5a 100644 --- a/tests/test_serializer.py +++ b/tests/test_serializer.py @@ -137,7 +137,7 @@ def test_no_trimming_if_request_bodies_is_always(body_normalizer): curr = data for _ in range(MAX_DATABAG_DEPTH + 5): curr["nested"] = {} - curr = data["nested"] + curr = curr["nested"] result = body_normalizer(data, request_bodies="always") From 76c1cb59c2ae778bedf57b71048b429f780cc4c6 Mon Sep 17 00:00:00 2001 From: Ivana Kellyerova Date: Wed, 10 May 2023 11:48:29 +0200 Subject: [PATCH 4/4] review fixes --- sentry_sdk/serializer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sentry_sdk/serializer.py b/sentry_sdk/serializer.py index 08acc0415f..b3f8012c28 100644 --- a/sentry_sdk/serializer.py +++ b/sentry_sdk/serializer.py @@ -120,9 +120,7 @@ def serialize(event, **kwargs): path = [] # type: List[Segment] meta_stack = [] # type: List[Dict[str, Any]] - do_not_trim_request_bodies = ( - kwargs.pop("request_bodies", None) == "always" - ) # type: bool + keep_request_bodies = kwargs.pop("request_bodies", None) == "always" # type: bool def _annotate(**meta): # type: (**Any) -> None @@ -281,7 +279,7 @@ def _serialize_node_impl( is_request_body = _is_request_body() if is_databag: - if is_request_body and do_not_trim_request_bodies: + if is_request_body and keep_request_bodies: remaining_depth = float("inf") remaining_breadth = float("inf") else: @@ -344,6 +342,7 @@ def _serialize_node_impl( segment=str_k, should_repr_strings=should_repr_strings, is_databag=is_databag, + is_request_body=is_request_body, remaining_depth=remaining_depth - 1 if remaining_depth is not None else None, @@ -370,6 +369,7 @@ def _serialize_node_impl( segment=i, should_repr_strings=should_repr_strings, is_databag=is_databag, + is_request_body=is_request_body, remaining_depth=remaining_depth - 1 if remaining_depth is not None else None,