Skip to content

Do not truncate body if request_bodies is "always" #2092

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sentry_sdk/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ def _prepare_event(
# Postprocess the event here so that annotated types do
# generally not surface in before_send
if event is not None:
event = serialize(event)
event = serialize(event, request_bodies=self.options.get("request_bodies"))

before_send = self.options["before_send"]
if (
Expand Down
54 changes: 43 additions & 11 deletions sentry_sdk/serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@
# this value due to attached metadata, so keep the number conservative.
MAX_EVENT_BYTES = 10**6

# Maximum depth and breadth of databags. Excess data will be trimmed. If
# request_bodies is "always", request bodies won't be trimmed.
MAX_DATABAG_DEPTH = 5
MAX_DATABAG_BREADTH = 10
CYCLE_MARKER = "<cyclic>"
Expand Down Expand Up @@ -118,6 +120,8 @@ def serialize(event, **kwargs):
path = [] # type: List[Segment]
meta_stack = [] # type: List[Dict[str, Any]]

keep_request_bodies = kwargs.pop("request_bodies", None) == "always" # type: bool

def _annotate(**meta):
# type: (**Any) -> None
while len(meta_stack) <= len(path):
Expand Down Expand Up @@ -182,10 +186,11 @@ def _is_databag():
if rv in (True, None):
return rv

p0 = path[0]
if p0 == "request" and path[1] == "data":
return True
is_request_body = _is_request_body()
if is_request_body in (True, None):
return is_request_body

p0 = path[0]
if p0 == "breadcrumbs" and path[1] == "values":
path[2]
return True
Expand All @@ -198,13 +203,24 @@ def _is_databag():

return False

def _is_request_body():
# type: () -> Optional[bool]
try:
if path[0] == "request" and path[1] == "data":
return True
except IndexError:
return None

return False

def _serialize_node(
obj, # type: Any
is_databag=None, # type: Optional[bool]
is_request_body=None, # type: Optional[bool]
should_repr_strings=None, # type: Optional[bool]
segment=None, # type: Optional[Segment]
remaining_breadth=None, # type: Optional[int]
remaining_depth=None, # type: Optional[int]
remaining_breadth=None, # type: Optional[Union[int, float]]
remaining_depth=None, # type: Optional[Union[int, float]]
):
# type: (...) -> Any
if segment is not None:
Expand All @@ -218,6 +234,7 @@ def _serialize_node(
return _serialize_node_impl(
obj,
is_databag=is_databag,
is_request_body=is_request_body,
should_repr_strings=should_repr_strings,
remaining_depth=remaining_depth,
remaining_breadth=remaining_breadth,
Expand All @@ -242,9 +259,14 @@ def _flatten_annotated(obj):
return obj

def _serialize_node_impl(
obj, is_databag, should_repr_strings, remaining_depth, remaining_breadth
obj,
is_databag,
is_request_body,
should_repr_strings,
remaining_depth,
remaining_breadth,
):
# type: (Any, Optional[bool], Optional[bool], Optional[int], Optional[int]) -> Any
# type: (Any, Optional[bool], Optional[bool], Optional[bool], Optional[Union[float, int]], Optional[Union[float, int]]) -> Any
if isinstance(obj, AnnotatedValue):
should_repr_strings = False
if should_repr_strings is None:
Expand All @@ -253,10 +275,18 @@ def _serialize_node_impl(
if is_databag is None:
is_databag = _is_databag()

if is_databag and remaining_depth is None:
remaining_depth = MAX_DATABAG_DEPTH
if is_databag and remaining_breadth is None:
remaining_breadth = MAX_DATABAG_BREADTH
if is_request_body is None:
is_request_body = _is_request_body()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this needed? not sure if it ever is true if it's already not true above?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea was to distinguish between something being a request body vs. any other type of databag. Basically so that if request_bodies is always, we can exclude actual request bodies from being trimmed, but any other databags like e.g. breadcrumbs will still get trimmed. Not sure if this is something we actually want though. We could just as well trim all kinds of databags.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes that I get, but I wasn't able to see where this code branch is triggered.
But now I see that is_request_body is never passed into any of the _serialize_node calls, so either can you remove is_request_body as a function arg or maybe pass it in?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh yeah, thanks for spotting that


if is_databag:
if is_request_body and keep_request_bodies:
remaining_depth = float("inf")
remaining_breadth = float("inf")
else:
if remaining_depth is None:
remaining_depth = MAX_DATABAG_DEPTH
if remaining_breadth is None:
remaining_breadth = MAX_DATABAG_BREADTH

obj = _flatten_annotated(obj)

Expand Down Expand Up @@ -312,6 +342,7 @@ def _serialize_node_impl(
segment=str_k,
should_repr_strings=should_repr_strings,
is_databag=is_databag,
is_request_body=is_request_body,
remaining_depth=remaining_depth - 1
if remaining_depth is not None
else None,
Expand All @@ -338,6 +369,7 @@ def _serialize_node_impl(
segment=i,
should_repr_strings=should_repr_strings,
is_databag=is_databag,
is_request_body=is_request_body,
remaining_depth=remaining_depth - 1
if remaining_depth is not None
else None,
Expand Down
32 changes: 32 additions & 0 deletions tests/integrations/bottle/test_bottle.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from io import BytesIO
from bottle import Bottle, debug as set_debug, abort, redirect
from sentry_sdk import capture_message
from sentry_sdk.serializer import MAX_DATABAG_BREADTH

from sentry_sdk.integrations.logging import LoggingIntegration
from werkzeug.test import Client
Expand Down Expand Up @@ -275,6 +276,37 @@ def index():
assert not event["request"]["data"]["file"]


def test_json_not_truncated_if_request_bodies_is_always(
sentry_init, capture_events, app, get_client
):
sentry_init(
integrations=[bottle_sentry.BottleIntegration()], request_bodies="always"
)

data = {
"key{}".format(i): "value{}".format(i) for i in range(MAX_DATABAG_BREADTH + 10)
}

@app.route("/", method="POST")
def index():
import bottle

assert bottle.request.json == data
assert bottle.request.body.read() == json.dumps(data).encode("ascii")
capture_message("hi")
return "ok"

events = capture_events()

client = get_client()

response = client.post("/", content_type="application/json", data=json.dumps(data))
assert response[1] == "200 OK"

(event,) = events
assert event["request"]["data"] == data


@pytest.mark.parametrize(
"integrations",
[
Expand Down
27 changes: 27 additions & 0 deletions tests/integrations/flask/test_flask.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
)
from sentry_sdk.integrations.logging import LoggingIntegration
import sentry_sdk.integrations.flask as flask_sentry
from sentry_sdk.serializer import MAX_DATABAG_BREADTH


login_manager = LoginManager()
Expand Down Expand Up @@ -447,6 +448,32 @@ def index():
assert not event["request"]["data"]["file"]


def test_json_not_truncated_if_request_bodies_is_always(
sentry_init, capture_events, app
):
sentry_init(integrations=[flask_sentry.FlaskIntegration()], request_bodies="always")

data = {
"key{}".format(i): "value{}".format(i) for i in range(MAX_DATABAG_BREADTH + 10)
}

@app.route("/", methods=["POST"])
def index():
assert request.get_json() == data
assert request.get_data() == json.dumps(data).encode("ascii")
capture_message("hi")
return "ok"

events = capture_events()

client = app.test_client()
response = client.post("/", content_type="application/json", data=json.dumps(data))
assert response.status_code == 200

(event,) = events
assert event["request"]["data"] == data


@pytest.mark.parametrize(
"integrations",
[
Expand Down
26 changes: 26 additions & 0 deletions tests/integrations/pyramid/test_pyramid.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from sentry_sdk import capture_message, add_breadcrumb
from sentry_sdk.integrations.pyramid import PyramidIntegration
from sentry_sdk.serializer import MAX_DATABAG_BREADTH

from werkzeug.test import Client

Expand Down Expand Up @@ -192,6 +193,31 @@ def index(request):
assert event["request"]["data"] == data


def test_json_not_truncated_if_request_bodies_is_always(
sentry_init, capture_events, route, get_client
):
sentry_init(integrations=[PyramidIntegration()], request_bodies="always")

data = {
"key{}".format(i): "value{}".format(i) for i in range(MAX_DATABAG_BREADTH + 10)
}

@route("/")
def index(request):
assert request.json == data
assert request.text == json.dumps(data)
capture_message("hi")
return Response("ok")

events = capture_events()

client = get_client()
client.post("/", content_type="application/json", data=json.dumps(data))

(event,) = events
assert event["request"]["data"] == data


def test_files_and_form(sentry_init, capture_events, route, get_client):
sentry_init(integrations=[PyramidIntegration()], request_bodies="always")

Expand Down
42 changes: 39 additions & 3 deletions tests/test_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import sys
import pytest

from sentry_sdk.serializer import serialize
from sentry_sdk.serializer import MAX_DATABAG_BREADTH, MAX_DATABAG_DEPTH, serialize

try:
from hypothesis import given
Expand Down Expand Up @@ -40,14 +40,24 @@ def inner(message, **kwargs):

@pytest.fixture
def extra_normalizer(validate_event_schema):
def inner(message, **kwargs):
event = serialize({"extra": {"foo": message}}, **kwargs)
def inner(extra, **kwargs):
event = serialize({"extra": {"foo": extra}}, **kwargs)
validate_event_schema(event)
return event["extra"]["foo"]

return inner


@pytest.fixture
def body_normalizer(validate_event_schema):
def inner(body, **kwargs):
event = serialize({"request": {"data": body}}, **kwargs)
validate_event_schema(event)
return event["request"]["data"]

return inner


def test_bytes_serialization_decode(message_normalizer):
binary = b"abc123\x80\xf0\x9f\x8d\x95"
result = message_normalizer(binary, should_repr_strings=False)
Expand Down Expand Up @@ -106,3 +116,29 @@ def test_custom_mapping_doesnt_mess_with_mock(extra_normalizer):
m = mock.Mock()
extra_normalizer(m)
assert len(m.mock_calls) == 0


def test_trim_databag_breadth(body_normalizer):
data = {
"key{}".format(i): "value{}".format(i) for i in range(MAX_DATABAG_BREADTH + 10)
}

result = body_normalizer(data)

assert len(result) == MAX_DATABAG_BREADTH
for key, value in result.items():
assert data.get(key) == value


def test_no_trimming_if_request_bodies_is_always(body_normalizer):
data = {
"key{}".format(i): "value{}".format(i) for i in range(MAX_DATABAG_BREADTH + 10)
}
curr = data
for _ in range(MAX_DATABAG_DEPTH + 5):
curr["nested"] = {}
curr = curr["nested"]

result = body_normalizer(data, request_bodies="always")

assert result == data