Skip to content

Commit eb5ee4a

Browse files
authored
Do not truncate request body if request_bodies is "always" (#2092)
1 parent e0209db commit eb5ee4a

File tree

6 files changed

+168
-15
lines changed

6 files changed

+168
-15
lines changed

Diff for: sentry_sdk/client.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ def _prepare_event(
320320
# Postprocess the event here so that annotated types generally do
321321
# not surface in before_send
322322
if event is not None:
323-
event = serialize(event)
323+
event = serialize(event, request_bodies=self.options.get("request_bodies"))
324324

325325
before_send = self.options["before_send"]
326326
if (

Diff for: sentry_sdk/serializer.py

+43-11
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@
6767
# this value due to attached metadata, so keep the number conservative.
6868
MAX_EVENT_BYTES = 10**6
6969

70+
# Maximum depth and breadth of databags. Excess data will be trimmed. If
71+
# request_bodies is "always", request bodies won't be trimmed.
7072
MAX_DATABAG_DEPTH = 5
7173
MAX_DATABAG_BREADTH = 10
7274
CYCLE_MARKER = "<cyclic>"
@@ -118,6 +120,8 @@ def serialize(event, **kwargs):
118120
path = [] # type: List[Segment]
119121
meta_stack = [] # type: List[Dict[str, Any]]
120122

123+
keep_request_bodies = kwargs.pop("request_bodies", None) == "always" # type: bool
124+
121125
def _annotate(**meta):
122126
# type: (**Any) -> None
123127
while len(meta_stack) <= len(path):
@@ -182,10 +186,11 @@ def _is_databag():
182186
if rv in (True, None):
183187
return rv
184188

185-
p0 = path[0]
186-
if p0 == "request" and path[1] == "data":
187-
return True
189+
is_request_body = _is_request_body()
190+
if is_request_body in (True, None):
191+
return is_request_body
188192

193+
p0 = path[0]
189194
if p0 == "breadcrumbs" and path[1] == "values":
190195
path[2]
191196
return True
@@ -198,13 +203,24 @@ def _is_databag():
198203

199204
return False
200205

206+
def _is_request_body():
207+
# type: () -> Optional[bool]
208+
try:
209+
if path[0] == "request" and path[1] == "data":
210+
return True
211+
except IndexError:
212+
return None
213+
214+
return False
215+
201216
def _serialize_node(
202217
obj, # type: Any
203218
is_databag=None, # type: Optional[bool]
219+
is_request_body=None, # type: Optional[bool]
204220
should_repr_strings=None, # type: Optional[bool]
205221
segment=None, # type: Optional[Segment]
206-
remaining_breadth=None, # type: Optional[int]
207-
remaining_depth=None, # type: Optional[int]
222+
remaining_breadth=None, # type: Optional[Union[int, float]]
223+
remaining_depth=None, # type: Optional[Union[int, float]]
208224
):
209225
# type: (...) -> Any
210226
if segment is not None:
@@ -218,6 +234,7 @@ def _serialize_node(
218234
return _serialize_node_impl(
219235
obj,
220236
is_databag=is_databag,
237+
is_request_body=is_request_body,
221238
should_repr_strings=should_repr_strings,
222239
remaining_depth=remaining_depth,
223240
remaining_breadth=remaining_breadth,
@@ -242,9 +259,14 @@ def _flatten_annotated(obj):
242259
return obj
243260

244261
def _serialize_node_impl(
245-
obj, is_databag, should_repr_strings, remaining_depth, remaining_breadth
262+
obj,
263+
is_databag,
264+
is_request_body,
265+
should_repr_strings,
266+
remaining_depth,
267+
remaining_breadth,
246268
):
247-
# type: (Any, Optional[bool], Optional[bool], Optional[int], Optional[int]) -> Any
269+
# type: (Any, Optional[bool], Optional[bool], Optional[bool], Optional[Union[float, int]], Optional[Union[float, int]]) -> Any
248270
if isinstance(obj, AnnotatedValue):
249271
should_repr_strings = False
250272
if should_repr_strings is None:
@@ -253,10 +275,18 @@ def _serialize_node_impl(
253275
if is_databag is None:
254276
is_databag = _is_databag()
255277

256-
if is_databag and remaining_depth is None:
257-
remaining_depth = MAX_DATABAG_DEPTH
258-
if is_databag and remaining_breadth is None:
259-
remaining_breadth = MAX_DATABAG_BREADTH
278+
if is_request_body is None:
279+
is_request_body = _is_request_body()
280+
281+
if is_databag:
282+
if is_request_body and keep_request_bodies:
283+
remaining_depth = float("inf")
284+
remaining_breadth = float("inf")
285+
else:
286+
if remaining_depth is None:
287+
remaining_depth = MAX_DATABAG_DEPTH
288+
if remaining_breadth is None:
289+
remaining_breadth = MAX_DATABAG_BREADTH
260290

261291
obj = _flatten_annotated(obj)
262292

@@ -312,6 +342,7 @@ def _serialize_node_impl(
312342
segment=str_k,
313343
should_repr_strings=should_repr_strings,
314344
is_databag=is_databag,
345+
is_request_body=is_request_body,
315346
remaining_depth=remaining_depth - 1
316347
if remaining_depth is not None
317348
else None,
@@ -338,6 +369,7 @@ def _serialize_node_impl(
338369
segment=i,
339370
should_repr_strings=should_repr_strings,
340371
is_databag=is_databag,
372+
is_request_body=is_request_body,
341373
remaining_depth=remaining_depth - 1
342374
if remaining_depth is not None
343375
else None,

Diff for: tests/integrations/bottle/test_bottle.py

+32
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from io import BytesIO
99
from bottle import Bottle, debug as set_debug, abort, redirect
1010
from sentry_sdk import capture_message
11+
from sentry_sdk.serializer import MAX_DATABAG_BREADTH
1112

1213
from sentry_sdk.integrations.logging import LoggingIntegration
1314
from werkzeug.test import Client
@@ -275,6 +276,37 @@ def index():
275276
assert not event["request"]["data"]["file"]
276277

277278

279+
def test_json_not_truncated_if_request_bodies_is_always(
280+
sentry_init, capture_events, app, get_client
281+
):
282+
sentry_init(
283+
integrations=[bottle_sentry.BottleIntegration()], request_bodies="always"
284+
)
285+
286+
data = {
287+
"key{}".format(i): "value{}".format(i) for i in range(MAX_DATABAG_BREADTH + 10)
288+
}
289+
290+
@app.route("/", method="POST")
291+
def index():
292+
import bottle
293+
294+
assert bottle.request.json == data
295+
assert bottle.request.body.read() == json.dumps(data).encode("ascii")
296+
capture_message("hi")
297+
return "ok"
298+
299+
events = capture_events()
300+
301+
client = get_client()
302+
303+
response = client.post("/", content_type="application/json", data=json.dumps(data))
304+
assert response[1] == "200 OK"
305+
306+
(event,) = events
307+
assert event["request"]["data"] == data
308+
309+
278310
@pytest.mark.parametrize(
279311
"integrations",
280312
[

Diff for: tests/integrations/flask/test_flask.py

+27
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
)
2929
from sentry_sdk.integrations.logging import LoggingIntegration
3030
import sentry_sdk.integrations.flask as flask_sentry
31+
from sentry_sdk.serializer import MAX_DATABAG_BREADTH
3132

3233

3334
login_manager = LoginManager()
@@ -447,6 +448,32 @@ def index():
447448
assert not event["request"]["data"]["file"]
448449

449450

451+
def test_json_not_truncated_if_request_bodies_is_always(
452+
sentry_init, capture_events, app
453+
):
454+
sentry_init(integrations=[flask_sentry.FlaskIntegration()], request_bodies="always")
455+
456+
data = {
457+
"key{}".format(i): "value{}".format(i) for i in range(MAX_DATABAG_BREADTH + 10)
458+
}
459+
460+
@app.route("/", methods=["POST"])
461+
def index():
462+
assert request.get_json() == data
463+
assert request.get_data() == json.dumps(data).encode("ascii")
464+
capture_message("hi")
465+
return "ok"
466+
467+
events = capture_events()
468+
469+
client = app.test_client()
470+
response = client.post("/", content_type="application/json", data=json.dumps(data))
471+
assert response.status_code == 200
472+
473+
(event,) = events
474+
assert event["request"]["data"] == data
475+
476+
450477
@pytest.mark.parametrize(
451478
"integrations",
452479
[

Diff for: tests/integrations/pyramid/test_pyramid.py

+26
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
from sentry_sdk import capture_message, add_breadcrumb
1414
from sentry_sdk.integrations.pyramid import PyramidIntegration
15+
from sentry_sdk.serializer import MAX_DATABAG_BREADTH
1516

1617
from werkzeug.test import Client
1718

@@ -192,6 +193,31 @@ def index(request):
192193
assert event["request"]["data"] == data
193194

194195

196+
def test_json_not_truncated_if_request_bodies_is_always(
197+
sentry_init, capture_events, route, get_client
198+
):
199+
sentry_init(integrations=[PyramidIntegration()], request_bodies="always")
200+
201+
data = {
202+
"key{}".format(i): "value{}".format(i) for i in range(MAX_DATABAG_BREADTH + 10)
203+
}
204+
205+
@route("/")
206+
def index(request):
207+
assert request.json == data
208+
assert request.text == json.dumps(data)
209+
capture_message("hi")
210+
return Response("ok")
211+
212+
events = capture_events()
213+
214+
client = get_client()
215+
client.post("/", content_type="application/json", data=json.dumps(data))
216+
217+
(event,) = events
218+
assert event["request"]["data"] == data
219+
220+
195221
def test_files_and_form(sentry_init, capture_events, route, get_client):
196222
sentry_init(integrations=[PyramidIntegration()], request_bodies="always")
197223

Diff for: tests/test_serializer.py

+39-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import sys
33
import pytest
44

5-
from sentry_sdk.serializer import serialize
5+
from sentry_sdk.serializer import MAX_DATABAG_BREADTH, MAX_DATABAG_DEPTH, serialize
66

77
try:
88
from hypothesis import given
@@ -40,14 +40,24 @@ def inner(message, **kwargs):
4040

4141
@pytest.fixture
4242
def extra_normalizer(validate_event_schema):
43-
def inner(message, **kwargs):
44-
event = serialize({"extra": {"foo": message}}, **kwargs)
43+
def inner(extra, **kwargs):
44+
event = serialize({"extra": {"foo": extra}}, **kwargs)
4545
validate_event_schema(event)
4646
return event["extra"]["foo"]
4747

4848
return inner
4949

5050

51+
@pytest.fixture
52+
def body_normalizer(validate_event_schema):
53+
def inner(body, **kwargs):
54+
event = serialize({"request": {"data": body}}, **kwargs)
55+
validate_event_schema(event)
56+
return event["request"]["data"]
57+
58+
return inner
59+
60+
5161
def test_bytes_serialization_decode(message_normalizer):
5262
binary = b"abc123\x80\xf0\x9f\x8d\x95"
5363
result = message_normalizer(binary, should_repr_strings=False)
@@ -106,3 +116,29 @@ def test_custom_mapping_doesnt_mess_with_mock(extra_normalizer):
106116
m = mock.Mock()
107117
extra_normalizer(m)
108118
assert len(m.mock_calls) == 0
119+
120+
121+
def test_trim_databag_breadth(body_normalizer):
122+
data = {
123+
"key{}".format(i): "value{}".format(i) for i in range(MAX_DATABAG_BREADTH + 10)
124+
}
125+
126+
result = body_normalizer(data)
127+
128+
assert len(result) == MAX_DATABAG_BREADTH
129+
for key, value in result.items():
130+
assert data.get(key) == value
131+
132+
133+
def test_no_trimming_if_request_bodies_is_always(body_normalizer):
134+
data = {
135+
"key{}".format(i): "value{}".format(i) for i in range(MAX_DATABAG_BREADTH + 10)
136+
}
137+
curr = data
138+
for _ in range(MAX_DATABAG_DEPTH + 5):
139+
curr["nested"] = {}
140+
curr = curr["nested"]
141+
142+
result = body_normalizer(data, request_bodies="always")
143+
144+
assert result == data

0 commit comments

Comments
 (0)