diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 008256e110..9147032ad7 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -819,6 +819,12 @@ class SPANDATA: Example: GET """ + HTTP_REQUEST_BODY_DATA = "http.request.body.data" + """ + HTTP request body data. Can be given as string or structural data of any format. + Example: "[{\"role\": \"user\", \"message\": \"hello\"}]" + """ + HTTP_REQUEST_METHOD = "http.request.method" """ The HTTP method used. diff --git a/sentry_sdk/integrations/starlette.py b/sentry_sdk/integrations/starlette.py index ddc58d7183..691a3dc0bb 100644 --- a/sentry_sdk/integrations/starlette.py +++ b/sentry_sdk/integrations/starlette.py @@ -8,7 +8,8 @@ from typing import TYPE_CHECKING import sentry_sdk -from sentry_sdk.consts import OP +from sentry_sdk._types import OVER_SIZE_LIMIT_SUBSTITUTE +from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations import ( _DEFAULT_FAILED_REQUEST_STATUS_CODES, DidNotEnable, @@ -46,7 +47,9 @@ import starlette # type: ignore from starlette import __version__ as STARLETTE_VERSION from starlette.applications import Starlette # type: ignore - from starlette.datastructures import UploadFile # type: ignore + from starlette.datastructures import ( # type: ignore + UploadFile, + ) from starlette.middleware import Middleware # type: ignore from starlette.middleware.authentication import ( # type: ignore AuthenticationMiddleware, @@ -478,61 +481,113 @@ def _is_async_callable(obj: "Any") -> bool: ) -def patch_request_response() -> None: - old_request_response = starlette.routing.request_response +def _get_cached_request_body_attribute( + client: "sentry_sdk.client.BaseClient", request: "Request" +) -> "Optional[str]": + """ + Returns a stringified JSON representation of the request body if the request body is cached and within size bounds. + """ + if "content-length" not in request.headers: + return None - def _sentry_request_response(func: "Callable[[Any], Any]") -> "ASGIApp": - old_func = func + try: + content_length = int(request.headers["content-length"]) + except ValueError: + return None - is_coroutine = _is_async_callable(old_func) - if is_coroutine: + if content_length and not request_body_within_bounds(client, content_length): + return OVER_SIZE_LIMIT_SUBSTITUTE - async def _sentry_async_func(*args: "Any", **kwargs: "Any") -> "Any": - client = sentry_sdk.get_client() - integration = client.get_integration(StarletteIntegration) - if integration is None: - return await old_func(*args, **kwargs) + if hasattr(request, "_json"): + return json.dumps(request._json) - request = args[0] + formdata_body = getattr(request, "_form", None) + if formdata_body is None: + return None - _set_transaction_name_and_source( - sentry_sdk.get_current_scope(), - integration.transaction_style, - request, - ) + form_data = {} + for key, val in formdata_body.items(): + is_file = isinstance(val, UploadFile) + form_data[key] = val if not is_file else "[Unparsable]" - sentry_scope = sentry_sdk.get_isolation_scope() - extractor = StarletteRequestExtractor(request) - info = await extractor.extract_request_info() + return json.dumps(form_data) - def _make_request_event_processor( - req: "Any", integration: "Any" - ) -> "Callable[[Event, dict[str, Any]], Event]": - def event_processor( - event: "Event", hint: "Dict[str, Any]" - ) -> "Event": - # Add info from request to event - request_info = event.get("request", {}) - if info: - if "cookies" in info: - request_info["cookies"] = info["cookies"] - if "data" in info: - request_info["data"] = info["data"] - event["request"] = deepcopy(request_info) - return event +async def _wrap_async_handler( + handler: "Callable[..., Awaitable[Any]]", *args: "Any", **kwargs: "Any" +) -> "Any": + """ + Wraps an asynchronous handler function to attach request info to errors and the server segment span. + The request body cached on the Starlette Request object is attached to streamed spans, but consuming the request body in the event + processor can still cause application hangs. + """ + client = sentry_sdk.get_client() + integration = client.get_integration(StarletteIntegration) + if integration is None: + return await handler(*args, **kwargs) - return event_processor + request = args[0] - sentry_scope._name = StarletteIntegration.identifier - sentry_scope.add_event_processor( - _make_request_event_processor(request, integration) + _set_transaction_name_and_source( + sentry_sdk.get_current_scope(), + integration.transaction_style, + request, + ) + + sentry_scope = sentry_sdk.get_isolation_scope() + extractor = StarletteRequestExtractor(request) + + info = await extractor.extract_request_info() + + def _make_request_event_processor( + req: "Any", integration: "Any" + ) -> "Callable[[Event, dict[str, Any]], Event]": + def event_processor(event: "Event", hint: "Dict[str, Any]") -> "Event": + # Add info from request to event + request_info = event.get("request", {}) + if info: + if "cookies" in info: + request_info["cookies"] = info["cookies"] + if "data" in info: + request_info["data"] = info["data"] + event["request"] = deepcopy(request_info) + + return event + + return event_processor + + sentry_scope._name = StarletteIntegration.identifier + sentry_scope.add_event_processor( + _make_request_event_processor(request, integration) + ) + + try: + return await handler(*args, **kwargs) + finally: + current_span = _get_current_streamed_span() + + if type(current_span) is StreamedSpan: + request_body = _get_cached_request_body_attribute( + client=client, request=request + ) + if request_body: + current_span._segment.set_attribute( + SPANDATA.HTTP_REQUEST_BODY_DATA, + request_body, ) - if has_span_streaming_enabled(client.options): - _set_request_body_data_on_streaming_segment(info) - return await old_func(*args, **kwargs) +def patch_request_response() -> None: + old_request_response = starlette.routing.request_response + + def _sentry_request_response(func: "Callable[[Any], Any]") -> "ASGIApp": + old_func = func + + is_coroutine = _is_async_callable(old_func) + if is_coroutine: + + async def _sentry_async_func(*args: "Any", **kwargs: "Any") -> "Any": + return await _wrap_async_handler(old_func, *args, **kwargs) func = _sentry_async_func diff --git a/tests/integrations/starlette/test_starlette.py b/tests/integrations/starlette/test_starlette.py index b87619c992..22ae7c55a4 100644 --- a/tests/integrations/starlette/test_starlette.py +++ b/tests/integrations/starlette/test_starlette.py @@ -25,6 +25,7 @@ import sentry_sdk from sentry_sdk import capture_message, get_baggage, get_traceparent +from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations.asgi import SentryAsgiMiddleware from sentry_sdk.integrations.starlette import ( StarletteIntegration, @@ -273,114 +274,232 @@ async def my_send(*args, **kwargs): @pytest.mark.asyncio -async def test_request_info_json_body(sentry_init, capture_events): +@pytest.mark.parametrize("span_streaming", [True, False]) +async def test_request_info_json_body( + sentry_init, capture_events, capture_items, span_streaming +): sentry_init( traces_sample_rate=1.0, send_default_pii=True, integrations=[StarletteIntegration()], + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static", + }, ) starlette_app = starlette_app_factory() - events = capture_events() - client = TestClient(starlette_app) - client.post( - "/body/json", - json=BODY_JSON, - headers={ - "cookie": "yummy_cookie=choco; tasty_cookie=strawberry", - }, - ) - (event, transaction_event) = events + if span_streaming: + items = capture_items("event", "span") + + client.post( + "/body/json", + json=BODY_JSON, + headers={ + "cookie": "yummy_cookie=choco; tasty_cookie=strawberry", + }, + ) - assert event["request"]["cookies"] == { - "tasty_cookie": "strawberry", - "yummy_cookie": "choco", - } - assert event["request"]["data"] == BODY_JSON + (event,) = (item.payload for item in items if item.type == "event") + assert event["request"]["cookies"] == { + "tasty_cookie": "strawberry", + "yummy_cookie": "choco", + } + assert event["request"]["data"] == BODY_JSON - assert transaction_event["request"]["cookies"] == { - "tasty_cookie": "strawberry", - "yummy_cookie": "choco", - } - assert transaction_event["request"]["data"] == BODY_JSON + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + server_span = next( + span for span in spans if span["attributes"]["sentry.op"] == "http.server" + ) + + assert json.loads( + server_span["attributes"][SPANDATA.HTTP_REQUEST_BODY_DATA] + ) == {"some": "json", "for": "testing", "nested": {"numbers": 123}} + else: + events = capture_events() + + client.post( + "/body/json", + json=BODY_JSON, + headers={ + "cookie": "yummy_cookie=choco; tasty_cookie=strawberry", + }, + ) + + (event, transaction_event) = events + + assert event["request"]["cookies"] == { + "tasty_cookie": "strawberry", + "yummy_cookie": "choco", + } + assert event["request"]["data"] == BODY_JSON + + assert transaction_event["request"]["cookies"] == { + "tasty_cookie": "strawberry", + "yummy_cookie": "choco", + } + assert transaction_event["request"]["data"] == BODY_JSON @pytest.mark.asyncio -async def test_formdata_request_body(sentry_init, capture_events): +@pytest.mark.parametrize("span_streaming", [True, False]) +async def test_formdata_request_body( + sentry_init, capture_events, capture_items, span_streaming +): sentry_init( traces_sample_rate=1.0, send_default_pii=True, max_request_body_size="always", integrations=[StarletteIntegration()], + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static", + }, ) starlette_app = starlette_app_factory() - events = capture_events() - client = TestClient(starlette_app) - client.post( - "/body/form", - data=BODY_FORM.encode("utf-8"), - headers={ - "content-type": "multipart/form-data; boundary=fd721ef49ea403a6", - }, - ) - (event, transaction_event) = events - assert event["request"]["data"].keys() == PARSED_FORM.keys() - assert event["request"]["data"]["username"] == PARSED_FORM["username"] - assert event["request"]["data"]["password"] == "[Filtered]" - assert event["request"]["data"]["photo"] == "" - assert event["_meta"]["request"]["data"]["photo"] == {"": {"rem": [["!raw", "x"]]}} + if span_streaming: + items = capture_items("event", "span") + + client.post( + "/body/form", + data=BODY_FORM.encode("utf-8"), + headers={ + "content-type": "multipart/form-data; boundary=fd721ef49ea403a6", + }, + ) - assert transaction_event["request"]["data"].keys() == PARSED_FORM.keys() - assert transaction_event["request"]["data"]["username"] == PARSED_FORM["username"] - assert transaction_event["request"]["data"]["password"] == "[Filtered]" - assert transaction_event["request"]["data"]["photo"] == "" - assert transaction_event["_meta"]["request"]["data"]["photo"] == { - "": {"rem": [["!raw", "x"]]} - } + (event,) = (item.payload for item in items if item.type == "event") + assert event["request"]["data"].keys() == PARSED_FORM.keys() + assert event["request"]["data"]["username"] == PARSED_FORM["username"] + assert event["request"]["data"]["password"] == "[Filtered]" + assert event["request"]["data"]["photo"] == "" + + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + server_span = next( + span for span in spans if span["attributes"]["sentry.op"] == "http.server" + ) + + # Going forward, the sanitization of data will need to happen within the `before_send_span` hooks + # See https://sentry.slack.com/archives/C09RR0KD2N7/p1776951331206129?thread_ts=1776951227.440659&cid=C09RR0KD2N7 + parsed_form_attribute = json.loads( + server_span["attributes"][SPANDATA.HTTP_REQUEST_BODY_DATA] + ) + assert parsed_form_attribute.keys() == PARSED_FORM.keys() + assert parsed_form_attribute["username"] == PARSED_FORM["username"] + assert parsed_form_attribute["password"] == "hello123" + assert parsed_form_attribute["photo"] == "[Unparsable]" + else: + events = capture_events() + + client.post( + "/body/form", + data=BODY_FORM.encode("utf-8"), + headers={ + "content-type": "multipart/form-data; boundary=fd721ef49ea403a6", + }, + ) + + (event, transaction_event) = events + assert event["request"]["data"].keys() == PARSED_FORM.keys() + assert event["request"]["data"]["username"] == PARSED_FORM["username"] + assert event["request"]["data"]["password"] == "[Filtered]" + assert event["request"]["data"]["photo"] == "" + assert event["_meta"]["request"]["data"]["photo"] == { + "": {"rem": [["!raw", "x"]]} + } + + assert transaction_event["request"]["data"].keys() == PARSED_FORM.keys() + assert ( + transaction_event["request"]["data"]["username"] == PARSED_FORM["username"] + ) + assert transaction_event["request"]["data"]["password"] == "[Filtered]" + assert transaction_event["request"]["data"]["photo"] == "" + assert transaction_event["_meta"]["request"]["data"]["photo"] == { + "": {"rem": [["!raw", "x"]]} + } @pytest.mark.asyncio -async def test_request_body_too_big(sentry_init, capture_events): +@pytest.mark.parametrize("span_streaming", [True, False]) +async def test_request_body_too_big( + sentry_init, capture_events, capture_items, span_streaming +): sentry_init( traces_sample_rate=1.0, send_default_pii=True, integrations=[StarletteIntegration()], + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static", + }, ) starlette_app = starlette_app_factory() - events = capture_events() - client = TestClient(starlette_app) - client.post( - "/body/form", - data=BODY_FORM.encode("utf-8"), - headers={ - "content-type": "multipart/form-data; boundary=fd721ef49ea403a6", - "cookie": "yummy_cookie=choco; tasty_cookie=strawberry", - }, - ) - (event, transaction_event) = events - assert event["request"]["cookies"] == { - "tasty_cookie": "strawberry", - "yummy_cookie": "choco", - } - # Because request is too big only the AnnotatedValue is extracted. - assert event["_meta"]["request"]["data"] == {"": {"rem": [["!config", "x"]]}} - - assert transaction_event["request"]["cookies"] == { - "tasty_cookie": "strawberry", - "yummy_cookie": "choco", - } - # Because request is too big only the AnnotatedValue is extracted. - assert transaction_event["_meta"]["request"]["data"] == { - "": {"rem": [["!config", "x"]]} - } + if span_streaming: + items = capture_items("event", "span") + + client.post( + "/body/form", + data=BODY_FORM.encode("utf-8"), + headers={ + "content-type": "multipart/form-data; boundary=fd721ef49ea403a6", + "cookie": "yummy_cookie=choco; tasty_cookie=strawberry", + }, + ) + + (event,) = (item.payload for item in items if item.type == "event") + assert event["request"]["cookies"] == { + "tasty_cookie": "strawberry", + "yummy_cookie": "choco", + } + # Because request is too big only the AnnotatedValue is extracted. + assert event["_meta"]["request"]["data"] == {"": {"rem": [["!config", "x"]]}} + + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + server_span = next( + span for span in spans if span["attributes"]["sentry.op"] == "http.server" + ) + + # Because request is too big only the AnnotatedValue is extracted. + assert ( + server_span["attributes"][SPANDATA.HTTP_REQUEST_BODY_DATA] + == "[Exceeds maximum size]" + ) + else: + events = capture_events() + + client.post( + "/body/form", + data=BODY_FORM.encode("utf-8"), + headers={ + "content-type": "multipart/form-data; boundary=fd721ef49ea403a6", + "cookie": "yummy_cookie=choco; tasty_cookie=strawberry", + }, + ) + + (event, transaction_event) = events + assert event["request"]["cookies"] == { + "tasty_cookie": "strawberry", + "yummy_cookie": "choco", + } + # Because request is too big only the AnnotatedValue is extracted. + assert event["_meta"]["request"]["data"] == {"": {"rem": [["!config", "x"]]}} + + assert transaction_event["request"]["cookies"] == { + "tasty_cookie": "strawberry", + "yummy_cookie": "choco", + } + # Because request is too big only the AnnotatedValue is extracted. + assert transaction_event["_meta"]["request"]["data"] == { + "": {"rem": [["!config", "x"]]} + } @pytest.mark.asyncio @@ -987,134 +1106,6 @@ def test_active_thread_id_span_streaming(sentry_init, capture_items, endpoint): assert str(data["active"]) == segments[0]["attributes"]["thread.id"] -def _post_body_app(handler_awaitable): - async def _handler(request): - await handler_awaitable(request) - return starlette.responses.JSONResponse({"ok": True}) - - return starlette.applications.Starlette( - routes=[starlette.routing.Route("/body", _handler, methods=["POST"])], - ) - - -@pytest.mark.parametrize("middleware_spans", [False, True]) -def test_request_body_data_does_not_scrub_pii_span_streaming( - sentry_init, capture_items, middleware_spans -): - sentry_init( - auto_enabling_integrations=False, - integrations=[StarletteIntegration(middleware_spans=middleware_spans)], - traces_sample_rate=1.0, - _experiments={"trace_lifecycle": "stream"}, - ) - - async def _read_json(request): - await request.json() - - items = capture_items("span") - - client = TestClient(_post_body_app(_read_json)) - response = client.post( - "/body", - json={ - "password": "ohno", - "authorization": "Bearer token", - "message": "hello", - }, - ) - assert response.status_code == 200 - - sentry_sdk.flush() - - segments = [item.payload for item in items if item.payload.get("is_segment")] - assert len(segments) == 1 - attr = segments[0]["attributes"]["http.request.body.data"] - - # Going forward, the sanitization of data will need to happen within the `before_send_span` hooks - # See https://sentry.slack.com/archives/C09RR0KD2N7/p1776951331206129?thread_ts=1776951227.440659&cid=C09RR0KD2N7 - assert "ohno" in attr - assert "Bearer token" in attr - assert "hello" in attr - - -@pytest.mark.skipif( - STARLETTE_VERSION < (0, 21), - reason="Requires Starlette >= 0.21, because earlier versions use a requests-based TestClient which does not support the 'content' kwarg", -) -@pytest.mark.parametrize("middleware_spans", [False, True]) -def test_request_body_data_annotated_value_top_level_span_streaming( - sentry_init, capture_items, middleware_spans -): - sentry_init( - auto_enabling_integrations=False, - integrations=[StarletteIntegration(middleware_spans=middleware_spans)], - traces_sample_rate=1.0, - _experiments={"trace_lifecycle": "stream"}, - ) - - async def _read_body(request): - await request.body() - - items = capture_items("span") - - client = TestClient(_post_body_app(_read_body)) - response = client.post( - "/body", - content=b"not json and not form", - headers={"content-type": "application/octet-stream"}, - ) - assert response.status_code == 200 - - sentry_sdk.flush() - - segments = [item.payload for item in items if item.payload.get("is_segment")] - assert len(segments) == 1 - attr = segments[0]["attributes"]["http.request.body.data"] - - assert isinstance(attr, str) - assert ( - attr == '""' - ) # AnnotatedValue.removed_because_raw_data is called because the content was not able to be parsed, and replaces the value with an empty string - - -@pytest.mark.parametrize("middleware_spans", [False, True]) -def test_request_body_data_annotated_value_nested_span_streaming( - sentry_init, capture_items, middleware_spans -): - pytest.importorskip("multipart") - - sentry_init( - auto_enabling_integrations=False, - integrations=[StarletteIntegration(middleware_spans=middleware_spans)], - traces_sample_rate=1.0, - _experiments={"trace_lifecycle": "stream"}, - ) - - async def _read_form(request): - await request.form() - - items = capture_items("span") - - client = TestClient(_post_body_app(_read_form)) - response = client.post( - "/body", - data={"name": "erica"}, - files={"avatar": ("photo.jpg", b"fake-bytes", "image/jpeg")}, - ) - assert response.status_code == 200 - - sentry_sdk.flush() - - segments = [item.payload for item in items if item.payload.get("is_segment")] - assert len(segments) == 1 - attr = segments[0]["attributes"]["http.request.body.data"] - - assert isinstance(attr, str) - parsed = json.loads(attr) - assert parsed["name"] == "erica" - assert "fake-bytes" not in attr - - def test_original_request_not_scrubbed(sentry_init, capture_events): sentry_init(integrations=[StarletteIntegration()])