From 928b0839107d0f1e790abce7f3311592a56310e7 Mon Sep 17 00:00:00 2001
From: jgreer013 <18727435+jgreer013@users.noreply.github.com>
Date: Thu, 18 Jun 2026 10:29:00 -0700
Subject: [PATCH] fix(litellm): store bookkeeping span off-band, not in
 forwarded metadata

With LiteLLMIntegration enabled, any call passing caller `metadata`
crashed during request serialization. `_input_callback` stored the live
Span in the caller's `metadata` dict, and some providers (e.g.
Anthropic's /v1/messages passthrough) forward that dict into the
outbound request body, so `json.dumps(request_body)` raised
`TypeError: Object of type Span is not JSON serializable` before the
request was sent. The span (holding the verbatim prompt under
send_default_pii) could also leak to the provider.

Stash the span on a top-level key of the per-request kwargs dict
(litellm's `model_call_details`) that litellm threads through the
input/success/failure callbacks, instead of in the forwarded `metadata`
sub-dict. This ties the span's lifetime to the request with no
module-level tracking, mirroring how the clickhouse/dramatiq
integrations stash a span on their per-request object. The Anthropic
request body is built only from recognized request params, not from
`model_call_details`, so the span is never serialized onto the wire
(verified end-to-end against the passthrough).

Fixes #6596 Co-Authored-By: Claude Opus 4.8 (1M context) --- sentry_sdk/integrations/litellm.py | 33 +++++++------ tests/integrations/litellm/test_litellm.py | 56 ++++++++++++++++++++++ 2 files changed, 74 insertions(+), 15 deletions(-) diff --git a/sentry_sdk/integrations/litellm.py b/sentry_sdk/integrations/litellm.py index 402676defa..49ead6b068 100644 --- a/sentry_sdk/integrations/litellm.py +++ b/sentry_sdk/integrations/litellm.py @@ -31,16 +31,21 @@ raise DidNotEnable("LiteLLM not installed") -def _get_metadata_dict(kwargs: "Dict[str, Any]") -> "Dict[str, Any]": - """Get the metadata dictionary from the kwargs.""" - litellm_params = kwargs.setdefault("litellm_params", {}) +# Stash the span on a top-level key of the per-request kwargs dict litellm passes +# to every callback, so it lives and dies with the request. +_SPAN_KEY = "_sentry_span" - # we need this weird little dance, as metadata might be set but may be None initially - metadata = litellm_params.get("metadata") - if metadata is None: - metadata = {} - litellm_params["metadata"] = metadata - return metadata + +def _store_span(kwargs: "Dict[str, Any]", span: "Any") -> None: + kwargs[_SPAN_KEY] = span + + +def _peek_span(kwargs: "Dict[str, Any]") -> "Any": + return kwargs.get(_SPAN_KEY) + + +def _pop_span(kwargs: "Dict[str, Any]") -> "Any": + return kwargs.pop(_SPAN_KEY, None) def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]": @@ -117,8 +122,7 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None: ) span.__enter__() - # Store span for later - _get_metadata_dict(kwargs)["_sentry_span"] = span + _store_span(kwargs, span) # Set basic data set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, provider) @@ -198,8 +202,7 @@ def _success_callback( ) -> None: """Handle successful completion.""" - metadata = _get_metadata_dict(kwargs) - span = metadata.get("_sentry_span") + span = _peek_span(kwargs) if span is None: return @@ -259,7 +262,7 @@ def _success_callback( or 
"complete_streaming_response" in kwargs or "async_complete_streaming_response" in kwargs ): - span = metadata.pop("_sentry_span", None) + span = _pop_span(kwargs) if span is not None: span.__exit__(None, None, None) @@ -285,7 +288,7 @@ def _failure_callback( end_time: "datetime", ) -> None: """Handle request failure.""" - span = _get_metadata_dict(kwargs).get("_sentry_span") + span = _pop_span(kwargs) if span is None: return diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 39e173049b..78cfba3b7f 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -5,6 +5,7 @@ from datetime import datetime from unittest import mock +import httpx import pytest import sentry_sdk @@ -2532,6 +2533,61 @@ def test_integration_setup(sentry_init): assert _failure_callback in (litellm.failure_callback or []) +@pytest.mark.asyncio(loop_scope="session") +async def test_anthropic_passthrough_request_stays_serializable( + reset_litellm_executor, sentry_init +): + """Regression test for GH-6596: litellm's Anthropic ``/v1/messages`` + passthrough forwards the caller's ``metadata`` into the request body, so the + integration must not make that body unserializable. Drive the real + passthrough with a mocked transport and assert the request body serializes. 
+ """ + sentry_init( + integrations=[LiteLLMIntegration()], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + send_default_pii=True, + ) + + captured = {} + anthropic_response = { + "id": "msg_1", + "type": "message", + "role": "assistant", + "content": [{"type": "text", "text": "Hi there"}], + "model": "claude-3-5-sonnet-latest", + "stop_reason": "end_turn", + "stop_sequence": None, + "usage": {"input_tokens": 1, "output_tokens": 1}, + } + + client = AsyncHTTPHandler() + + def capture_post(*args, **kwargs): + captured["data"] = kwargs.get("data") + return httpx.Response( + 200, + json=anthropic_response, + request=httpx.Request("POST", "https://api.anthropic.com/v1/messages"), + ) + + with mock.patch.object(client, "post", side_effect=capture_post), start_transaction( + name="litellm test" + ): + await litellm.anthropic.messages.acreate( + model="anthropic/claude-3-5-sonnet-latest", + messages=[{"role": "user", "content": "Hello!"}], + max_tokens=16, + metadata={"user_id": "my-org"}, + api_key="test-key", + client=client, + ) + + assert "data" in captured + request_body = json.loads(captured["data"]) + assert request_body["metadata"] == {"user_id": "my-org"} + + def test_litellm_message_truncation(sentry_init, capture_events): """Test that large messages are truncated properly in LiteLLM integration.""" sentry_init(