From 928b0839107d0f1e790abce7f3311592a56310e7 Mon Sep 17 00:00:00 2001
From: jgreer013 <18727435+jgreer013@users.noreply.github.com>
Date: Thu, 18 Jun 2026 10:29:00 -0700
Subject: [PATCH] fix(litellm): store bookkeeping span off-band, not in
 forwarded metadata

With LiteLLMIntegration enabled, a call passing caller `metadata` could crash
during request serialization. `_input_callback` stored the live Span in the
caller's `metadata` dict, and some providers (e.g. Anthropic's /v1/messages
passthrough) forward that dict into the outbound request body, so
`json.dumps(request_body)` raised `TypeError: Object of type Span is not JSON
serializable` before the request was sent. The span (holding the verbatim
prompt under send_default_pii) could also leak to the provider.

Stash the span on a top-level key of the per-request kwargs dict
(litellm's `model_call_details`) that litellm threads through the
input/success/failure callbacks, instead of in the forwarded `metadata`
sub-dict. This ties the span's lifetime to the request with no module-level
tracking, mirroring how the clickhouse/dramatiq integrations stash a span on
their per-request object. The Anthropic request body is built only from
recognized request params, not from `model_call_details`, so the span is
never serialized onto the wire (verified end-to-end against the passthrough).

Fixes #6596

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 sentry_sdk/integrations/litellm.py         | 33 +++++++------
 tests/integrations/litellm/test_litellm.py | 56 ++++++++++++++++++++++
 2 files changed, 74 insertions(+), 15 deletions(-)

diff --git a/sentry_sdk/integrations/litellm.py b/sentry_sdk/integrations/litellm.py
index 402676defa..49ead6b068 100644
--- a/sentry_sdk/integrations/litellm.py
+++ b/sentry_sdk/integrations/litellm.py
@@ -31,16 +31,21 @@
     raise DidNotEnable("LiteLLM not installed")
 
 
-def _get_metadata_dict(kwargs: "Dict[str, Any]") -> "Dict[str, Any]":
-    """Get the metadata dictionary from the kwargs."""
-    litellm_params = kwargs.setdefault("litellm_params", {})
+# Top-level key of the per-request kwargs dict litellm passes to every callback;
+# stashing the span here (not in forwarded `metadata`) ties it to the request.
+_SPAN_KEY = "_sentry_span"
 
-    # we need this weird little dance, as metadata might be set but may be None initially
-    metadata = litellm_params.get("metadata")
-    if metadata is None:
-        metadata = {}
-        litellm_params["metadata"] = metadata
-    return metadata
+
+def _store_span(kwargs: "Dict[str, Any]", span: "Any") -> None:
+    kwargs[_SPAN_KEY] = span
+
+
+def _peek_span(kwargs: "Dict[str, Any]") -> "Any":
+    return kwargs.get(_SPAN_KEY)
+
+
+def _pop_span(kwargs: "Dict[str, Any]") -> "Any":
+    return kwargs.pop(_SPAN_KEY, None)
 
 
 def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]":
@@ -117,8 +122,7 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
         )
         span.__enter__()
 
-    # Store span for later
-    _get_metadata_dict(kwargs)["_sentry_span"] = span
+    _store_span(kwargs, span)
 
     # Set basic data
     set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, provider)
@@ -198,8 +202,7 @@ def _success_callback(
 ) -> None:
     """Handle successful completion."""
 
-    metadata = _get_metadata_dict(kwargs)
-    span = metadata.get("_sentry_span")
+    span = _peek_span(kwargs)
     if span is None:
         return
 
@@ -259,7 +262,7 @@ def _success_callback(
             or "complete_streaming_response" in kwargs
             or "async_complete_streaming_response" in kwargs
         ):
-            span = metadata.pop("_sentry_span", None)
+            span = _pop_span(kwargs)
             if span is not None:
                 span.__exit__(None, None, None)
 
@@ -285,7 +288,7 @@ def _failure_callback(
     end_time: "datetime",
 ) -> None:
     """Handle request failure."""
-    span = _get_metadata_dict(kwargs).get("_sentry_span")
+    span = _pop_span(kwargs)
     if span is None:
         return
 
diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py
index 39e173049b..78cfba3b7f 100644
--- a/tests/integrations/litellm/test_litellm.py
+++ b/tests/integrations/litellm/test_litellm.py
@@ -5,6 +5,7 @@
 from datetime import datetime
 from unittest import mock
 
+import httpx
 import pytest
 
 import sentry_sdk
@@ -2532,6 +2533,61 @@ def test_integration_setup(sentry_init):
     assert _failure_callback in (litellm.failure_callback or [])
 
 
+@pytest.mark.asyncio(loop_scope="session")
+async def test_anthropic_passthrough_request_stays_serializable(
+    reset_litellm_executor, sentry_init
+):
+    """Regression test for GH-6596: litellm's Anthropic ``/v1/messages``
+    passthrough forwards the caller's ``metadata`` into the request body, so the
+    integration must not make that body unserializable. Drive the real
+    passthrough with a mocked transport and assert the request body serializes.
+    """
+    sentry_init(
+        integrations=[LiteLLMIntegration()],
+        disabled_integrations=[StdlibIntegration],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+
+    captured = {}
+    anthropic_response = {
+        "id": "msg_1",
+        "type": "message",
+        "role": "assistant",
+        "content": [{"type": "text", "text": "Hi there"}],
+        "model": "claude-3-5-sonnet-latest",
+        "stop_reason": "end_turn",
+        "stop_sequence": None,
+        "usage": {"input_tokens": 1, "output_tokens": 1},
+    }
+
+    client = AsyncHTTPHandler()
+
+    def capture_post(*args, **kwargs):
+        captured["data"] = kwargs.get("data")
+        return httpx.Response(
+            200,
+            json=anthropic_response,
+            request=httpx.Request("POST", "https://api.anthropic.com/v1/messages"),
+        )
+
+    with mock.patch.object(client, "post", side_effect=capture_post), start_transaction(
+        name="litellm test"
+    ):
+        await litellm.anthropic.messages.acreate(
+            model="anthropic/claude-3-5-sonnet-latest",
+            messages=[{"role": "user", "content": "Hello!"}],
+            max_tokens=16,
+            metadata={"user_id": "my-org"},
+            api_key="test-key",
+            client=client,
+        )
+
+    assert "data" in captured
+    request_body = json.loads(captured["data"])
+    assert request_body["metadata"] == {"user_id": "my-org"}
+
+
 def test_litellm_message_truncation(sentry_init, capture_events):
     """Test that large messages are truncated properly in LiteLLM integration."""
     sentry_init(