Skip to content

Commit d4db6d3

Browse files
fix(ai): Keep single content input message (#5345)
Store only the last input message on the `gen_ai.request.messages` attribute. Keep prior logic that progressively trims away messages only for embeddings. Remove tests that check the existence of conversation histories or prompt messages that are no longer set on the attribute.
1 parent dafd62f commit d4db6d3

File tree

11 files changed: +179 additions, −211 deletions

sentry_sdk/ai/utils.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,26 @@ def truncate_messages_by_size(
682682

683683

684684
def truncate_and_annotate_messages(
685+
messages: "Optional[List[Dict[str, Any]]]",
686+
span: "Any",
687+
scope: "Any",
688+
max_single_message_chars: int = MAX_SINGLE_MESSAGE_CONTENT_CHARS,
689+
) -> "Optional[List[Dict[str, Any]]]":
690+
if not messages:
691+
return None
692+
693+
messages = redact_blob_message_parts(messages)
694+
695+
truncated_message = _truncate_single_message_content_if_present(
696+
deepcopy(messages[-1]), max_chars=max_single_message_chars
697+
)
698+
if len(messages) > 1:
699+
scope._gen_ai_original_message_count[span.span_id] = len(messages)
700+
701+
return [truncated_message]
702+
703+
704+
def truncate_and_annotate_embedding_inputs(
685705
messages: "Optional[List[Dict[str, Any]]]",
686706
span: "Any",
687707
scope: "Any",

sentry_sdk/integrations/litellm.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
set_data_normalized,
1010
truncate_and_annotate_messages,
1111
transform_openai_content_part,
12+
truncate_and_annotate_embedding_inputs,
1213
)
1314
from sentry_sdk.consts import SPANDATA
1415
from sentry_sdk.integrations import DidNotEnable, Integration
@@ -118,7 +119,9 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
118119
if isinstance(embedding_input, list)
119120
else [embedding_input]
120121
)
121-
messages_data = truncate_and_annotate_messages(input_list, span, scope)
122+
messages_data = truncate_and_annotate_embedding_inputs(
123+
input_list, span, scope
124+
)
122125
if messages_data is not None:
123126
set_data_normalized(
124127
span,

sentry_sdk/integrations/openai.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
set_data_normalized,
1212
normalize_message_roles,
1313
truncate_and_annotate_messages,
14+
truncate_and_annotate_embedding_inputs,
1415
)
1516
from sentry_sdk.ai._openai_completions_api import (
1617
_is_system_instruction as _is_system_instruction_completions,
@@ -414,7 +415,9 @@ def _set_embeddings_input_data(
414415
):
415416
normalized_messages = normalize_message_roles(messages) # type: ignore
416417
scope = sentry_sdk.get_current_scope()
417-
messages_data = truncate_and_annotate_messages(normalized_messages, span, scope)
418+
messages_data = truncate_and_annotate_embedding_inputs(
419+
normalized_messages, span, scope
420+
)
418421
if messages_data is not None:
419422
set_data_normalized(
420423
span, SPANDATA.GEN_AI_EMBEDDINGS_INPUT, messages_data, unpack=False

tests/integrations/anthropic/test_anthropic.py

Lines changed: 75 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -899,7 +899,25 @@ def test_set_output_data_with_input_json_delta(sentry_init):
899899
assert span._data.get(SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS) == 30
900900

901901

902-
def test_anthropic_message_role_mapping(sentry_init, capture_events):
902+
# Test messages with mixed roles including "ai" that should be mapped to "assistant"
903+
@pytest.mark.parametrize(
904+
"test_message,expected_role",
905+
[
906+
({"role": "system", "content": "You are helpful."}, "system"),
907+
({"role": "user", "content": "Hello"}, "user"),
908+
(
909+
{"role": "ai", "content": "Hi there!"},
910+
"assistant",
911+
), # Should be mapped to "assistant"
912+
(
913+
{"role": "assistant", "content": "How can I help?"},
914+
"assistant",
915+
), # Should stay "assistant"
916+
],
917+
)
918+
def test_anthropic_message_role_mapping(
919+
sentry_init, capture_events, test_message, expected_role
920+
):
903921
"""Test that Anthropic integration properly maps message roles like 'ai' to 'assistant'"""
904922
sentry_init(
905923
integrations=[AnthropicIntegration(include_prompts=True)],
@@ -924,13 +942,7 @@ def mock_messages_create(*args, **kwargs):
924942

925943
client.messages._post = mock.Mock(return_value=mock_messages_create())
926944

927-
# Test messages with mixed roles including "ai" that should be mapped to "assistant"
928-
test_messages = [
929-
{"role": "system", "content": "You are helpful."},
930-
{"role": "user", "content": "Hello"},
931-
{"role": "ai", "content": "Hi there!"}, # Should be mapped to "assistant"
932-
{"role": "assistant", "content": "How can I help?"}, # Should stay "assistant"
933-
]
945+
test_messages = [test_message]
934946

935947
with start_transaction(name="anthropic tx"):
936948
client.messages.create(
@@ -948,22 +960,7 @@ def mock_messages_create(*args, **kwargs):
948960
# Parse the stored messages
949961
stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
950962

951-
# Verify that "ai" role was mapped to "assistant"
952-
assert len(stored_messages) == 4
953-
assert stored_messages[0]["role"] == "system"
954-
assert stored_messages[1]["role"] == "user"
955-
assert (
956-
stored_messages[2]["role"] == "assistant"
957-
) # "ai" should be mapped to "assistant"
958-
assert stored_messages[3]["role"] == "assistant" # should stay "assistant"
959-
960-
# Verify content is preserved
961-
assert stored_messages[2]["content"] == "Hi there!"
962-
assert stored_messages[3]["content"] == "How can I help?"
963-
964-
# Verify no "ai" roles remain
965-
roles = [msg["role"] for msg in stored_messages]
966-
assert "ai" not in roles
963+
assert stored_messages[0]["role"] == expected_role
967964

968965

969966
def test_anthropic_message_truncation(sentry_init, capture_events):
@@ -1010,9 +1007,60 @@ def test_anthropic_message_truncation(sentry_init, capture_events):
10101007

10111008
parsed_messages = json.loads(messages_data)
10121009
assert isinstance(parsed_messages, list)
1013-
assert len(parsed_messages) == 2
1014-
assert "small message 4" in str(parsed_messages[0])
1015-
assert "small message 5" in str(parsed_messages[1])
1010+
assert len(parsed_messages) == 1
1011+
assert "small message 5" in str(parsed_messages[0])
1012+
1013+
assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5
1014+
1015+
1016+
@pytest.mark.asyncio
1017+
async def test_anthropic_message_truncation_async(sentry_init, capture_events):
1018+
"""Test that large messages are truncated properly in Anthropic integration."""
1019+
sentry_init(
1020+
integrations=[AnthropicIntegration(include_prompts=True)],
1021+
traces_sample_rate=1.0,
1022+
send_default_pii=True,
1023+
)
1024+
events = capture_events()
1025+
1026+
client = AsyncAnthropic(api_key="z")
1027+
client.messages._post = mock.AsyncMock(return_value=EXAMPLE_MESSAGE)
1028+
1029+
large_content = (
1030+
"This is a very long message that will exceed our size limits. " * 1000
1031+
)
1032+
messages = [
1033+
{"role": "user", "content": "small message 1"},
1034+
{"role": "assistant", "content": large_content},
1035+
{"role": "user", "content": large_content},
1036+
{"role": "assistant", "content": "small message 4"},
1037+
{"role": "user", "content": "small message 5"},
1038+
]
1039+
1040+
with start_transaction():
1041+
await client.messages.create(max_tokens=1024, messages=messages, model="model")
1042+
1043+
assert len(events) > 0
1044+
tx = events[0]
1045+
assert tx["type"] == "transaction"
1046+
1047+
chat_spans = [
1048+
span for span in tx.get("spans", []) if span.get("op") == OP.GEN_AI_CHAT
1049+
]
1050+
assert len(chat_spans) > 0
1051+
1052+
chat_span = chat_spans[0]
1053+
assert chat_span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat"
1054+
assert SPANDATA.GEN_AI_REQUEST_MESSAGES in chat_span["data"]
1055+
1056+
messages_data = chat_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
1057+
assert isinstance(messages_data, str)
1058+
1059+
parsed_messages = json.loads(messages_data)
1060+
assert isinstance(parsed_messages, list)
1061+
assert len(parsed_messages) == 1
1062+
assert "small message 5" in str(parsed_messages[0])
1063+
10161064
assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5
10171065

10181066

tests/integrations/google_genai/test_google_genai.py

Lines changed: 10 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -1497,60 +1497,6 @@ def test_generate_content_with_content_object(
14971497
]
14981498

14991499

1500-
def test_generate_content_with_conversation_history(
1501-
sentry_init, capture_events, mock_genai_client
1502-
):
1503-
"""Test generate_content with list of Content objects (conversation history)."""
1504-
sentry_init(
1505-
integrations=[GoogleGenAIIntegration(include_prompts=True)],
1506-
traces_sample_rate=1.0,
1507-
send_default_pii=True,
1508-
)
1509-
events = capture_events()
1510-
1511-
mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON)
1512-
1513-
# Create conversation history
1514-
contents = [
1515-
genai_types.Content(
1516-
role="user", parts=[genai_types.Part(text="What is the capital of France?")]
1517-
),
1518-
genai_types.Content(
1519-
role="model",
1520-
parts=[genai_types.Part(text="The capital of France is Paris.")],
1521-
),
1522-
genai_types.Content(
1523-
role="user", parts=[genai_types.Part(text="What about Germany?")]
1524-
),
1525-
]
1526-
1527-
with mock.patch.object(
1528-
mock_genai_client._api_client, "request", return_value=mock_http_response
1529-
):
1530-
with start_transaction(name="google_genai"):
1531-
mock_genai_client.models.generate_content(
1532-
model="gemini-1.5-flash", contents=contents, config=create_test_config()
1533-
)
1534-
1535-
(event,) = events
1536-
invoke_span = event["spans"][0]
1537-
1538-
messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
1539-
assert len(messages) == 3
1540-
assert messages[0]["role"] == "user"
1541-
assert messages[0]["content"] == [
1542-
{"text": "What is the capital of France?", "type": "text"}
1543-
]
1544-
assert (
1545-
messages[1]["role"] == "assistant"
1546-
) # "model" should be normalized to "assistant"
1547-
assert messages[1]["content"] == [
1548-
{"text": "The capital of France is Paris.", "type": "text"}
1549-
]
1550-
assert messages[2]["role"] == "user"
1551-
assert messages[2]["content"] == [{"text": "What about Germany?", "type": "text"}]
1552-
1553-
15541500
def test_generate_content_with_dict_format(
15551501
sentry_init, capture_events, mock_genai_client
15561502
):
@@ -1720,17 +1666,12 @@ def test_generate_content_with_function_response(
17201666
invoke_span = event["spans"][0]
17211667

17221668
messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
1723-
assert len(messages) == 2
1669+
assert len(messages) == 1
17241670
# First message is user message
1725-
assert messages[0]["role"] == "user"
1726-
assert messages[0]["content"] == [
1727-
{"text": "What's the weather in Paris?", "type": "text"}
1728-
]
1729-
# Second message is tool message
1730-
assert messages[1]["role"] == "tool"
1731-
assert messages[1]["content"]["toolCallId"] == "call_123"
1732-
assert messages[1]["content"]["toolName"] == "get_weather"
1733-
assert messages[1]["content"]["output"] == '"Sunny, 72F"'
1671+
assert messages[0]["role"] == "tool"
1672+
assert messages[0]["content"]["toolCallId"] == "call_123"
1673+
assert messages[0]["content"]["toolName"] == "get_weather"
1674+
assert messages[0]["content"]["output"] == '"Sunny, 72F"'
17341675

17351676

17361677
def test_generate_content_with_mixed_string_and_content(
@@ -1771,18 +1712,10 @@ def test_generate_content_with_mixed_string_and_content(
17711712
invoke_span = event["spans"][0]
17721713

17731714
messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
1774-
assert len(messages) == 3
1775-
# String becomes user message
1776-
assert messages[0]["role"] == "user"
1777-
assert messages[0]["content"] == "Hello, this is a string message"
1778-
# Model role normalized to assistant
1779-
assert messages[1]["role"] == "assistant"
1780-
assert messages[1]["content"] == [
1781-
{"text": "Hi! How can I help you?", "type": "text"}
1782-
]
1715+
assert len(messages) == 1
17831716
# User message
1784-
assert messages[2]["role"] == "user"
1785-
assert messages[2]["content"] == [{"text": "Tell me a joke", "type": "text"}]
1717+
assert messages[0]["role"] == "user"
1718+
assert messages[0]["content"] == [{"text": "Tell me a joke", "type": "text"}]
17861719

17871720

17881721
def test_generate_content_with_part_object_directly(
@@ -1850,13 +1783,9 @@ def test_generate_content_with_list_of_dicts(
18501783
invoke_span = event["spans"][0]
18511784

18521785
messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
1853-
assert len(messages) == 3
1786+
assert len(messages) == 1
18541787
assert messages[0]["role"] == "user"
1855-
assert messages[0]["content"] == [{"text": "First user message", "type": "text"}]
1856-
assert messages[1]["role"] == "assistant"
1857-
assert messages[1]["content"] == [{"text": "First model response", "type": "text"}]
1858-
assert messages[2]["role"] == "user"
1859-
assert messages[2]["content"] == [{"text": "Second user message", "type": "text"}]
1788+
assert messages[0]["content"] == [{"text": "Second user message", "type": "text"}]
18601789

18611790

18621791
def test_generate_content_with_dict_inline_data(

tests/integrations/langchain/test_langchain.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1068,9 +1068,8 @@ def test_langchain_message_truncation(sentry_init, capture_events):
10681068

10691069
parsed_messages = json.loads(messages_data)
10701070
assert isinstance(parsed_messages, list)
1071-
assert len(parsed_messages) == 2
1072-
assert "small message 4" in str(parsed_messages[0])
1073-
assert "small message 5" in str(parsed_messages[1])
1071+
assert len(parsed_messages) == 1
1072+
assert "small message 5" in str(parsed_messages[0])
10741073
assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5
10751074

10761075

tests/integrations/langgraph/test_langgraph.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -270,9 +270,8 @@ def original_invoke(self, *args, **kwargs):
270270
import json
271271

272272
request_messages = json.loads(request_messages)
273-
assert len(request_messages) == 2
274-
assert request_messages[0]["content"] == "Hello, can you help me?"
275-
assert request_messages[1]["content"] == "Of course! How can I assist you?"
273+
assert len(request_messages) == 1
274+
assert request_messages[0]["content"] == "Of course! How can I assist you?"
276275

277276
response_text = invoke_span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
278277
assert response_text == expected_assistant_response
@@ -1383,7 +1382,6 @@ def original_invoke(self, *args, **kwargs):
13831382

13841383
parsed_messages = json.loads(messages_data)
13851384
assert isinstance(parsed_messages, list)
1386-
assert len(parsed_messages) == 2
1387-
assert "small message 4" in str(parsed_messages[0])
1388-
assert "small message 5" in str(parsed_messages[1])
1385+
assert len(parsed_messages) == 1
1386+
assert "small message 5" in str(parsed_messages[0])
13891387
assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5

tests/integrations/litellm/test_litellm.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -752,9 +752,8 @@ def test_litellm_message_truncation(sentry_init, capture_events):
752752

753753
parsed_messages = json.loads(messages_data)
754754
assert isinstance(parsed_messages, list)
755-
assert len(parsed_messages) == 2
756-
assert "small message 4" in str(parsed_messages[0])
757-
assert "small message 5" in str(parsed_messages[1])
755+
assert len(parsed_messages) == 1
756+
assert "small message 5" in str(parsed_messages[0])
758757
assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5
759758

760759

0 commit comments

Comments (0)