diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py index 7043bbc2ee..ecb8abcd10 100644 --- a/sentry_sdk/_types.py +++ b/sentry_sdk/_types.py @@ -359,3 +359,7 @@ class SDKInfo(TypedDict): ) HttpStatusCodeRange = Union[int, Container[int]] + + class TextPart(TypedDict): + type: Literal["text"] + content: str diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 93fca6ba3e..4b61a317fb 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -542,6 +542,12 @@ class SPANDATA: Example: 2048 """ + GEN_AI_SYSTEM_INSTRUCTIONS = "gen_ai.system_instructions" + """ + The system instructions passed to the model. + Example: [{"type": "text", "content": "You are a helpful assistant."},{"type": "text", "content": "Be concise and clear."}] + """ + GEN_AI_REQUEST_MESSAGES = "gen_ai.request.messages" """ The messages passed to the model. The "content" can be a string or an array of objects. diff --git a/sentry_sdk/integrations/anthropic.py b/sentry_sdk/integrations/anthropic.py index 2bc48e54e3..862f073cad 100644 --- a/sentry_sdk/integrations/anthropic.py +++ b/sentry_sdk/integrations/anthropic.py @@ -39,13 +39,14 @@ from anthropic.resources import AsyncMessages, Messages if TYPE_CHECKING: - from anthropic.types import MessageStreamEvent + from anthropic.types import MessageStreamEvent, TextBlockParam except ImportError: raise DidNotEnable("Anthropic not installed") if TYPE_CHECKING: from typing import Any, AsyncIterator, Iterator, List, Optional, Union from sentry_sdk.tracing import Span + from sentry_sdk._types import TextPart class AnthropicIntegration(Integration): @@ -177,6 +178,27 @@ def _transform_anthropic_content_block( return result if result is not None else content_block +def _transform_system_instructions( + system_instructions: "Union[str, Iterable[TextBlockParam]]", +) -> "list[TextPart]": + if isinstance(system_instructions, str): + return [ + { + "type": "text", + "content": system_instructions, + } + ] + + return [ + { + "type": "text", + "content": 
instruction["text"], + } + for instruction in system_instructions + if isinstance(instruction, dict) and "text" in instruction + ] + + def _set_input_data( span: "Span", kwargs: "dict[str, Any]", integration: "AnthropicIntegration" ) -> None: @@ -184,7 +206,7 @@ def _set_input_data( Set input data for the span based on the provided keyword arguments for the anthropic message creation. """ set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "chat") - system_prompt = kwargs.get("system") + system_instructions: "Union[str, Iterable[TextBlockParam]]" = kwargs.get("system") # type: ignore messages = kwargs.get("messages") if ( messages is not None @@ -192,29 +214,17 @@ def _set_input_data( and should_send_default_pii() and integration.include_prompts ): - normalized_messages = [] - if system_prompt: - system_prompt_content: "Optional[Union[str, List[dict[str, Any]]]]" = None - if isinstance(system_prompt, str): - system_prompt_content = system_prompt - elif isinstance(system_prompt, Iterable): - system_prompt_content = [] - for item in system_prompt: - if ( - isinstance(item, dict) - and item.get("type") == "text" - and item.get("text") - ): - system_prompt_content.append(item.copy()) - - if system_prompt_content: - normalized_messages.append( - { - "role": GEN_AI_ALLOWED_MESSAGE_ROLES.SYSTEM, - "content": system_prompt_content, - } - ) + if isinstance(system_instructions, str) or isinstance( + system_instructions, Iterable + ): + set_data_normalized( + span, + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, + _transform_system_instructions(system_instructions), + unpack=False, + ) + normalized_messages = [] for message in messages: if ( message.get("role") == GEN_AI_ALLOWED_MESSAGE_ROLES.USER diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py index e8bc4648b6..0041aab928 100644 --- a/tests/integrations/anthropic/test_anthropic.py +++ b/tests/integrations/anthropic/test_anthropic.py @@ -1074,17 +1074,22 @@ def 
test_nonstreaming_create_message_with_system_prompt( assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + system_instructions = json.loads( + span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(stored_messages) == 2 - # System message should be first - assert stored_messages[0]["role"] == "system" - assert stored_messages[0]["content"] == "You are a helpful assistant." - # User message should be second - assert stored_messages[1]["role"] == "user" - assert stored_messages[1]["content"] == "Hello, Claude" + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." 
else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] @@ -1153,17 +1158,22 @@ async def test_nonstreaming_create_message_with_system_prompt_async( assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + system_instructions = json.loads( + span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(stored_messages) == 2 - # System message should be first - assert stored_messages[0]["role"] == "system" - assert stored_messages[0]["content"] == "You are a helpful assistant." - # User message should be second - assert stored_messages[1]["role"] == "user" - assert stored_messages[1]["content"] == "Hello, Claude" + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi, I'm Claude." 
else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] @@ -1264,18 +1274,23 @@ def test_streaming_create_message_with_system_prompt( assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + system_instructions = json.loads( + span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(stored_messages) == 2 - # System message should be first - assert stored_messages[0]["role"] == "system" - assert stored_messages[0]["content"] == "You are a helpful assistant." - # User message should be second - assert stored_messages[1]["role"] == "user" - assert stored_messages[1]["content"] == "Hello, Claude" + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] @@ -1379,18 +1394,23 @@ async def test_streaming_create_message_with_system_prompt_async( assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "model" if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + system_instructions = json.loads( + span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS] + ) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."} + ] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - assert len(stored_messages) == 2 - # System message should be first - assert stored_messages[0]["role"] == "system" - assert stored_messages[0]["content"] == "You are a helpful assistant." - # User message should be second - assert stored_messages[1]["role"] == "user" - assert stored_messages[1]["content"] == "Hello, Claude" + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello, Claude" assert span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] == "Hi! I'm Claude!" 
else: + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in span["data"] assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] @@ -1437,21 +1457,23 @@ def test_system_prompt_with_complex_structure(sentry_init, capture_events): (span,) = event["spans"] assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS in span["data"] + system_instructions = json.loads(span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]) + + # System content should be a list of text blocks + assert isinstance(system_instructions, list) + assert system_instructions == [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] stored_messages = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) - # Should have system message first, then user message - assert len(stored_messages) == 2 - assert stored_messages[0]["role"] == "system" - # System content should be a list of text blocks - assert isinstance(stored_messages[0]["content"], list) - assert len(stored_messages[0]["content"]) == 2 - assert stored_messages[0]["content"][0]["type"] == "text" - assert stored_messages[0]["content"][0]["text"] == "You are a helpful assistant." - assert stored_messages[0]["content"][1]["type"] == "text" - assert stored_messages[0]["content"][1]["text"] == "Be concise and clear." - assert stored_messages[1]["role"] == "user" - assert stored_messages[1]["content"] == "Hello" + assert len(stored_messages) == 1 + assert stored_messages[0]["role"] == "user" + assert stored_messages[0]["content"] == "Hello" # Tests for transform_content_part (shared) and _transform_anthropic_content_block helper functions