diff --git a/sentry_sdk/integrations/google_genai/utils.py b/sentry_sdk/integrations/google_genai/utils.py
index 3b18712d3e..9db651182b 100644
--- a/sentry_sdk/integrations/google_genai/utils.py
+++ b/sentry_sdk/integrations/google_genai/utils.py
@@ -32,16 +32,20 @@
     event_from_exception,
     safe_serialize,
 )
-from google.genai.types import GenerateContentConfig
+from google.genai.types import GenerateContentConfig, Part, Content
+from itertools import chain
 
 if TYPE_CHECKING:
     from sentry_sdk.tracing import Span
+    from sentry_sdk._types import TextPart
     from google.genai.types import (
         GenerateContentResponse,
         ContentListUnion,
+        ContentUnionDict,
         Tool,
         Model,
         EmbedContentResponse,
+        ContentUnion,
     )
 
 
@@ -720,6 +724,86 @@ def extract_finish_reasons(
     return finish_reasons if finish_reasons else None
 
 
+def _transform_system_instruction_one_level(
+    system_instructions: "Union[ContentUnionDict, ContentUnion]",
+    can_be_content: bool,
+) -> "list[TextPart]":
+    """
+    Convert one system instruction value into a flat list of text parts.
+
+    A string, a ``Part`` with text, or a dict with a truthy ``"text"`` value
+    yields a single text part. When ``can_be_content`` is true, a ``Content``
+    object or a dict with ``"parts"`` yields one text part per part whose
+    text is a string. Anything else yields an empty list.
+    """
+    if isinstance(system_instructions, str):
+        return [{"type": "text", "content": system_instructions}]
+
+    if isinstance(system_instructions, dict):
+        if system_instructions.get("text"):
+            return [{"type": "text", "content": system_instructions["text"]}]
+
+        if not can_be_content:
+            return []
+
+        text_parts: "list[TextPart]" = []
+        # "parts" can be present but None; treat it like a missing key rather
+        # than raising TypeError when iterating.
+        for part in system_instructions.get("parts") or []:
+            if isinstance(part, dict):
+                text = part.get("text")
+            elif isinstance(part, Part):
+                text = part.text
+            else:
+                continue
+
+            if isinstance(text, str):
+                text_parts.append({"type": "text", "content": text})
+
+        return text_parts
+
+    if isinstance(system_instructions, Part) and system_instructions.text:
+        return [{"type": "text", "content": system_instructions.text}]
+
+    if can_be_content and isinstance(system_instructions, Content):
+        # Content.parts is optional, so only iterate an actual list.
+        if not isinstance(system_instructions.parts, list):
+            return []
+
+        return [
+            {"type": "text", "content": part.text}
+            for part in system_instructions.parts
+            if isinstance(part.text, str)
+        ]
+
+    return []
+
+
+def _transform_system_instructions(
+    system_instructions: "Union[ContentUnionDict, ContentUnion]",
+) -> "list[TextPart]":
+    """
+    Convert a ``system_instruction`` config value into a list of text parts.
+
+    A list is flattened element by element, and its elements are not unpacked
+    as ``Content``. Any other value is treated as a single instruction that
+    may itself be a ``Content`` object or a dict with ``"parts"``.
+    """
+    if isinstance(system_instructions, list):
+        return list(
+            chain.from_iterable(
+                _transform_system_instruction_one_level(
+                    instructions, can_be_content=False
+                )
+                for instructions in system_instructions
+            )
+        )
+
+    return _transform_system_instruction_one_level(
+        system_instructions, can_be_content=True
+    )
+
+
 def set_span_data_for_request(
     span: "Span",
     integration: "Any",
@@ -741,27 +825,19 @@ def set_span_data_for_request(
         messages = []
 
         # Add system instruction if present
+        system_instructions = None
         if config and hasattr(config, "system_instruction"):
-            system_instruction = config.system_instruction
-            if system_instruction:
-                system_messages = extract_contents_messages(system_instruction)
-                # System instruction should be a single system message
-                # Extract text from all messages and combine into one system message
-                system_texts = []
-                for msg in system_messages:
-                    content = msg.get("content")
-                    if isinstance(content, list):
-                        # Extract text from content parts
-                        for part in content:
-                            if isinstance(part, dict) and part.get("type") == "text":
-                                system_texts.append(part.get("text", ""))
-                    elif isinstance(content, str):
-                        system_texts.append(content)
-
-                if system_texts:
-                    messages.append(
-                        {"role": "system", "content": " ".join(system_texts)}
-                    )
+            system_instructions = config.system_instruction
+        elif isinstance(config, dict) and "system_instruction" in config:
+            system_instructions = config.get("system_instruction")
+
+        if system_instructions is not None:
+            set_data_normalized(
+                span,
+                SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS,
+                _transform_system_instructions(system_instructions),
+                unpack=False,
+            )
 
         # Extract messages from contents
         contents_messages = extract_contents_messages(contents)
diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py
index ad89b878ea..2c4aa0b9f0 100644
--- a/tests/integrations/google_genai/test_google_genai.py
+++ b/tests/integrations/google_genai/test_google_genai.py
@@ -4,6 +4,7 @@
 
 from google import genai
 from google.genai import types as genai_types
+from google.genai.types import Content, Part
 
 from sentry_sdk import start_transaction
 from sentry_sdk._types import BLOB_DATA_SUBSTITUTE
@@ -106,11 +107,6 @@ def create_test_config(
     if seed is not None:
         config_dict["seed"] = seed
     if system_instruction is not None:
-        # Convert string to Content for system instruction
-        if isinstance(system_instruction, str):
-            system_instruction = genai_types.Content(
-                parts=[genai_types.Part(text=system_instruction)], role="system"
-            )
         config_dict["system_instruction"] = system_instruction
     if tools is not None:
         config_dict["tools"] = tools
@@ -186,6 +182,7 @@ def test_nonstreaming_generate_content(
         response_texts = json.loads(response_text)
         assert response_texts == ["Hello! How can I help you today?"]
     else:
+        assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_span["data"]
         assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span["data"]
         assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_span["data"]
 
@@ -202,8 +199,41 @@ def test_nonstreaming_generate_content(
     assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100
 
 
+@pytest.mark.parametrize("generate_content_config", (False, True))
+@pytest.mark.parametrize(
+    "system_instructions,expected_texts",
+    [
+        (None, None),
+        ({}, []),
+        (Content(role="system", parts=[]), []),
+        ({"parts": []}, []),
+        ("You are a helpful assistant.", ["You are a helpful assistant."]),
+        (Part(text="You are a helpful assistant."), ["You are a helpful assistant."]),
+        (
+            Content(role="system", parts=[Part(text="You are a helpful assistant.")]),
+            ["You are a helpful assistant."],
+        ),
+        ({"text": "You are a helpful assistant."}, ["You are a helpful assistant."]),
+        (
+            {"parts": [Part(text="You are a helpful assistant.")]},
+            ["You are a helpful assistant."],
+        ),
+        (
+            {"parts": [{"text": "You are a helpful assistant."}]},
+            ["You are a helpful assistant."],
+        ),
+        (["You are a helpful assistant."], ["You are a helpful assistant."]),
+        ([Part(text="You are a helpful assistant.")], ["You are a helpful assistant."]),
+        ([{"text": "You are a helpful assistant."}], ["You are a helpful assistant."]),
+    ],
+)
 def test_generate_content_with_system_instruction(
-    sentry_init, capture_events, mock_genai_client
+    sentry_init,
+    capture_events,
+    mock_genai_client,
+    generate_content_config,
+    system_instructions,
+    expected_texts,
 ):
     sentry_init(
         integrations=[GoogleGenAIIntegration(include_prompts=True)],
@@ -218,25 +248,35 @@ def test_generate_content_with_system_instruction(
         mock_genai_client._api_client, "request", return_value=mock_http_response
     ):
         with start_transaction(name="google_genai"):
-            config = create_test_config(
-                system_instruction="You are a helpful assistant",
-                temperature=0.5,
-            )
+            config = {
+                "system_instruction": system_instructions,
+                "temperature": 0.5,
+            }
+
+            if generate_content_config:
+                config = create_test_config(**config)
+
             mock_genai_client.models.generate_content(
-                model="gemini-1.5-flash", contents="What is 2+2?", config=config
+                model="gemini-1.5-flash",
+                contents="What is 2+2?",
+                config=config,
             )
 
     (event,) = events
     invoke_span = event["spans"][0]
 
-    # Check that system instruction is included in messages
+    if expected_texts is None:
+        assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_span["data"]
+        return
+
     # (PII is enabled and include_prompts is True in this test)
-    messages_str = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-    # Parse the JSON string to verify content
-    messages = json.loads(messages_str)
-    assert len(messages) == 2
-    assert messages[0] == {"role": "system", "content": "You are a helpful assistant"}
-    assert messages[1] == {"role": "user", "content": "What is 2+2?"}
+    system_instructions = json.loads(
+        invoke_span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]
+    )
+
+    assert system_instructions == [
+        {"type": "text", "content": text} for text in expected_texts
+    ]
 
 
 def test_generate_content_with_tools(sentry_init, capture_events, mock_genai_client):
@@ -933,10 +973,8 @@ def test_google_genai_message_truncation(
         with start_transaction(name="google_genai"):
             mock_genai_client.models.generate_content(
                 model="gemini-1.5-flash",
-                contents=small_content,
-                config=create_test_config(
-                    system_instruction=large_content,
-                ),
+                contents=[large_content, small_content],
+                config=create_test_config(),
             )
 
     (event,) = events