Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions sentry_sdk/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,3 +359,7 @@ class SDKInfo(TypedDict):
)

HttpStatusCodeRange = Union[int, Container[int]]

class TextPart(TypedDict):
    """A single normalized text fragment of gen_ai span data
    (e.g. one entry of ``gen_ai.system_instructions``)."""

    type: Literal["text"]  # discriminator; always the literal string "text"
    content: str  # the extracted text content
6 changes: 6 additions & 0 deletions sentry_sdk/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,12 @@ class SPANDATA:
Example: 2048
"""

GEN_AI_SYSTEM_INSTRUCTIONS = "gen_ai.system_instructions"
"""
The system instructions passed to the model.
    Example: [{"type": "text", "content": "You are a helpful assistant."}, {"type": "text", "content": "Be concise and clear."}]
"""

GEN_AI_REQUEST_MESSAGES = "gen_ai.request.messages"
"""
The messages passed to the model. The "content" can be a string or an array of objects.
Expand Down
82 changes: 62 additions & 20 deletions sentry_sdk/integrations/google_genai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,16 @@
event_from_exception,
safe_serialize,
)
from google.genai.types import GenerateContentConfig
from google.genai.types import GenerateContentConfig, Part, Content
from itertools import chain

if TYPE_CHECKING:
from sentry_sdk.tracing import Span
from sentry_sdk._types import TextPart
from google.genai.types import (
GenerateContentResponse,
ContentListUnion,
ContentUnionDict,
Tool,
Model,
EmbedContentResponse,
Expand Down Expand Up @@ -720,6 +723,56 @@ def extract_finish_reasons(
return finish_reasons if finish_reasons else None


def _transform_system_instruction_one_level(
    system_instructions: "ContentUnionDict",
) -> "list[TextPart]":
    """Flatten one (non-list) system-instruction value into text parts.

    Accepts a plain string, a ``Part``, a ``Content``, or a dict shaped like
    either of those. Parts without text (e.g. file data) contribute nothing;
    unrecognized shapes yield an empty list.
    """
    # Bare string: wrap it as a single text part.
    if isinstance(system_instructions, str):
        return [{"type": "text", "content": system_instructions}]

    # A single Part object: use its text, if any.
    if isinstance(system_instructions, Part):
        text = system_instructions.text
        return [{"type": "text", "content": text}] if text else []

    # A Content object: collect the text of each text-bearing part.
    if isinstance(system_instructions, Content):
        return [
            {"type": "text", "content": part.text}
            for part in (system_instructions.parts or [])
            if part.text
        ]

    if isinstance(system_instructions, dict):
        # Part-shaped dict with a direct "text" key.
        direct_text = system_instructions.get("text")
        if direct_text:
            return [{"type": "text", "content": direct_text}]

        # Content-shaped dict: parts may mix Part objects and plain dicts.
        collected: "list[TextPart]" = []
        for entry in system_instructions.get("parts", []):
            if isinstance(entry, Part):
                if entry.text:
                    collected.append({"type": "text", "content": entry.text})
            elif isinstance(entry, dict) and entry.get("text"):
                collected.append({"type": "text", "content": entry["text"]})
        return collected

    # Unsupported input shape: nothing to extract.
    return []


def _transform_system_instructions(
    system_instructions: "ContentUnionDict",
) -> "list[TextPart]":
    """Normalize system instructions — a single value or a list of them —
    into one flat list of text parts."""
    # Single (non-list) value: delegate directly.
    if not isinstance(system_instructions, list):
        return _transform_system_instruction_one_level(system_instructions)

    # List of values: flatten the per-item results in order.
    flattened: "list[TextPart]" = []
    for item in system_instructions:
        flattened.extend(_transform_system_instruction_one_level(item))
    return flattened


def set_span_data_for_request(
span: "Span",
integration: "Any",
Expand All @@ -743,25 +796,14 @@ def set_span_data_for_request(
# Add system instruction if present
if config and hasattr(config, "system_instruction"):
system_instruction = config.system_instruction
if system_instruction:
system_messages = extract_contents_messages(system_instruction)
# System instruction should be a single system message
# Extract text from all messages and combine into one system message
system_texts = []
for msg in system_messages:
content = msg.get("content")
if isinstance(content, list):
# Extract text from content parts
for part in content:
if isinstance(part, dict) and part.get("type") == "text":
system_texts.append(part.get("text", ""))
elif isinstance(content, str):
system_texts.append(content)

if system_texts:
messages.append(
{"role": "system", "content": " ".join(system_texts)}
)

if system_instruction is not None:
set_data_normalized(
span,
SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS,
_transform_system_instructions(system_instruction),
unpack=False,
)

# Extract messages from contents
contents_messages = extract_contents_messages(contents)
Expand Down
132 changes: 119 additions & 13 deletions tests/integrations/google_genai/test_google_genai.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from google import genai
from google.genai import types as genai_types
from google.genai.types import Content, Part

from sentry_sdk import start_transaction
from sentry_sdk._types import BLOB_DATA_SUBSTITUTE
Expand Down Expand Up @@ -186,6 +187,7 @@ def test_nonstreaming_generate_content(
response_texts = json.loads(response_text)
assert response_texts == ["Hello! How can I help you today?"]
else:
assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_span["data"]
assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in invoke_span["data"]
assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_span["data"]

Expand All @@ -202,8 +204,107 @@ def test_nonstreaming_generate_content(
assert invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100


# Parametrized over the supported system-instruction shapes: str, list,
# Part, Content, and dict variants — including mixed, empty, and non-text cases.
@pytest.mark.parametrize(
"system_instructions,expected_texts",
[
("You are a helpful assistant", ["You are a helpful assistant"]),
(
["You are a translator", "Translate to French"],
["You are a translator", "Translate to French"],
),
(
Content(role="user", parts=[Part(text="You are a helpful assistant")]),
["You are a helpful assistant"],
),
(
Content(
role="user",
parts=[
Part(text="You are a translator"),
Part(text="Translate to French"),
],
),
["You are a translator", "Translate to French"],
),
(
{"parts": [{"text": "You are a helpful assistant"}], "role": "user"},
["You are a helpful assistant"],
),
(
{
"parts": [
{"text": "You are a translator"},
{"text": "Translate to French"},
],
"role": "user",
},
["You are a translator", "Translate to French"],
),
(Part(text="You are a helpful assistant"), ["You are a helpful assistant"]),
({"text": "You are a helpful assistant"}, ["You are a helpful assistant"]),
(
[Part(text="You are a translator"), Part(text="Translate to French")],
["You are a translator", "Translate to French"],
),
(
[{"text": "You are a translator"}, {"text": "Translate to French"}],
["You are a translator", "Translate to French"],
),
(
[Part(text="First instruction"), {"text": "Second instruction"}],
["First instruction", "Second instruction"],
),
(
{
"parts": [
Part(text="First instruction"),
{"text": "Second instruction"},
],
"role": "user",
},
["First instruction", "Second instruction"],
),
(None, None),
("", []),
({}, []),
({"parts": []}, []),
(Content(role="user", parts=[]), []),
(
{
"parts": [
{"text": "Text part"},
{"file_data": {"file_uri": "gs://bucket/file.pdf"}},
],
"role": "user",
},
["Text part"],
),
(
{
"parts": [
{"text": "First"},
Part(text="Second"),
{"text": "Third"},
],
"role": "user",
},
["First", "Second", "Third"],
),
(
{
"parts": [
Part(text="First"),
Part(text="Second"),
Part(text="Third"),
],
},
["First", "Second", "Third"],
),
],
)
def test_generate_content_with_system_instruction(
sentry_init, capture_events, mock_genai_client
sentry_init, capture_events, mock_genai_client, system_instructions, expected_texts
):
sentry_init(
integrations=[GoogleGenAIIntegration(include_prompts=True)],
Expand All @@ -219,7 +320,7 @@ def test_generate_content_with_system_instruction(
):
with start_transaction(name="google_genai"):
config = create_test_config(
system_instruction="You are a helpful assistant",
system_instruction=system_instructions,
temperature=0.5,
)
mock_genai_client.models.generate_content(
Expand All @@ -229,14 +330,21 @@ def test_generate_content_with_system_instruction(
(event,) = events
invoke_span = event["spans"][0]

# Check that system instruction is included in messages
if expected_texts is None:
assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_span["data"]
return

# (PII is enabled and include_prompts is True in this test)
messages_str = invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
# Parse the JSON string to verify content
messages = json.loads(messages_str)
assert len(messages) == 2
assert messages[0] == {"role": "system", "content": "You are a helpful assistant"}
assert messages[1] == {"role": "user", "content": "What is 2+2?"}
system_instructions = json.loads(
invoke_span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]
)

if isinstance(expected_texts, str):
assert system_instructions == [{"type": "text", "content": expected_texts}]
else:
assert system_instructions == [
{"type": "text", "content": text} for text in expected_texts
]


def test_generate_content_with_tools(sentry_init, capture_events, mock_genai_client):
Expand Down Expand Up @@ -933,10 +1041,8 @@ def test_google_genai_message_truncation(
with start_transaction(name="google_genai"):
mock_genai_client.models.generate_content(
model="gemini-1.5-flash",
contents=small_content,
config=create_test_config(
system_instruction=large_content,
),
contents=[large_content, small_content],
config=create_test_config(),
)

(event,) = events
Expand Down
Loading