diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index b9b24e2028..2a8f75714e 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -215,6 +215,24 @@ def _get_system_instructions_completions( ] +def _is_system_instruction_responses(message: "ResponseInputItemParam") -> bool: + if not isinstance(message, dict) or not message.get("role") == "system": + return False + + return "type" not in message or message["type"] == "message" + + +def _get_system_instructions_responses( + messages: "Union[str, ResponseInputParam]", +) -> "list[ResponseInputItemParam]": + if not isinstance(messages, list): + return [] + + return [ + message for message in messages if _is_system_instruction_responses(message) + ] + + def _transform_system_instructions( system_instructions: "list[ChatCompletionSystemMessageParam]", ) -> "list[TextPart]": @@ -289,17 +307,80 @@ def _set_responses_api_input_data( kwargs: "dict[str, Any]", integration: "OpenAIIntegration", ) -> None: - messages: "Optional[Union[ResponseInputParam, list[str]]]" = _get_input_messages( - kwargs - ) + explicit_instructions: "Union[Optional[str], Omit]" = kwargs.get("instructions") + messages: "Optional[Union[str, ResponseInputParam]]" = kwargs.get("input") + + if not should_send_default_pii() or not integration.include_prompts: + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "responses") + _commmon_set_input_data(span, kwargs) + return if ( - messages is not None - and len(messages) > 0 - and should_send_default_pii() - and integration.include_prompts + messages is None + and explicit_instructions is not None + and _is_given(explicit_instructions) ): - normalized_messages = normalize_message_roles(messages) # type: ignore + set_data_normalized( + span, + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, + [ + { + "type": "text", + "content": explicit_instructions, + } + ], + unpack=False, + ) + + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "responses") + _commmon_set_input_data(span, kwargs) + return + + if messages is None: + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "responses") + _commmon_set_input_data(span, kwargs) + return + + instructions_text_parts: "list[TextPart]" = [] + if explicit_instructions is not None and _is_given(explicit_instructions): + instructions_text_parts.append( + { + "type": "text", + "content": explicit_instructions, + } + ) + + system_instructions = _get_system_instructions_responses(messages) + # Deliberate use of function accepting completions API type because + # of shared structure FOR THIS PURPOSE ONLY. + instructions_text_parts += _transform_system_instructions(system_instructions) + + if len(instructions_text_parts) > 0: + set_data_normalized( + span, + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, + instructions_text_parts, + unpack=False, + ) + + if isinstance(messages, str): + normalized_messages = normalize_message_roles([messages]) # type: ignore + scope = sentry_sdk.get_current_scope() + messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + if messages_data is not None: + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False + ) + + set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "responses") + _commmon_set_input_data(span, kwargs) + return + + non_system_messages = [ + message for message in messages if not _is_system_instruction_responses(message) + ] + if len(non_system_messages) > 0: + normalized_messages = normalize_message_roles(non_system_messages) scope = sentry_sdk.get_current_scope() messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) if messages_data is not None: diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 17b5e135dd..f0d2df35b4 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1499,12 +1499,79 @@ def test_ai_client_span_responses_api_no_pii(sentry_init, capture_events): "thread.name": mock.ANY, } + assert "gen_ai.system_instructions" not in spans[0]["data"] assert "gen_ai.request.messages" not in spans[0]["data"] assert "gen_ai.response.text" not in spans[0]["data"] +@pytest.mark.parametrize( + "instructions", + ( + omit, + None, + "You are a coding assistant that talks like a pirate.", + ), +) +@pytest.mark.parametrize( + "input", + [ + pytest.param( + "How do I check if a Python object is an instance of a class?", id="string" + ), + pytest.param( + [ + { + "role": "system", + "content": "You are a helpful assistant.", + }, + {"role": "user", "content": "hello"}, + ], + id="blocks_no_type", + ), + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": "You are a helpful assistant.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ], + id="blocks", + ), + pytest.param( + [ + { + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + }, + {"role": "user", "content": "hello"}, + ], + id="parts_no_type", + ), + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + }, + {"type": "message", "role": "user", "content": "hello"}, + ], + id="parts", + ), + ], +) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -def test_ai_client_span_responses_api(sentry_init, capture_events): +def test_ai_client_span_responses_api( + sentry_init, capture_events, instructions, input, request +): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, @@ -1518,8 +1585,8 @@ def test_ai_client_span_responses_api(sentry_init, capture_events): with start_transaction(name="openai tx"): client.responses.create( model="gpt-4o", - instructions="You are a coding assistant that talks like a pirate.", - input="How do I check if a Python object is an instance of a class?", + instructions=instructions, + input=input, ) (transaction,) = events @@ -1528,10 +1595,9 @@ def test_ai_client_span_responses_api(sentry_init, capture_events): assert len(spans) == 1 assert spans[0]["op"] == "gen_ai.responses" assert spans[0]["origin"] == "auto.ai.openai" - assert spans[0]["data"] == { + + expected_data = { "gen_ai.operation.name": "responses", - "gen_ai.request.messages": '["How do I check if a Python object is an instance of a class?"]', - "gen_ai.request.model": "gpt-4o", "gen_ai.system": "openai", "gen_ai.response.model": "response-model-id", "gen_ai.usage.input_tokens": 20, @@ -1539,11 +1605,168 @@ def test_ai_client_span_responses_api(sentry_init, capture_events): "gen_ai.usage.output_tokens": 10, "gen_ai.usage.output_tokens.reasoning": 8, "gen_ai.usage.total_tokens": 30, + "gen_ai.request.model": "gpt-4o", "gen_ai.response.text": "the model response", "thread.id": mock.ANY, "thread.name": mock.ANY, } + param_id = request.node.callspec.id + if "string" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "string" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + } + ] + ), + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "blocks_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif instructions is None or isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + else: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + + assert spans[0]["data"] == expected_data + @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_error_in_responses_api(sentry_init, capture_events): @@ -1584,7 +1807,73 @@ def test_error_in_responses_api(sentry_init, capture_events): @pytest.mark.asyncio @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") -async def test_ai_client_span_responses_async_api(sentry_init, capture_events): +@pytest.mark.parametrize( + "instructions", + ( + omit, + None, + "You are a coding assistant that talks like a pirate.", + ), +) +@pytest.mark.parametrize( + "input", + [ + pytest.param( + "How do I check if a Python object is an instance of a class?", id="string" + ), + pytest.param( + [ + { + "role": "system", + "content": "You are a helpful assistant.", + }, + {"role": "user", "content": "hello"}, + ], + id="blocks_no_type", + ), + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": "You are a helpful assistant.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ], + id="blocks", + ), + pytest.param( + [ + { + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + }, + {"role": "user", "content": "hello"}, + ], + id="parts_no_type", + ), + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + }, + {"type": "message", "role": "user", "content": "hello"}, + ], + id="parts", + ), + ], +) +async def test_ai_client_span_responses_async_api( + sentry_init, capture_events, instructions, input, request +): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, @@ -1598,8 +1887,8 @@ async def test_ai_client_span_responses_async_api(sentry_init, capture_events): with start_transaction(name="openai tx"): await client.responses.create( model="gpt-4o", - instructions="You are a coding assistant that talks like a pirate.", - input="How do I check if a Python object is an instance of a class?", + instructions=instructions, + input=input, ) (transaction,) = events @@ -1608,7 +1897,8 @@ async def test_ai_client_span_responses_async_api(sentry_init, capture_events): assert len(spans) == 1 assert spans[0]["op"] == "gen_ai.responses" assert spans[0]["origin"] == "auto.ai.openai" - assert spans[0]["data"] == { + + expected_data = { "gen_ai.operation.name": "responses", "gen_ai.request.messages": '["How do I check if a Python object is an instance of a class?"]', "gen_ai.request.model": "gpt-4o", @@ -1624,11 +1914,231 @@ async def test_ai_client_span_responses_async_api(sentry_init, capture_events): "thread.name": mock.ANY, } + param_id = request.node.callspec.id + if "string" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "string" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + } + ] + ), + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "blocks_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif instructions is None or isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + else: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + + assert spans[0]["data"] == expected_data + @pytest.mark.asyncio +@pytest.mark.parametrize( + "instructions", + ( + omit, + None, + "You are a coding assistant that talks like a pirate.", + ), +) +@pytest.mark.parametrize( + "input", + [ + pytest.param( + "How do I check if a Python object is an instance of a class?", id="string" + ), + pytest.param( + [ + { + "role": "system", + "content": "You are a helpful assistant.", + }, + {"role": "user", "content": "hello"}, + ], + id="blocks_no_type", + ), + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": "You are a helpful assistant.", + }, + {"type": "message", "role": "user", "content": "hello"}, + ], + id="blocks", + ), + pytest.param( + [ + { + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + }, + {"role": "user", "content": "hello"}, + ], + id="parts_no_type", + ), + pytest.param( + [ + { + "type": "message", + "role": "system", + "content": [ + {"type": "text", "text": "You are a helpful assistant."}, + {"type": "text", "text": "Be concise and clear."}, + ], + }, + {"type": "message", "role": "user", "content": "hello"}, + ], + id="parts", + ), + ], +) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") async def test_ai_client_span_streaming_responses_async_api( - sentry_init, capture_events + sentry_init, capture_events, instructions, input, request ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], @@ -1643,8 +2153,8 @@ async def test_ai_client_span_streaming_responses_async_api( with start_transaction(name="openai tx"): await client.responses.create( model="gpt-4o", - instructions="You are a coding assistant that talks like a pirate.", - input="How do I check if a Python object is an instance of a class?", + instructions=instructions, + input=input, stream=True, ) @@ -1654,23 +2164,179 @@ async def test_ai_client_span_streaming_responses_async_api( assert len(spans) == 1 assert spans[0]["op"] == "gen_ai.responses" assert spans[0]["origin"] == "auto.ai.openai" - assert spans[0]["data"] == { + + expected_data = { "gen_ai.operation.name": "responses", - "gen_ai.request.messages": '["How do I check if a Python object is an instance of a class?"]', - "gen_ai.request.model": "gpt-4o", - "gen_ai.response.model": "response-model-id", "gen_ai.response.streaming": True, "gen_ai.system": "openai", + "gen_ai.response.model": "response-model-id", "gen_ai.usage.input_tokens": 20, "gen_ai.usage.input_tokens.cached": 5, "gen_ai.usage.output_tokens": 10, "gen_ai.usage.output_tokens.reasoning": 8, "gen_ai.usage.total_tokens": 30, + "gen_ai.request.model": "gpt-4o", "gen_ai.response.text": "the model response", "thread.id": mock.ANY, "thread.name": mock.ANY, } + param_id = request.node.callspec.id + if "string" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "string" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + } + ] + ), + "gen_ai.request.messages": safe_serialize( + ["How do I check if a Python object is an instance of a class?"] + ), + } + ) + elif "blocks_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [{"type": "text", "content": "You are a helpful assistant."}] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "blocks" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id and ( + instructions is None or isinstance(instructions, Omit) + ): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif "parts_no_type" in param_id: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"role": "user", "content": "hello"}] + ), + } + ) + elif instructions is None or isinstance(instructions, Omit): # type: ignore + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + else: + expected_data.update( + { + "gen_ai.system_instructions": safe_serialize( + [ + { + "type": "text", + "content": "You are a coding assistant that talks like a pirate.", + }, + {"type": "text", "content": "You are a helpful assistant."}, + {"type": "text", "content": "Be concise and clear."}, + ] + ), + "gen_ai.request.messages": safe_serialize( + [{"type": "message", "role": "user", "content": "hello"}] + ), + } + ) + + assert spans[0]["data"] == expected_data + @pytest.mark.asyncio @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available")