@@ -143,6 +143,83 @@ async def EXAMPLE_STREAMED_RESPONSE(*args, **kwargs):
143143 )
144144
145145
async def EXAMPLE_STREAMED_RESPONSE_WITH_DELTA(*args, **kwargs):
    """Simulate a streamed OpenAI Responses API call.

    Yields a ``response.created`` event, two ``response.output_text.delta``
    chunks ("Hello" then " world!"), and a final ``response.completed`` event
    carrying the assembled assistant message plus token-usage details.
    """
    yield ResponseCreatedEvent(
        response=Response(
            id="chat-id",
            output=[],
            parallel_tool_calls=False,
            tool_choice="none",
            tools=[],
            created_at=10000000,
            model="response-model-id",
            object="response",
        ),
        type="response.created",
        sequence_number=0,
    )

    # Stream the text in two delta chunks; sequence numbers continue from 1.
    for seq, chunk in enumerate(("Hello", " world!"), start=1):
        yield ResponseTextDeltaEvent(
            type="response.output_text.delta",
            item_id="message-id",
            output_index=0,
            content_index=0,
            delta=chunk,
            logprobs=[],
            sequence_number=seq,
        )

    yield ResponseCompletedEvent(
        response=Response(
            id="chat-id",
            output=[
                ResponseOutputMessage(
                    id="message-id",
                    content=[
                        ResponseOutputText(
                            annotations=[],
                            text="Hello world!",
                            type="output_text",
                        ),
                    ],
                    role="assistant",
                    status="completed",
                    type="message",
                ),
            ],
            parallel_tool_calls=False,
            tool_choice="none",
            tools=[],
            created_at=10000000,
            model="response-model-id",
            object="response",
            usage=ResponseUsage(
                input_tokens=20,
                input_tokens_details=InputTokensDetails(
                    cached_tokens=5,
                ),
                output_tokens=10,
                output_tokens_details=OutputTokensDetails(
                    reasoning_tokens=8,
                ),
                total_tokens=30,
            ),
        ),
        type="response.completed",
        sequence_number=3,
    )
221+
222+
146223@pytest .fixture
147224def mock_usage ():
148225 return Usage (
@@ -2692,27 +2769,6 @@ def test_openai_agents_message_truncation(sentry_init, capture_events):
26922769 assert "small message 5" in str (parsed_messages [0 ])
26932770
26942771
2695- def test_streaming_patches_applied (sentry_init ):
2696- """
2697- Test that the streaming patches are applied correctly.
2698- """
2699- sentry_init (
2700- integrations = [OpenAIAgentsIntegration ()],
2701- traces_sample_rate = 1.0 ,
2702- )
2703-
2704- # Verify that run_streamed is patched (will have __wrapped__ attribute if patched)
2705- import agents
2706-
2707- # Check that the method exists and has been modified
2708- assert hasattr (agents .run .DEFAULT_AGENT_RUNNER , "run_streamed" )
2709- assert hasattr (agents .run .AgentRunner , "_run_single_turn_streamed" )
2710-
2711- # Verify the patches were applied by checking for our wrapper
2712- run_streamed_func = agents .run .DEFAULT_AGENT_RUNNER .run_streamed
2713- assert run_streamed_func is not None
2714-
2715-
27162772@pytest .mark .asyncio
27172773async def test_streaming_span_update_captures_response_data (
27182774 sentry_init , test_agent , mock_usage
@@ -2777,86 +2833,46 @@ async def test_streaming_ttft_on_chat_span(sentry_init, test_agent):
27772833 Events WITHOUT delta (like ResponseCompletedEvent, ResponseCreatedEvent, etc.)
27782834 should NOT trigger TTFT.
27792835 """
2780- from sentry_sdk .integrations .openai_agents .patches .models import (
2781- _create_get_model_wrapper ,
2836+ client = AsyncOpenAI (api_key = "z" )
2837+ client .responses ._post = AsyncMock (return_value = EXAMPLE_RESPONSE )
2838+
2839+ model = OpenAIResponsesModel (model = "gpt-4" , openai_client = client )
2840+
2841+ agent_with_tool = test_agent .clone (
2842+ model = model ,
27822843 )
27832844
27842845 sentry_init (
27852846 integrations = [OpenAIAgentsIntegration ()],
27862847 traces_sample_rate = 1.0 ,
27872848 )
27882849
2789- # Create a mock model with stream_response and get_response
2790- class MockModel :
2791- model = "gpt-4"
2792-
2793- async def get_response (self , * args , ** kwargs ):
2794- # Not used in this test, but required by the wrapper
2795- pass
2796-
2797- async def stream_response (self , * args , ** kwargs ):
2798- # First event: ResponseCreatedEvent (no delta - should NOT trigger TTFT)
2799- created_event = MagicMock (spec = ["type" , "sequence_number" ])
2800- created_event .type = "response.created"
2801- yield created_event
2802-
2803- # Simulate server-side processing delay before first token
2804- await asyncio .sleep (0.05 ) # 50ms delay
2805-
2806- # Second event: ResponseTextDeltaEvent (HAS delta - triggers TTFT)
2807- text_delta_event = MagicMock (spec = ["delta" , "type" , "content_index" ])
2808- text_delta_event .delta = "Hello"
2809- text_delta_event .type = "response.output_text.delta"
2810- yield text_delta_event
2811-
2812- # Third event: more text content (also has delta, but TTFT already recorded)
2813- text_delta_event2 = MagicMock (spec = ["delta" , "type" , "content_index" ])
2814- text_delta_event2 .delta = " world!"
2815- text_delta_event2 .type = "response.output_text.delta"
2816- yield text_delta_event2
2817-
2818- # Final event: ResponseCompletedEvent (has response, no delta)
2819- completed_event = MagicMock (spec = ["response" , "type" , "sequence_number" ])
2820- completed_event .response = MagicMock ()
2821- completed_event .response .model = "gpt-4"
2822- completed_event .response .usage = Usage (
2823- requests = 1 ,
2824- input_tokens = 10 ,
2825- output_tokens = 5 ,
2826- total_tokens = 15 ,
2827- )
2828- completed_event .response .output = []
2829- yield completed_event
2830-
2831- # Create a mock original _get_model that returns our mock model
2832- def mock_get_model (agent , run_config ):
2833- return MockModel ()
2834-
2835- # Wrap it with our integration wrapper
2836- wrapped_get_model = _create_get_model_wrapper (mock_get_model )
2837-
2838- with sentry_sdk .start_transaction (name = "test_ttft" , sampled = True ) as transaction :
2839- # Get the wrapped model (this applies the stream_response wrapper)
2840- wrapped_model = wrapped_get_model (None , test_agent , MagicMock ())
2841-
2842- # Call the wrapped stream_response and consume all events
2843- async for _event in wrapped_model .stream_response ():
2844- pass
2845-
2846- # Verify TTFT is recorded on the chat span (must be inside transaction context)
2847- chat_spans = [
2848- s for s in transaction ._span_recorder .spans if s .op == "gen_ai.chat"
2849- ]
2850- assert len (chat_spans ) >= 1
2851- chat_span = chat_spans [0 ]
2850+ with patch .object (
2851+ model ._client .responses ,
2852+ "create" ,
2853+ side_effect = EXAMPLE_STREAMED_RESPONSE_WITH_DELTA ,
2854+ ) as _ :
2855+ with sentry_sdk .start_transaction (
2856+ name = "test_ttft" , sampled = True
2857+ ) as transaction :
2858+ result = agents .Runner .run_streamed (
2859+ agent_with_tool ,
2860+ "Please use the simple test tool" ,
2861+ run_config = test_run_config ,
2862+ )
28522863
2853- assert SPANDATA .GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in chat_span ._data
2854- ttft_value = chat_span ._data [SPANDATA .GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN ]
2855- # TTFT should be at least 40ms (our simulated delay minus some variance) but reasonable
2856- assert 0.04 < ttft_value < 1.0 , f"TTFT { ttft_value } should be around 50ms"
2864+ async for event in result .stream_events ():
2865+ pass
2866+
2867+ # Verify TTFT is recorded on the chat span (must be inside transaction context)
2868+ chat_spans = [
2869+ s for s in transaction ._span_recorder .spans if s .op == "gen_ai.chat"
2870+ ]
2871+ assert len (chat_spans ) >= 1
2872+ chat_span = chat_spans [0 ]
28572873
2858- # Verify streaming flag is set
2859- assert chat_span ._data .get (SPANDATA .GEN_AI_RESPONSE_STREAMING ) is True
2874+ assert SPANDATA . GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in chat_span . _data
2875+ assert chat_span ._data .get (SPANDATA .GEN_AI_RESPONSE_STREAMING ) is True
28602876
28612877
28622878@pytest .mark .skipif (
0 commit comments