From 30ebc752ed432862990a1bd73053bc89e839ecbf Mon Sep 17 00:00:00 2001
From: toshihiro-ohtani
Date: Mon, 26 Jan 2026 22:51:52 +0900
Subject: [PATCH] Fix: Preserve text content when usage is present in streaming
 chunks

- Remove early return in _parse_response_update_from_openai when chunk.usage exists
- Process usage alongside text and tool calls in the same chunk
- Fixes streaming with Gemini and other providers that include usage+text in same chunk
---
 .../agent_framework/openai/_chat_client.py    | 25 +++++++++----------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/python/packages/core/agent_framework/openai/_chat_client.py b/python/packages/core/agent_framework/openai/_chat_client.py
index a1bc1f846a..e9be885a9c 100644
--- a/python/packages/core/agent_framework/openai/_chat_client.py
+++ b/python/packages/core/agent_framework/openai/_chat_client.py
@@ -310,21 +310,19 @@ def _parse_response_update_from_openai(
     ) -> ChatResponseUpdate:
         """Parse a streaming response update from OpenAI."""
         chunk_metadata = self._get_metadata_from_streaming_chat_response(chunk)
-        if chunk.usage:
-            return ChatResponseUpdate(
-                role=Role.ASSISTANT,
-                contents=[
-                    Content.from_usage(
-                        usage_details=self._parse_usage_from_openai(chunk.usage), raw_representation=chunk
-                    )
-                ],
-                model_id=chunk.model,
-                additional_properties=chunk_metadata,
-                response_id=chunk.id,
-                message_id=chunk.id,
-            )
         contents: list[Content] = []
         finish_reason: FinishReason | None = None
+
+        # BUGFIX: Process usage alongside text/tool calls instead of early return
+        # Gemini (and potentially other providers) include both usage and content in the same chunk
+        if chunk.usage:
+            contents.append(
+                Content.from_usage(
+                    usage_details=self._parse_usage_from_openai(chunk.usage), raw_representation=chunk
+                )
+            )
+
+        # Process text and tool calls from choices
         for choice in chunk.choices:
             chunk_metadata.update(self._get_metadata_from_chat_choice(choice))
             contents.extend(self._parse_tool_calls_from_openai(choice))
@@ -335,6 +333,7 @@ def _parse_response_update_from_openai(
                 contents.append(text_content)
             if reasoning_details := getattr(choice.delta, "reasoning_details", None):
                 contents.append(Content.from_text_reasoning(protected_data=json.dumps(reasoning_details)))
+
         return ChatResponseUpdate(
             created_at=datetime.fromtimestamp(chunk.created, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
             contents=contents,