From 30ebc752ed432862990a1bd73053bc89e839ecbf Mon Sep 17 00:00:00 2001
From: toshihiro-ohtani
Date: Mon, 26 Jan 2026 22:51:52 +0900
Subject: [PATCH] Fix: Preserve text content when usage is present in streaming
 chunks

- Remove early return in _parse_response_update_from_openai when chunk.usage exists
- Process usage alongside text and tool calls in the same chunk
- Fixes streaming with Gemini and other providers that include usage+text in same chunk
---
 .../agent_framework/openai/_chat_client.py    | 25 +++++++++----------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/python/packages/core/agent_framework/openai/_chat_client.py b/python/packages/core/agent_framework/openai/_chat_client.py
index a1bc1f846a..e9be885a9c 100644
--- a/python/packages/core/agent_framework/openai/_chat_client.py
+++ b/python/packages/core/agent_framework/openai/_chat_client.py
@@ -310,21 +310,19 @@ def _parse_response_update_from_openai(
     ) -> ChatResponseUpdate:
         """Parse a streaming response update from OpenAI."""
         chunk_metadata = self._get_metadata_from_streaming_chat_response(chunk)
-        if chunk.usage:
-            return ChatResponseUpdate(
-                role=Role.ASSISTANT,
-                contents=[
-                    Content.from_usage(
-                        usage_details=self._parse_usage_from_openai(chunk.usage), raw_representation=chunk
-                    )
-                ],
-                model_id=chunk.model,
-                additional_properties=chunk_metadata,
-                response_id=chunk.id,
-                message_id=chunk.id,
-            )
         contents: list[Content] = []
         finish_reason: FinishReason | None = None
+
+        # BUGFIX: Process usage alongside text/tool calls instead of early return
+        # Gemini (and potentially other providers) include both usage and content in the same chunk
+        if chunk.usage:
+            contents.append(
+                Content.from_usage(
+                    usage_details=self._parse_usage_from_openai(chunk.usage), raw_representation=chunk
+                )
+            )
+
+        # Process text and tool calls from choices
         for choice in chunk.choices:
             chunk_metadata.update(self._get_metadata_from_chat_choice(choice))
             contents.extend(self._parse_tool_calls_from_openai(choice))
@@ -335,6 +333,7 @@ def _parse_response_update_from_openai(
                 contents.append(text_content)
             if reasoning_details := getattr(choice.delta, "reasoning_details", None):
                 contents.append(Content.from_text_reasoning(protected_data=json.dumps(reasoning_details)))
+
         return ChatResponseUpdate(
             created_at=datetime.fromtimestamp(chunk.created, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
             contents=contents,