Commit ab8beaf

avoid triggering structured output paths without a schema
1 parent d9ff99a commit ab8beaf

3 files changed: +16 −5 lines changed

llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
Lines changed: 10 additions & 1 deletion

@@ -97,13 +97,22 @@ async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
         messages = self.ctx.messages.copy()
 
         while True:
+            # Only forward response_format when it's a JSON schema; omit "text"/"json_object" to prevent
+            # OpenAI-compatible providers (e.g., vLLM) from enabling structured output without a schema.
+            safe_response_format = self.ctx.response_format
+            try:
+                if safe_response_format is not None and getattr(safe_response_format, "type", None) != "json_schema":
+                    safe_response_format = None
+            except Exception:
+                safe_response_format = None
+
             completion_result = await self.inference_api.openai_chat_completion(
                 model=self.ctx.model,
                 messages=messages,
                 tools=self.ctx.chat_tools,
                 stream=True,
                 temperature=self.ctx.temperature,
-                response_format=self.ctx.response_format,
+                response_format=safe_response_format,
             )
 
             # Process streaming chunks and build complete response
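
The guard can be exercised on its own. The sketch below is not part of the commit: filter_response_format and the FakeFormat stand-in are hypothetical, assuming only that the real response-format params expose a "type" attribute ("text", "json_object", or "json_schema"), as the diff suggests.

# Hypothetical stand-alone version of the guard added above (not in the commit).
# FakeFormat stands in for the OpenAIResponseFormat* Pydantic models.
from dataclasses import dataclass


@dataclass
class FakeFormat:
    type: str


def filter_response_format(response_format):
    """Forward a response_format only when it actually carries a JSON schema."""
    try:
        if response_format is not None and getattr(response_format, "type", None) != "json_schema":
            return None
    except Exception:
        return None
    return response_format


# "text" and "json_object" are dropped so OpenAI-compatible providers (e.g. vLLM)
# never receive a structured-output request without a schema; "json_schema" passes through.
assert filter_response_format(None) is None
assert filter_response_format(FakeFormat(type="text")) is None
assert filter_response_format(FakeFormat(type="json_object")) is None
assert filter_response_format(FakeFormat(type="json_schema")).type == "json_schema"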

llama_stack/providers/inline/agents/meta_reference/responses/types.py
Lines changed: 1 addition & 1 deletion

@@ -57,4 +57,4 @@ class ChatCompletionContext(BaseModel):
     response_tools: list[OpenAIResponseInputTool] | None = None
     chat_tools: list[ChatCompletionToolParam] | None = None
     temperature: float | None
-    response_format: OpenAIResponseFormatParam
+    response_format: OpenAIResponseFormatParam | None = None
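
The effect of the field change can be shown with a minimal Pydantic model. This is an illustrative stand-in only (plain str instead of OpenAIResponseFormatParam), not the real ChatCompletionContext.

# Illustrative only: trimmed-down stand-ins for ChatCompletionContext, using str in
# place of OpenAIResponseFormatParam, to show the required -> optional change.
from pydantic import BaseModel, ValidationError


class ContextBefore(BaseModel):
    temperature: float | None
    response_format: str  # previously required: every context had to carry a format


class ContextAfter(BaseModel):
    temperature: float | None
    response_format: str | None = None  # now optional: None means "send no response_format"


try:
    ContextBefore(temperature=None)
except ValidationError:
    print("before: a context without response_format failed validation")

ctx = ContextAfter(temperature=None)
print(ctx.response_format)  # None -> nothing is forwarded to the provider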

llama_stack/providers/inline/agents/meta_reference/responses/utils.py
Lines changed: 5 additions & 3 deletions

@@ -36,7 +36,6 @@
     OpenAIResponseFormatJSONObject,
     OpenAIResponseFormatJSONSchema,
     OpenAIResponseFormatParam,
-    OpenAIResponseFormatText,
     OpenAISystemMessageParam,
     OpenAIToolMessageParam,
     OpenAIUserMessageParam,
@@ -170,12 +169,15 @@ async def convert_response_input_to_chat_messages(
 
 async def convert_response_text_to_chat_response_format(
     text: OpenAIResponseText,
-) -> OpenAIResponseFormatParam:
+) -> OpenAIResponseFormatParam | None:
     """
     Convert an OpenAI Response text parameter into an OpenAI Chat Completion response format.
     """
     if not text.format or text.format["type"] == "text":
-        return OpenAIResponseFormatText(type="text")
+        # Do not forward a plain text response_format to OpenAI-compatible providers
+        # to avoid triggering structured output paths without a schema
+        return None
+
     if text.format["type"] == "json_object":
         return OpenAIResponseFormatJSONObject()
     if text.format["type"] == "json_schema":
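
Behaviourally, the converter now maps the Responses text parameter as sketched below. This is not the library function: it uses plain dicts instead of the OpenAIResponseFormat* models, and the json_schema branch, which is cut off in this diff, is only a placeholder.

# Hypothetical dict-based sketch of convert_response_text_to_chat_response_format
# after this commit; the real helper returns OpenAIResponseFormat* Pydantic models.
from typing import Any


def convert_text_format(fmt: dict[str, Any] | None) -> dict[str, Any] | None:
    # No format, or an explicit "text" format: return None so the caller sends no
    # response_format at all (previously this returned a plain text format object).
    if not fmt or fmt["type"] == "text":
        return None
    if fmt["type"] == "json_object":
        return {"type": "json_object"}
    if fmt["type"] == "json_schema":
        # The json_schema branch is truncated in the diff above; the payload built
        # here is an assumption, shown only as a placeholder.
        return {"type": "json_schema", "json_schema": fmt.get("json_schema")}
    raise ValueError(f"unsupported text format: {fmt!r}")


assert convert_text_format(None) is None
assert convert_text_format({"type": "text"}) is None
assert convert_text_format({"type": "json_object"}) == {"type": "json_object"}

Note that even though a json_object format still maps to a response format here, the guard in streaming.py drops anything that is not a json_schema before calling the provider, so only schema-backed formats ever reach openai_chat_completion.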
