Commit ab8beaf

avoid triggering structured output paths without a schema
1 parent d9ff99a commit ab8beaf

3 files changed: +16 −5 lines changed

llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
Lines changed: 10 additions & 1 deletion

@@ -97,13 +97,22 @@ async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
         messages = self.ctx.messages.copy()
 
         while True:
+            # Only forward response_format when it's a JSON schema; omit "text"/"json_object" to prevent
+            # OpenAI-compatible providers (e.g., vLLM) from enabling structured output without a schema.
+            safe_response_format = self.ctx.response_format
+            try:
+                if safe_response_format is not None and getattr(safe_response_format, "type", None) != "json_schema":
+                    safe_response_format = None
+            except Exception:
+                safe_response_format = None
+
             completion_result = await self.inference_api.openai_chat_completion(
                 model=self.ctx.model,
                 messages=messages,
                 tools=self.ctx.chat_tools,
                 stream=True,
                 temperature=self.ctx.temperature,
-                response_format=self.ctx.response_format,
+                response_format=safe_response_format,
             )
 
             # Process streaming chunks and build complete response
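
The guard can be exercised on its own. The sketch below is not part of the commit: filter_response_format and the FakeFormat stand-in are hypothetical, assuming only that the real response-format params expose a "type" attribute ("text", "json_object", or "json_schema"), as the diff suggests.

# Hypothetical stand-alone version of the guard added above (not in the commit).
# FakeFormat stands in for the OpenAIResponseFormat* Pydantic models.
from dataclasses import dataclass


@dataclass
class FakeFormat:
    type: str


def filter_response_format(response_format):
    """Forward a response_format only when it actually carries a JSON schema."""
    try:
        if response_format is not None and getattr(response_format, "type", None) != "json_schema":
            return None
    except Exception:
        return None
    return response_format


# "text" and "json_object" are dropped so OpenAI-compatible providers (e.g. vLLM)
# never receive a structured-output request without a schema; "json_schema" passes through.
assert filter_response_format(None) is None
assert filter_response_format(FakeFormat(type="text")) is None
assert filter_response_format(FakeFormat(type="json_object")) is None
assert filter_response_format(FakeFormat(type="json_schema")).type == "json_schema"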

llama_stack/providers/inline/agents/meta_reference/responses/types.py
Lines changed: 1 addition & 1 deletion

@@ -57,4 +57,4 @@ class ChatCompletionContext(BaseModel):
     response_tools: list[OpenAIResponseInputTool] | None = None
     chat_tools: list[ChatCompletionToolParam] | None = None
     temperature: float | None
-    response_format: OpenAIResponseFormatParam
+    response_format: OpenAIResponseFormatParam | None = None
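
The effect of the field change can be shown with a minimal Pydantic model. This is an illustrative stand-in only (plain str instead of OpenAIResponseFormatParam), not the real ChatCompletionContext.

# Illustrative only: trimmed-down stand-ins for ChatCompletionContext, using str in
# place of OpenAIResponseFormatParam, to show the required -> optional change.
from pydantic import BaseModel, ValidationError


class ContextBefore(BaseModel):
    temperature: float | None
    response_format: str  # previously required: every context had to carry a format


class ContextAfter(BaseModel):
    temperature: float | None
    response_format: str | None = None  # now optional: None means "send no response_format"


try:
    ContextBefore(temperature=None)
except ValidationError:
    print("before: a context without response_format failed validation")

ctx = ContextAfter(temperature=None)
print(ctx.response_format)  # None -> nothing is forwarded to the provider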

llama_stack/providers/inline/agents/meta_reference/responses/utils.py
Lines changed: 5 additions & 3 deletions

@@ -36,7 +36,6 @@
     OpenAIResponseFormatJSONObject,
     OpenAIResponseFormatJSONSchema,
     OpenAIResponseFormatParam,
-    OpenAIResponseFormatText,
     OpenAISystemMessageParam,
     OpenAIToolMessageParam,
     OpenAIUserMessageParam,
@@ -170,12 +169,15 @@ async def convert_response_input_to_chat_messages(
 
 async def convert_response_text_to_chat_response_format(
     text: OpenAIResponseText,
-) -> OpenAIResponseFormatParam:
+) -> OpenAIResponseFormatParam | None:
     """
     Convert an OpenAI Response text parameter into an OpenAI Chat Completion response format.
     """
     if not text.format or text.format["type"] == "text":
-        return OpenAIResponseFormatText(type="text")
+        # Do not forward a plain text response_format to OpenAI-compatible providers
+        # to avoid triggering structured output paths without a schema
+        return None
+
     if text.format["type"] == "json_object":
         return OpenAIResponseFormatJSONObject()
     if text.format["type"] == "json_schema":
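
Behaviourally, the converter now maps the Responses text parameter as sketched below. This is not the library function: it uses plain dicts instead of the OpenAIResponseFormat* models, and the json_schema branch, which is cut off in this diff, is only a placeholder.

# Hypothetical dict-based sketch of convert_response_text_to_chat_response_format
# after this commit; the real helper returns OpenAIResponseFormat* Pydantic models.
from typing import Any


def convert_text_format(fmt: dict[str, Any] | None) -> dict[str, Any] | None:
    # No format, or an explicit "text" format: return None so the caller sends no
    # response_format at all (previously this returned a plain text format object).
    if not fmt or fmt["type"] == "text":
        return None
    if fmt["type"] == "json_object":
        return {"type": "json_object"}
    if fmt["type"] == "json_schema":
        # The json_schema branch is truncated in the diff above; the payload built
        # here is an assumption, shown only as a placeholder.
        return {"type": "json_schema", "json_schema": fmt.get("json_schema")}
    raise ValueError(f"unsupported text format: {fmt!r}")


assert convert_text_format(None) is None
assert convert_text_format({"type": "text"}) is None
assert convert_text_format({"type": "json_object"}) == {"type": "json_object"}

Note that even though a json_object format still maps to a response format here, the guard in streaming.py drops anything that is not a json_schema before calling the provider, so only schema-backed formats ever reach openai_chat_completion.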
