diff --git a/src/llama_stack_client/resources/inference.py b/src/llama_stack_client/resources/inference.py
index 8e572c9d..29bb0243 100644
--- a/src/llama_stack_client/resources/inference.py
+++ b/src/llama_stack_client/resources/inference.py
@@ -213,7 +213,8 @@ def chat_completion(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> InferenceChatCompletionResponse | Stream[InferenceChatCompletionResponse]:
-        extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
+        if stream is True:
+            extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
         extra_headers = {
             **strip_not_given(
                 {
@@ -364,7 +365,8 @@ def completion(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> InferenceCompletionResponse | Stream[InferenceCompletionResponse]:
-        extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
+        if stream is True:
+            extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
         extra_headers = {
             **strip_not_given(
                 {
@@ -623,7 +625,8 @@ async def chat_completion(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> InferenceChatCompletionResponse | AsyncStream[InferenceChatCompletionResponse]:
-        extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
+        if stream is True:
+            extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
         extra_headers = {
             **strip_not_given(
                 {
@@ -774,7 +777,8 @@ async def completion(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> InferenceCompletionResponse | AsyncStream[InferenceCompletionResponse]:
-        extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
+        if stream is True:
+            extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
         extra_headers = {
             **strip_not_given(
                 {