From 957c8824b9fc5a39fcbb1e535bfe3ac2e6f18d56 Mon Sep 17 00:00:00 2001
From: Vladimir Ivic
Date: Sat, 25 Jan 2025 18:37:42 -0800
Subject: [PATCH] Include text/event-stream header only when stream=True

Summary:
We want to use the headers to negotiate content. Sending this header on
every request causes the server to return chunks even when the
stream=True param is not set:

```
llama-stack-client inference chat-completion --message="Hello there"
{"event":{"event_type":"start","delta":"Hello"}}
{"event":{"event_type":"progress","delta":"!"}}
{"event":{"event_type":"progress","delta":" How"}}
{"event":{"event_type":"progress","delta":" are"}}
{"event":{"event_type":"progress","delta":" you"}}
{"event":{"event_type":"progress","delta":" today"}}
```

Test Plan:
```
pip install .
llama-stack-client configure --endpoint={endpoint} --api-key={api-key}
llama-stack-client inference chat-completion --message="Hello there"
ChatCompletionResponse(completion_message=CompletionMessage(content='Hello! How can I assist you today?', role='assistant', stop_reason='end_of_turn', tool_calls=[]), logprobs=None)
```
---
 src/llama_stack_client/resources/inference.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/llama_stack_client/resources/inference.py b/src/llama_stack_client/resources/inference.py
index 8e572c9d..29bb0243 100644
--- a/src/llama_stack_client/resources/inference.py
+++ b/src/llama_stack_client/resources/inference.py
@@ -213,7 +213,8 @@ def chat_completion(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> InferenceChatCompletionResponse | Stream[InferenceChatCompletionResponse]:
-        extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
+        if stream is True:
+            extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
         extra_headers = {
             **strip_not_given(
                 {
@@ -364,7 +365,8 @@ def completion(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> InferenceCompletionResponse | Stream[InferenceCompletionResponse]:
-        extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
+        if stream is True:
+            extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
         extra_headers = {
             **strip_not_given(
                 {
@@ -623,7 +625,8 @@ async def chat_completion(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> InferenceChatCompletionResponse | AsyncStream[InferenceChatCompletionResponse]:
-        extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
+        if stream is True:
+            extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
         extra_headers = {
             **strip_not_given(
                 {
@@ -774,7 +777,8 @@ async def completion(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
     ) -> InferenceCompletionResponse | AsyncStream[InferenceCompletionResponse]:
-        extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
+        if stream is True:
+            extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
         extra_headers = {
             **strip_not_given(
                 {
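
For reviewers, a minimal sketch of the two code paths this patch separates, using the Python SDK rather than the CLI. The base_url and model_id values are placeholders, and the chat_completion parameter names are assumptions based on the SDK of this era; the response attributes (completion_message.content, event.delta) match the outputs shown in the Summary and Test Plan above.

```python
from llama_stack_client import LlamaStackClient

# Placeholder endpoint; point this at a running Llama Stack server.
client = LlamaStackClient(base_url="http://localhost:5000")

# Non-streaming call: with this patch, no "Accept: text/event-stream"
# header is sent, so the server returns a single ChatCompletionResponse.
response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
    messages=[{"role": "user", "content": "Hello there"}],
)
print(response.completion_message.content)

# Streaming call: stream=True adds the "Accept: text/event-stream"
# header, and the client yields chunks as the server emits them.
for chunk in client.inference.chat_completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
    messages=[{"role": "user", "content": "Hello there"}],
    stream=True,
):
    print(chunk.event.delta, end="")
```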