Skip to content

Commit 957c882

Browse files
committed
Include text/event-stream header only when stream=True
Summary: We want to use the headers to negotiate content. Sending this header in every request causes the server to return chunks even when the stream=True param is not set: ``` llama-stack-client inference chat-completion --message="Hello there" {"event":{"event_type":"start","delta":"Hello"}} {"event":{"event_type":"progress","delta":"!"}} {"event":{"event_type":"progress","delta":" How"}} {"event":{"event_type":"progress","delta":" are"}} {"event":{"event_type":"progress","delta":" you"}} {"event":{"event_type":"progress","delta":" today"}} ``` Test Plan: ``` pip install . llama-stack-client configure --endpoint={endpoint} --api-key={api-key} llama-stack-client inference chat-completion --message="Hello there" ChatCompletionResponse(completion_message=CompletionMessage(content='Hello! How can I assist you today?', role='assistant', stop_reason='end_of_turn', tool_calls=[]), logprobs=None) ```
1 parent f5d4cfe commit 957c882

File tree

1 file changed

+8
-4
lines changed

1 file changed

+8
-4
lines changed

src/llama_stack_client/resources/inference.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -213,7 +213,8 @@ def chat_completion(
213213
extra_body: Body | None = None,
214214
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
215215
) -> InferenceChatCompletionResponse | Stream[InferenceChatCompletionResponse]:
216-
extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
216+
if stream is True:
217+
extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
217218
extra_headers = {
218219
**strip_not_given(
219220
{
@@ -364,7 +365,8 @@ def completion(
364365
extra_body: Body | None = None,
365366
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
366367
) -> InferenceCompletionResponse | Stream[InferenceCompletionResponse]:
367-
extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
368+
if stream is True:
369+
extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
368370
extra_headers = {
369371
**strip_not_given(
370372
{
@@ -623,7 +625,8 @@ async def chat_completion(
623625
extra_body: Body | None = None,
624626
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
625627
) -> InferenceChatCompletionResponse | AsyncStream[InferenceChatCompletionResponse]:
626-
extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
628+
if stream is True:
629+
extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
627630
extra_headers = {
628631
**strip_not_given(
629632
{
@@ -774,7 +777,8 @@ async def completion(
774777
extra_body: Body | None = None,
775778
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
776779
) -> InferenceCompletionResponse | AsyncStream[InferenceCompletionResponse]:
777-
extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
780+
if stream is True:
781+
extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
778782
extra_headers = {
779783
**strip_not_given(
780784
{

0 commit comments

Comments (0)