
Commit 3064154

Sync updates from stainless branch: ashwinb/dev
1 parent a3c0674

17 files changed (+76, -168 lines)

src/llama_stack_client/resources/batch_inference.py
Lines changed: 3 additions & 2 deletions

@@ -23,6 +23,7 @@
     async_to_streamed_response_wrapper,
 )
 from .._base_client import make_request_options
+from ..types.shared_params.message import Message
 from ..types.shared.batch_completion import BatchCompletion
 from ..types.shared_params.sampling_params import SamplingParams
 from ..types.shared_params.interleaved_content import InterleavedContent
@@ -54,7 +55,7 @@ def with_streaming_response(self) -> BatchInferenceResourceWithStreamingResponse
     def chat_completion(
         self,
         *,
-        messages_batch: Iterable[Iterable[batch_inference_chat_completion_params.MessagesBatch]],
+        messages_batch: Iterable[Iterable[Message]],
         model: str,
         logprobs: batch_inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN,
         sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
@@ -195,7 +196,7 @@ def with_streaming_response(self) -> AsyncBatchInferenceResourceWithStreamingRes
     async def chat_completion(
         self,
         *,
-        messages_batch: Iterable[Iterable[batch_inference_chat_completion_params.MessagesBatch]],
+        messages_batch: Iterable[Iterable[Message]],
         model: str,
         logprobs: batch_inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN,
         sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
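With `messages_batch` typed against the shared `Message` union, each inner list is one conversation whose entries are plain user, system, tool-response, or assistant messages. A minimal sketch, assuming a configured client, a hypothetical model name, and that the call returns the BatchInferenceChatCompletionResponse updated later in this commit:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient()

# Each inner list is one conversation; entries satisfy the shared Message
# param union rather than the removed module-local MessagesBatch alias.
result = client.batch_inference.chat_completion(
    model="meta-llama/Llama-3.1-8B-Instruct",  # hypothetical model name
    messages_batch=[
        [{"role": "user", "content": "Summarize the moon landing in one sentence."}],
        [
            {"role": "system", "content": "Answer in French."},
            {"role": "user", "content": "What is the capital of Italy?"},
        ],
    ],
)
# completion_message_batch holds one shared CompletionMessage per conversation
# (field name per the response model updated later in this commit).
for message in result.completion_message_batch:
    print(message.content)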

src/llama_stack_client/resources/inference.py
Lines changed: 9 additions & 8 deletions

@@ -30,6 +30,7 @@
 from .._streaming import Stream, AsyncStream
 from .._base_client import make_request_options
 from ..types.embeddings_response import EmbeddingsResponse
+from ..types.shared_params.message import Message
 from ..types.inference_completion_response import InferenceCompletionResponse
 from ..types.shared_params.sampling_params import SamplingParams
 from ..types.shared_params.interleaved_content import InterleavedContent
@@ -62,7 +63,7 @@ def with_streaming_response(self) -> InferenceResourceWithStreamingResponse:
     def chat_completion(
         self,
         *,
-        messages: Iterable[inference_chat_completion_params.Message],
+        messages: Iterable[Message],
         model_id: str,
         logprobs: inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN,
         response_format: inference_chat_completion_params.ResponseFormat | NotGiven = NOT_GIVEN,
@@ -106,7 +107,7 @@ def chat_completion(
     def chat_completion(
         self,
         *,
-        messages: Iterable[inference_chat_completion_params.Message],
+        messages: Iterable[Message],
         model_id: str,
         stream: Literal[True],
         logprobs: inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN,
@@ -150,7 +151,7 @@ def chat_completion(
     def chat_completion(
         self,
         *,
-        messages: Iterable[inference_chat_completion_params.Message],
+        messages: Iterable[Message],
         model_id: str,
         stream: bool,
         logprobs: inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN,
@@ -194,7 +195,7 @@ def chat_completion(
     def chat_completion(
         self,
         *,
-        messages: Iterable[inference_chat_completion_params.Message],
+        messages: Iterable[Message],
         model_id: str,
         logprobs: inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN,
         response_format: inference_chat_completion_params.ResponseFormat | NotGiven = NOT_GIVEN,
@@ -472,7 +473,7 @@ def with_streaming_response(self) -> AsyncInferenceResourceWithStreamingResponse
     async def chat_completion(
         self,
         *,
-        messages: Iterable[inference_chat_completion_params.Message],
+        messages: Iterable[Message],
         model_id: str,
         logprobs: inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN,
         response_format: inference_chat_completion_params.ResponseFormat | NotGiven = NOT_GIVEN,
@@ -516,7 +517,7 @@ async def chat_completion(
     async def chat_completion(
         self,
         *,
-        messages: Iterable[inference_chat_completion_params.Message],
+        messages: Iterable[Message],
         model_id: str,
         stream: Literal[True],
         logprobs: inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN,
@@ -560,7 +561,7 @@ async def chat_completion(
     async def chat_completion(
         self,
        *,
-        messages: Iterable[inference_chat_completion_params.Message],
+        messages: Iterable[Message],
         model_id: str,
         stream: bool,
         logprobs: inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN,
@@ -604,7 +605,7 @@ async def chat_completion(
     async def chat_completion(
         self,
         *,
-        messages: Iterable[inference_chat_completion_params.Message],
+        messages: Iterable[Message],
         model_id: str,
         logprobs: inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN,
         response_format: inference_chat_completion_params.ResponseFormat | NotGiven = NOT_GIVEN,
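For single requests, `inference.chat_completion` now accepts the same shared union for `messages`. A minimal non-streaming sketch, assuming a configured LlamaStackClient and a hypothetical model id:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient()

# `messages` accepts the shared Message union instead of the removed
# inference_chat_completion_params.Message alias; plain dicts satisfy the
# TypedDict params at runtime.
response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",  # hypothetical model id
    messages=[
        {"role": "system", "content": "You are a terse assistant."},
        {"role": "user", "content": "Name one prime number greater than 10."},
    ],
)
# Non-streaming calls yield a ChatCompletionResponse whose completion_message
# is now the shared CompletionMessage model (see the response changes below).
print(response.completion_message.content)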

src/llama_stack_client/resources/synthetic_data_generation.py
Lines changed: 3 additions & 2 deletions

@@ -23,6 +23,7 @@
     async_to_streamed_response_wrapper,
 )
 from .._base_client import make_request_options
+from ..types.shared_params.message import Message
 from ..types.synthetic_data_generation_response import SyntheticDataGenerationResponse
 
 __all__ = ["SyntheticDataGenerationResource", "AsyncSyntheticDataGenerationResource"]
@@ -51,7 +52,7 @@ def with_streaming_response(self) -> SyntheticDataGenerationResourceWithStreamin
     def generate(
         self,
         *,
-        dialogs: Iterable[synthetic_data_generation_generate_params.Dialog],
+        dialogs: Iterable[Message],
         filtering_function: Literal["none", "random", "top_k", "top_p", "top_k_top_p", "sigmoid"],
         model: str | NotGiven = NOT_GIVEN,
         x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
@@ -122,7 +123,7 @@ def with_streaming_response(self) -> AsyncSyntheticDataGenerationResourceWithStr
     async def generate(
         self,
         *,
-        dialogs: Iterable[synthetic_data_generation_generate_params.Dialog],
+        dialogs: Iterable[Message],
         filtering_function: Literal["none", "random", "top_k", "top_p", "top_k_top_p", "sigmoid"],
         model: str | NotGiven = NOT_GIVEN,
         x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
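`dialogs` on the synthetic data generation endpoint takes the same shared Message union. A short sketch under the same assumptions (configured client; filtering_function value taken from the Literal in the signature above):

from llama_stack_client import LlamaStackClient

client = LlamaStackClient()

# Only `dialogs` and `filtering_function` are required per the signature above.
result = client.synthetic_data_generation.generate(
    dialogs=[{"role": "user", "content": "Explain what a binary search tree is."}],
    filtering_function="top_k",
)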

src/llama_stack_client/types/__init__.py
Lines changed: 2 additions & 0 deletions

@@ -8,6 +8,7 @@
 from .trace import Trace as Trace
 from .shared import (
     URL as URL,
+    Message as Message,
     ToolCall as ToolCall,
     ParamType as ParamType,
     ReturnType as ReturnType,
@@ -19,6 +20,7 @@
     SamplingParams as SamplingParams,
     BatchCompletion as BatchCompletion,
     SafetyViolation as SafetyViolation,
+    CompletionMessage as CompletionMessage,
     InterleavedContent as InterleavedContent,
     ToolParamDefinition as ToolParamDefinition,
     ToolResponseMessage as ToolResponseMessage,
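Because both names are re-exported here, they can be imported directly from the types namespace. A small sketch of how calling code might use them (the helper below is illustrative, not part of the SDK):

from typing import Optional, Sequence

from llama_stack_client.types import CompletionMessage, Message

def last_assistant_message(messages: Sequence[Message]) -> Optional[CompletionMessage]:
    # Message is the shared union of message variants; CompletionMessage is the
    # assistant message model this commit reuses across response types.
    for message in reversed(messages):
        if isinstance(message, CompletionMessage):
            return message
    return None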

src/llama_stack_client/types/agents/turn.py
Lines changed: 2 additions & 14 deletions

@@ -7,12 +7,11 @@
 from ..._models import BaseModel
 from ..shared.url import URL
 from ..inference_step import InferenceStep
-from ..shared.tool_call import ToolCall
 from ..shield_call_step import ShieldCallStep
 from ..shared.user_message import UserMessage
 from ..tool_execution_step import ToolExecutionStep
 from ..memory_retrieval_step import MemoryRetrievalStep
-from ..shared.interleaved_content import InterleavedContent
+from ..shared.completion_message import CompletionMessage
 from ..shared.tool_response_message import ToolResponseMessage
 from ..shared.interleaved_content_item import InterleavedContentItem
 
@@ -24,7 +23,6 @@
     "OutputAttachmentContentImageContentItem",
     "OutputAttachmentContentImageContentItemImage",
     "OutputAttachmentContentTextContentItem",
-    "OutputMessage",
     "Step",
 ]
 
@@ -64,16 +62,6 @@ class OutputAttachment(BaseModel):
     mime_type: str
 
 
-class OutputMessage(BaseModel):
-    content: InterleavedContent
-
-    role: Literal["assistant"]
-
-    stop_reason: Literal["end_of_turn", "end_of_message", "out_of_tokens"]
-
-    tool_calls: List[ToolCall]
-
-
 Step: TypeAlias = Union[InferenceStep, ToolExecutionStep, ShieldCallStep, MemoryRetrievalStep]
 
 
@@ -82,7 +70,7 @@ class Turn(BaseModel):
 
     output_attachments: List[OutputAttachment]
 
-    output_message: OutputMessage
+    output_message: CompletionMessage
 
     session_id: str
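With `output_message` now typed as the shared CompletionMessage (same fields as the removed inline OutputMessage: content, role, stop_reason, tool_calls), agent turns can be handled with the same helpers as chat completions. An illustrative sketch, assuming Turn is importable from the generated agents types:

from llama_stack_client.types import CompletionMessage
from llama_stack_client.types.agents.turn import Turn

def describe_turn(turn: Turn) -> str:
    # output_message is the shared CompletionMessage model after this commit.
    message: CompletionMessage = turn.output_message
    return f"stop_reason={message.stop_reason}, tool_calls={len(message.tool_calls)}"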

src/llama_stack_client/types/batch_inference_chat_completion_params.py
Lines changed: 4 additions & 21 deletions

@@ -3,22 +3,18 @@
 from __future__ import annotations
 
 from typing import Dict, Union, Iterable
-from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict
+from typing_extensions import Literal, Required, Annotated, TypedDict
 
 from .._utils import PropertyInfo
-from .shared_params.tool_call import ToolCall
-from .shared_params.user_message import UserMessage
-from .shared_params.system_message import SystemMessage
+from .shared_params.message import Message
 from .shared_params.sampling_params import SamplingParams
-from .shared_params.interleaved_content import InterleavedContent
 from .shared_params.tool_param_definition import ToolParamDefinition
-from .shared_params.tool_response_message import ToolResponseMessage
 
-__all__ = ["BatchInferenceChatCompletionParams", "MessagesBatch", "MessagesBatchCompletionMessage", "Logprobs", "Tool"]
+__all__ = ["BatchInferenceChatCompletionParams", "Logprobs", "Tool"]
 
 
 class BatchInferenceChatCompletionParams(TypedDict, total=False):
-    messages_batch: Required[Iterable[Iterable[MessagesBatch]]]
+    messages_batch: Required[Iterable[Iterable[Message]]]
 
     model: Required[str]
 
@@ -48,19 +44,6 @@ class BatchInferenceChatCompletionParams(TypedDict, total=False):
     x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
 
 
-class MessagesBatchCompletionMessage(TypedDict, total=False):
-    content: Required[InterleavedContent]
-
-    role: Required[Literal["assistant"]]
-
-    stop_reason: Required[Literal["end_of_turn", "end_of_message", "out_of_tokens"]]
-
-    tool_calls: Required[Iterable[ToolCall]]
-
-
-MessagesBatch: TypeAlias = Union[UserMessage, SystemMessage, ToolResponseMessage, MessagesBatchCompletionMessage]
-
-
 class Logprobs(TypedDict, total=False):
     top_k: int

src/llama_stack_client/types/batch_inference_chat_completion_response.py
Lines changed: 3 additions & 15 deletions

@@ -1,24 +1,12 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 from typing import List
-from typing_extensions import Literal
 
 from .._models import BaseModel
-from .shared.tool_call import ToolCall
-from .shared.interleaved_content import InterleavedContent
+from .shared.completion_message import CompletionMessage
 
-__all__ = ["BatchInferenceChatCompletionResponse", "CompletionMessageBatch"]
-
-
-class CompletionMessageBatch(BaseModel):
-    content: InterleavedContent
-
-    role: Literal["assistant"]
-
-    stop_reason: Literal["end_of_turn", "end_of_message", "out_of_tokens"]
-
-    tool_calls: List[ToolCall]
+__all__ = ["BatchInferenceChatCompletionResponse"]
 
 
 class BatchInferenceChatCompletionResponse(BaseModel):
-    completion_message_batch: List[CompletionMessageBatch]
+    completion_message_batch: List[CompletionMessage]

src/llama_stack_client/types/inference_chat_completion_params.py
Lines changed: 1 addition & 20 deletions

@@ -6,18 +6,12 @@
 from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict
 
 from .._utils import PropertyInfo
-from .shared_params.tool_call import ToolCall
-from .shared_params.user_message import UserMessage
-from .shared_params.system_message import SystemMessage
+from .shared_params.message import Message
 from .shared_params.sampling_params import SamplingParams
-from .shared_params.interleaved_content import InterleavedContent
 from .shared_params.tool_param_definition import ToolParamDefinition
-from .shared_params.tool_response_message import ToolResponseMessage
 
 __all__ = [
     "InferenceChatCompletionParamsBase",
-    "Message",
-    "MessageCompletionMessage",
     "Logprobs",
     "ResponseFormat",
     "ResponseFormatUnionMember0",
@@ -61,19 +55,6 @@ class InferenceChatCompletionParamsBase(TypedDict, total=False):
     x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
 
 
-class MessageCompletionMessage(TypedDict, total=False):
-    content: Required[InterleavedContent]
-
-    role: Required[Literal["assistant"]]
-
-    stop_reason: Required[Literal["end_of_turn", "end_of_message", "out_of_tokens"]]
-
-    tool_calls: Required[Iterable[ToolCall]]
-
-
-Message: TypeAlias = Union[UserMessage, SystemMessage, ToolResponseMessage, MessageCompletionMessage]
-
-
 class Logprobs(TypedDict, total=False):
     top_k: int

src/llama_stack_client/types/inference_chat_completion_response.py
Lines changed: 2 additions & 14 deletions

@@ -5,31 +5,19 @@
 
 from .._models import BaseModel
 from .token_log_probs import TokenLogProbs
-from .shared.tool_call import ToolCall
 from .shared.content_delta import ContentDelta
-from .shared.interleaved_content import InterleavedContent
+from .shared.completion_message import CompletionMessage
 
 __all__ = [
     "InferenceChatCompletionResponse",
     "ChatCompletionResponse",
-    "ChatCompletionResponseCompletionMessage",
     "ChatCompletionResponseStreamChunk",
     "ChatCompletionResponseStreamChunkEvent",
 ]
 
 
-class ChatCompletionResponseCompletionMessage(BaseModel):
-    content: InterleavedContent
-
-    role: Literal["assistant"]
-
-    stop_reason: Literal["end_of_turn", "end_of_message", "out_of_tokens"]
-
-    tool_calls: List[ToolCall]
-
-
 class ChatCompletionResponse(BaseModel):
-    completion_message: ChatCompletionResponseCompletionMessage
+    completion_message: CompletionMessage
 
     logprobs: Optional[List[TokenLogProbs]] = None
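Since the non-streaming response now carries CompletionMessage directly, the same checks apply to a chat response, a batch entry, an inference step, or a turn's output message. A hedged sketch of such a shared helper (illustrative, not part of the SDK):

from llama_stack_client.types import CompletionMessage

def finished_cleanly(message: CompletionMessage) -> bool:
    # stop_reason values come from the Literal that previously lived on the
    # removed inline ChatCompletionResponseCompletionMessage class.
    return message.stop_reason == "end_of_turn" and not message.tool_calls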

src/llama_stack_client/types/inference_step.py
Lines changed: 4 additions & 15 deletions

@@ -1,30 +1,19 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
-from typing import List, Optional
+from typing import Optional
 from datetime import datetime
 from typing_extensions import Literal
 
 from pydantic import Field as FieldInfo
 
 from .._models import BaseModel
-from .shared.tool_call import ToolCall
-from .shared.interleaved_content import InterleavedContent
+from .shared.completion_message import CompletionMessage
 
-__all__ = ["InferenceStep", "ModelResponse"]
-
-
-class ModelResponse(BaseModel):
-    content: InterleavedContent
-
-    role: Literal["assistant"]
-
-    stop_reason: Literal["end_of_turn", "end_of_message", "out_of_tokens"]
-
-    tool_calls: List[ToolCall]
+__all__ = ["InferenceStep"]
 
 
 class InferenceStep(BaseModel):
-    inference_model_response: ModelResponse = FieldInfo(alias="model_response")
+    inference_model_response: CompletionMessage = FieldInfo(alias="model_response")
 
     step_id: str
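`InferenceStep.inference_model_response` shares the same model, so a turn's steps can feed the same helpers. A short sketch, assuming the module path shown above and an iterable of agent step objects (any members of the Step union):

from typing import Iterable, List

from llama_stack_client.types import CompletionMessage
from llama_stack_client.types.inference_step import InferenceStep

def model_responses(steps: Iterable[object]) -> List[CompletionMessage]:
    # Pull the shared CompletionMessage out of each inference step; other
    # members of the Step union (tool execution, shield call, memory retrieval)
    # are skipped.
    return [step.inference_model_response for step in steps if isinstance(step, InferenceStep)]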
