Skip to content

Commit bb3fa17

Browse files
authored
v0.1.4 - Sync updates from stainless branch: yanxi0830/dev (#164)
# What does this PR do? - as title [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan ``` LLAMA_STACK_CONFIG=fireworks pytest -v tests/client-sdk/agents/test_agents.py --inference-model meta-llama/Llama-3.1-8B-Instruct ``` [//]: # (## Documentation) [//]: # (- [ ] Added a Changelog entry if the change is significant)
1 parent 3cf8ac8 commit bb3fa17

File tree

11 files changed

+173
-32
lines changed

11 files changed

+173
-32
lines changed

src/llama_stack_client/_client.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def __init__(
135135
if base_url is None:
136136
base_url = os.environ.get("LLAMA_STACK_BASE_URL")
137137
if base_url is None:
138-
base_url = "http://any-hosted-llama-stack.com"
138+
base_url = f"http://any-hosted-llama-stack.com"
139139

140140
custom_headers = default_headers or {}
141141
custom_headers["X-LlamaStack-Client-Version"] = __version__
@@ -351,7 +351,7 @@ def __init__(
351351
if base_url is None:
352352
base_url = os.environ.get("LLAMA_STACK_BASE_URL")
353353
if base_url is None:
354-
base_url = "http://any-hosted-llama-stack.com"
354+
base_url = f"http://any-hosted-llama-stack.com"
355355

356356
custom_headers = default_headers or {}
357357
custom_headers["X-LlamaStack-Client-Version"] = __version__

src/llama_stack_client/_files.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def _transform_file(file: FileTypes) -> HttpxFileTypes:
7171
if is_tuple_t(file):
7272
return (file[0], _read_file_content(file[1]), *file[2:])
7373

74-
raise TypeError("Expected file types input to be a FileContent type or to be a tuple")
74+
raise TypeError(f"Expected file types input to be a FileContent type or to be a tuple")
7575

7676

7777
def _read_file_content(file: FileContent) -> HttpxFileContent:
@@ -113,7 +113,7 @@ async def _async_transform_file(file: FileTypes) -> HttpxFileTypes:
113113
if is_tuple_t(file):
114114
return (file[0], await _async_read_file_content(file[1]), *file[2:])
115115

116-
raise TypeError("Expected file types input to be a FileContent type or to be a tuple")
116+
raise TypeError(f"Expected file types input to be a FileContent type or to be a tuple")
117117

118118

119119
async def _async_read_file_content(file: FileContent) -> HttpxFileContent:

src/llama_stack_client/_response.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T:
229229
# the response class ourselves but that is something that should be supported directly in httpx
230230
# as it would be easy to incorrectly construct the Response object due to the multitude of arguments.
231231
if cast_to != httpx.Response:
232-
raise ValueError("Subclasses of httpx.Response cannot be passed to `cast_to`")
232+
raise ValueError(f"Subclasses of httpx.Response cannot be passed to `cast_to`")
233233
return cast(R, response)
234234

235235
if (
@@ -245,9 +245,9 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T:
245245

246246
if (
247247
cast_to is not object
248-
and origin is not list
249-
and origin is not dict
250-
and origin is not Union
248+
and not origin is list
249+
and not origin is dict
250+
and not origin is Union
251251
and not issubclass(origin, BaseModel)
252252
):
253253
raise RuntimeError(

src/llama_stack_client/resources/agents/turn.py

Lines changed: 66 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -247,8 +247,18 @@ def resume(
247247
extra_body: Body | None = None,
248248
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
249249
) -> Turn:
250-
"""
250+
"""Resume an agent turn with executed tool call responses.
251+
252+
When a Turn has the
253+
status `awaiting_input` due to pending input from client side tool calls, this
254+
endpoint can be used to submit the outputs from the tool calls once they are
255+
ready.
256+
251257
Args:
258+
tool_responses: The tool call responses to resume the turn with.
259+
260+
stream: Whether to stream the response.
261+
252262
extra_headers: Send extra headers
253263
254264
extra_query: Add additional query parameters to the request
@@ -275,8 +285,18 @@ def resume(
275285
extra_body: Body | None = None,
276286
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
277287
) -> Stream[AgentTurnResponseStreamChunk]:
278-
"""
288+
"""Resume an agent turn with executed tool call responses.
289+
290+
When a Turn has the
291+
status `awaiting_input` due to pending input from client side tool calls, this
292+
endpoint can be used to submit the outputs from the tool calls once they are
293+
ready.
294+
279295
Args:
296+
stream: Whether to stream the response.
297+
298+
tool_responses: The tool call responses to resume the turn with.
299+
280300
extra_headers: Send extra headers
281301
282302
extra_query: Add additional query parameters to the request
@@ -303,8 +323,18 @@ def resume(
303323
extra_body: Body | None = None,
304324
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
305325
) -> Turn | Stream[AgentTurnResponseStreamChunk]:
306-
"""
326+
"""Resume an agent turn with executed tool call responses.
327+
328+
When a Turn has the
329+
status `awaiting_input` due to pending input from client side tool calls, this
330+
endpoint can be used to submit the outputs from the tool calls once they are
331+
ready.
332+
307333
Args:
334+
stream: Whether to stream the response.
335+
336+
tool_responses: The tool call responses to resume the turn with.
337+
308338
extra_headers: Send extra headers
309339
310340
extra_query: Add additional query parameters to the request
@@ -571,8 +601,18 @@ async def resume(
571601
extra_body: Body | None = None,
572602
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
573603
) -> Turn:
574-
"""
604+
"""Resume an agent turn with executed tool call responses.
605+
606+
When a Turn has the
607+
status `awaiting_input` due to pending input from client side tool calls, this
608+
endpoint can be used to submit the outputs from the tool calls once they are
609+
ready.
610+
575611
Args:
612+
tool_responses: The tool call responses to resume the turn with.
613+
614+
stream: Whether to stream the response.
615+
576616
extra_headers: Send extra headers
577617
578618
extra_query: Add additional query parameters to the request
@@ -599,8 +639,18 @@ async def resume(
599639
extra_body: Body | None = None,
600640
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
601641
) -> AsyncStream[AgentTurnResponseStreamChunk]:
602-
"""
642+
"""Resume an agent turn with executed tool call responses.
643+
644+
When a Turn has the
645+
status `awaiting_input` due to pending input from client side tool calls, this
646+
endpoint can be used to submit the outputs from the tool calls once they are
647+
ready.
648+
603649
Args:
650+
stream: Whether to stream the response.
651+
652+
tool_responses: The tool call responses to resume the turn with.
653+
604654
extra_headers: Send extra headers
605655
606656
extra_query: Add additional query parameters to the request
@@ -627,8 +677,18 @@ async def resume(
627677
extra_body: Body | None = None,
628678
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
629679
) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]:
630-
"""
680+
"""Resume an agent turn with executed tool call responses.
681+
682+
When a Turn has the
683+
status `awaiting_input` due to pending input from client side tool calls, this
684+
endpoint can be used to submit the outputs from the tool calls once they are
685+
ready.
686+
631687
Args:
688+
stream: Whether to stream the response.
689+
690+
tool_responses: The tool call responses to resume the turn with.
691+
632692
extra_headers: Send extra headers
633693
634694
extra_query: Add additional query parameters to the request

src/llama_stack_client/resources/inference.py

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from __future__ import annotations
44

5-
from typing import List, Iterable
5+
from typing import List, Union, Iterable
66
from typing_extensions import Literal, overload
77

88
import httpx
@@ -36,6 +36,7 @@
3636
from ..types.shared.chat_completion_response import ChatCompletionResponse
3737
from ..types.shared_params.interleaved_content import InterleavedContent
3838
from ..types.chat_completion_response_stream_chunk import ChatCompletionResponseStreamChunk
39+
from ..types.shared_params.interleaved_content_item import InterleavedContentItem
3940

4041
__all__ = ["InferenceResource", "AsyncInferenceResource"]
4142

@@ -493,8 +494,11 @@ def completion(
493494
def embeddings(
494495
self,
495496
*,
496-
contents: List[InterleavedContent],
497+
contents: Union[List[str], Iterable[InterleavedContentItem]],
497498
model_id: str,
499+
output_dimension: int | NotGiven = NOT_GIVEN,
500+
task_type: Literal["query", "document"] | NotGiven = NOT_GIVEN,
501+
text_truncation: Literal["none", "start", "end"] | NotGiven = NOT_GIVEN,
498502
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
499503
# The extra values given here take precedence over values defined on the client or passed to this method.
500504
extra_headers: Headers | None = None,
@@ -506,13 +510,22 @@ def embeddings(
506510
Generate embeddings for content pieces using the specified model.
507511
508512
Args:
509-
contents: List of contents to generate embeddings for. Note that content can be
510-
multimodal. The behavior depends on the model and provider. Some models may only
511-
support text.
513+
contents: List of contents to generate embeddings for. Each content can be a string or an
514+
InterleavedContentItem (and hence can be multimodal). The behavior depends on
515+
the model and provider. Some models may only support text.
512516
513517
model_id: The identifier of the model to use. The model must be an embedding model
514518
registered with Llama Stack and available via the /models endpoint.
515519
520+
output_dimension: (Optional) Output dimensionality for the embeddings. Only supported by
521+
Matryoshka models.
522+
523+
task_type: (Optional) How is the embedding being used? This is only supported by asymmetric
524+
embedding models.
525+
526+
text_truncation: (Optional) Config for how to truncate text for embedding when text is longer
527+
than the model's max sequence length.
528+
516529
extra_headers: Send extra headers
517530
518531
extra_query: Add additional query parameters to the request
@@ -527,6 +540,9 @@ def embeddings(
527540
{
528541
"contents": contents,
529542
"model_id": model_id,
543+
"output_dimension": output_dimension,
544+
"task_type": task_type,
545+
"text_truncation": text_truncation,
530546
},
531547
inference_embeddings_params.InferenceEmbeddingsParams,
532548
),
@@ -990,8 +1006,11 @@ async def completion(
9901006
async def embeddings(
9911007
self,
9921008
*,
993-
contents: List[InterleavedContent],
1009+
contents: Union[List[str], Iterable[InterleavedContentItem]],
9941010
model_id: str,
1011+
output_dimension: int | NotGiven = NOT_GIVEN,
1012+
task_type: Literal["query", "document"] | NotGiven = NOT_GIVEN,
1013+
text_truncation: Literal["none", "start", "end"] | NotGiven = NOT_GIVEN,
9951014
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
9961015
# The extra values given here take precedence over values defined on the client or passed to this method.
9971016
extra_headers: Headers | None = None,
@@ -1003,13 +1022,22 @@ async def embeddings(
10031022
Generate embeddings for content pieces using the specified model.
10041023
10051024
Args:
1006-
contents: List of contents to generate embeddings for. Note that content can be
1007-
multimodal. The behavior depends on the model and provider. Some models may only
1008-
support text.
1025+
contents: List of contents to generate embeddings for. Each content can be a string or an
1026+
InterleavedContentItem (and hence can be multimodal). The behavior depends on
1027+
the model and provider. Some models may only support text.
10091028
10101029
model_id: The identifier of the model to use. The model must be an embedding model
10111030
registered with Llama Stack and available via the /models endpoint.
10121031
1032+
output_dimension: (Optional) Output dimensionality for the embeddings. Only supported by
1033+
Matryoshka models.
1034+
1035+
task_type: (Optional) How is the embedding being used? This is only supported by asymmetric
1036+
embedding models.
1037+
1038+
text_truncation: (Optional) Config for how to truncate text for embedding when text is longer
1039+
than the model's max sequence length.
1040+
10131041
extra_headers: Send extra headers
10141042
10151043
extra_query: Add additional query parameters to the request
@@ -1024,6 +1052,9 @@ async def embeddings(
10241052
{
10251053
"contents": contents,
10261054
"model_id": model_id,
1055+
"output_dimension": output_dimension,
1056+
"task_type": task_type,
1057+
"text_truncation": text_truncation,
10271058
},
10281059
inference_embeddings_params.InferenceEmbeddingsParams,
10291060
),

src/llama_stack_client/types/agents/turn_resume_params.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,17 @@ class TurnResumeParamsBase(TypedDict, total=False):
1616
session_id: Required[str]
1717

1818
tool_responses: Required[Iterable[ToolResponseMessage]]
19+
"""The tool call responses to resume the turn with."""
1920

2021

2122
class TurnResumeParamsNonStreaming(TurnResumeParamsBase, total=False):
2223
stream: Literal[False]
24+
"""Whether to stream the response."""
2325

2426

2527
class TurnResumeParamsStreaming(TurnResumeParamsBase):
2628
stream: Required[Literal[True]]
29+
"""Whether to stream the response."""
2730

2831

2932
TurnResumeParams = Union[TurnResumeParamsNonStreaming, TurnResumeParamsStreaming]

src/llama_stack_client/types/inference_embeddings_params.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,21 @@
22

33
from __future__ import annotations
44

5-
from typing import List
6-
from typing_extensions import Required, TypedDict
5+
from typing import List, Union, Iterable
6+
from typing_extensions import Literal, Required, TypedDict
77

8-
from .shared_params.interleaved_content import InterleavedContent
8+
from .shared_params.interleaved_content_item import InterleavedContentItem
99

1010
__all__ = ["InferenceEmbeddingsParams"]
1111

1212

1313
class InferenceEmbeddingsParams(TypedDict, total=False):
14-
contents: Required[List[InterleavedContent]]
14+
contents: Required[Union[List[str], Iterable[InterleavedContentItem]]]
1515
"""List of contents to generate embeddings for.
1616
17-
Note that content can be multimodal. The behavior depends on the model and
18-
provider. Some models may only support text.
17+
Each content can be a string or an InterleavedContentItem (and hence can be
18+
multimodal). The behavior depends on the model and provider. Some models may
19+
only support text.
1920
"""
2021

2122
model_id: Required[str]
@@ -24,3 +25,21 @@ class InferenceEmbeddingsParams(TypedDict, total=False):
2425
The model must be an embedding model registered with Llama Stack and available
2526
via the /models endpoint.
2627
"""
28+
29+
output_dimension: int
30+
"""(Optional) Output dimensionality for the embeddings.
31+
32+
Only supported by Matryoshka models.
33+
"""
34+
35+
task_type: Literal["query", "document"]
36+
"""
37+
(Optional) How is the embedding being used? This is only supported by asymmetric
38+
embedding models.
39+
"""
40+
41+
text_truncation: Literal["none", "start", "end"]
42+
"""
43+
(Optional) Config for how to truncate text for embedding when text is longer
44+
than the model's max sequence length.
45+
"""
Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
22

3-
from typing import Optional
3+
from typing import Dict, List, Union, Optional
44

55
from ..._models import BaseModel
66
from .interleaved_content import InterleavedContent
@@ -9,5 +9,7 @@
99

1010

1111
class QueryResult(BaseModel):
12+
metadata: Dict[str, Union[bool, float, str, List[object], object, None]]
13+
1214
content: Optional[InterleavedContent] = None
1315
"""A image content item"""

src/llama_stack_client/types/tool_invocation_result.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
22

3-
from typing import Optional
3+
from typing import Dict, List, Union, Optional
44

55
from .._models import BaseModel
66
from .shared.interleaved_content import InterleavedContent
@@ -15,3 +15,5 @@ class ToolInvocationResult(BaseModel):
1515
error_code: Optional[int] = None
1616

1717
error_message: Optional[str] = None
18+
19+
metadata: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None

0 commit comments

Comments (0)