From 9c15910b9fdddf1549c4b886f535911657a165f6 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Fri, 23 May 2025 11:13:15 -0700 Subject: [PATCH] Sync updates from stainless branch: ashwinb/dev --- src/llama_stack_client/__init__.py | 5 + src/llama_stack_client/_utils/_proxy.py | 5 +- .../_utils/_resources_proxy.py | 24 + .../resources/agents/agents.py | 4 +- .../resources/agents/session.py | 4 +- .../resources/benchmarks.py | 34 + .../resources/chat/completions.py | 466 ++++++++---- .../resources/completions.py | 192 ++--- src/llama_stack_client/resources/datasets.py | 26 +- src/llama_stack_client/resources/inference.py | 136 ++-- src/llama_stack_client/resources/inspect.py | 4 + src/llama_stack_client/resources/models.py | 34 + .../resources/post_training/job.py | 26 + .../resources/post_training/post_training.py | 60 ++ src/llama_stack_client/resources/providers.py | 6 + src/llama_stack_client/resources/responses.py | 36 +- src/llama_stack_client/resources/routes.py | 2 + src/llama_stack_client/resources/safety.py | 16 + src/llama_stack_client/resources/scoring.py | 16 + .../resources/scoring_functions.py | 32 + src/llama_stack_client/resources/shields.py | 26 + src/llama_stack_client/resources/telemetry.py | 88 +++ .../resources/tool_runtime/rag_tool.py | 4 + .../resources/tool_runtime/tool_runtime.py | 24 +- .../resources/toolgroups.py | 32 +- src/llama_stack_client/resources/tools.py | 12 +- .../resources/vector_dbs.py | 34 + src/llama_stack_client/resources/vector_io.py | 32 +- src/llama_stack_client/types/benchmark.py | 6 +- .../types/benchmark_register_params.py | 6 + src/llama_stack_client/types/chat/__init__.py | 3 + .../types/chat/completion_create_params.py | 46 +- .../types/chat/completion_list_params.py | 21 + .../types/chat/completion_list_response.py | 667 ++++++++++++++++++ .../chat/completion_retrieve_response.py | 626 ++++++++++++++++ .../types/completion_create_params.py | 34 +- .../types/dataset_list_response.py | 6 +- .../types/dataset_register_params.py | 4 +- .../types/dataset_register_response.py | 6 +- .../types/dataset_retrieve_response.py | 6 +- .../inference_batch_chat_completion_params.py | 17 +- .../inference_batch_completion_params.py | 14 +- .../types/inference_chat_completion_params.py | 6 +- .../types/inference_completion_params.py | 6 +- .../types/inference_step.py | 1 + .../types/memory_retrieval_step.py | 1 + src/llama_stack_client/types/model.py | 6 +- .../types/model_register_params.py | 5 + .../post_training/job_artifacts_params.py | 1 + .../types/post_training/job_cancel_params.py | 1 + .../types/post_training/job_status_params.py | 1 + ...ost_training_preference_optimize_params.py | 6 + ...st_training_supervised_fine_tune_params.py | 7 + .../types/response_create_params.py | 174 ++++- .../types/response_object.py | 77 +- .../types/safety_run_shield_params.py | 3 + src/llama_stack_client/types/scoring_fn.py | 4 +- .../types/scoring_fn_params.py | 24 +- .../types/scoring_fn_params_param.py | 24 +- .../types/scoring_function_register_params.py | 8 + .../types/scoring_score_batch_params.py | 3 + .../types/shared/query_config.py | 16 + .../types/shared_params/query_config.py | 14 + src/llama_stack_client/types/shield.py | 4 +- .../types/shield_call_step.py | 1 + .../types/shield_register_params.py | 4 + .../types/telemetry_get_span_tree_params.py | 2 + .../types/telemetry_log_event_params.py | 2 + .../types/telemetry_query_spans_params.py | 3 + .../types/telemetry_query_traces_params.py | 4 + .../telemetry_save_spans_to_dataset_params.py | 4 + 
src/llama_stack_client/types/tool.py | 4 +- .../types/tool_execution_step.py | 1 + src/llama_stack_client/types/tool_group.py | 4 +- .../types/tool_list_params.py | 1 + .../tool_runtime/rag_tool_query_params.py | 1 + .../types/tool_runtime_invoke_tool_params.py | 2 + .../types/tool_runtime_list_tools_params.py | 2 + .../types/toolgroup_register_params.py | 4 + .../types/vector_db_list_response.py | 6 +- .../types/vector_db_register_params.py | 5 + .../types/vector_db_register_response.py | 5 +- .../types/vector_db_retrieve_response.py | 5 +- .../types/vector_io_insert_params.py | 3 + .../types/vector_io_query_params.py | 4 +- tests/api_resources/chat/test_completions.py | 152 +++- tests/api_resources/test_eval.py | 96 ++- tests/api_resources/test_responses.py | 24 +- tests/api_resources/test_scoring.py | 24 + tests/api_resources/test_scoring_functions.py | 8 +- .../tool_runtime/test_rag_tool.py | 4 + tests/test_utils/test_proxy.py | 11 + 92 files changed, 3152 insertions(+), 468 deletions(-) create mode 100644 src/llama_stack_client/_utils/_resources_proxy.py create mode 100644 src/llama_stack_client/types/chat/completion_list_params.py create mode 100644 src/llama_stack_client/types/chat/completion_list_response.py create mode 100644 src/llama_stack_client/types/chat/completion_retrieve_response.py diff --git a/src/llama_stack_client/__init__.py b/src/llama_stack_client/__init__.py index 70ef01a4..7dc65e78 100644 --- a/src/llama_stack_client/__init__.py +++ b/src/llama_stack_client/__init__.py @@ -1,5 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +import typing as _t + from . import types from ._types import NOT_GIVEN, Omit, NoneType, NotGiven, Transport, ProxiesTypes from ._utils import file_from_path @@ -84,6 +86,9 @@ "DefaultAsyncHttpxClient", ] +if not _t.TYPE_CHECKING: + from ._utils._resources_proxy import resources as resources + _setup_logging() # Update the __module__ attribute for exported symbols so that diff --git a/src/llama_stack_client/_utils/_proxy.py b/src/llama_stack_client/_utils/_proxy.py index ffd883e9..0f239a33 100644 --- a/src/llama_stack_client/_utils/_proxy.py +++ b/src/llama_stack_client/_utils/_proxy.py @@ -46,7 +46,10 @@ def __dir__(self) -> Iterable[str]: @property # type: ignore @override def __class__(self) -> type: # pyright: ignore - proxied = self.__get_proxied__() + try: + proxied = self.__get_proxied__() + except Exception: + return type(self) if issubclass(type(proxied), LazyProxy): return type(proxied) return proxied.__class__ diff --git a/src/llama_stack_client/_utils/_resources_proxy.py b/src/llama_stack_client/_utils/_resources_proxy.py new file mode 100644 index 00000000..bf0a876a --- /dev/null +++ b/src/llama_stack_client/_utils/_resources_proxy.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from typing import Any +from typing_extensions import override + +from ._proxy import LazyProxy + + +class ResourcesProxy(LazyProxy[Any]): + """A proxy for the `llama_stack_client.resources` module. 
+ + This is used so that we can lazily import `llama_stack_client.resources` only when + needed *and* so that users can just import `llama_stack_client` and reference `llama_stack_client.resources` + """ + + @override + def __load__(self) -> Any: + import importlib + + mod = importlib.import_module("llama_stack_client.resources") + return mod + + +resources = ResourcesProxy().__as_proxied__() diff --git a/src/llama_stack_client/resources/agents/agents.py b/src/llama_stack_client/resources/agents/agents.py index ed03dde5..9c06cbd1 100644 --- a/src/llama_stack_client/resources/agents/agents.py +++ b/src/llama_stack_client/resources/agents/agents.py @@ -124,7 +124,7 @@ def delete( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ - Delete an agent by its ID. + Delete an agent by its ID and its associated sessions and turns. Args: extra_headers: Send extra headers @@ -225,7 +225,7 @@ async def delete( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ - Delete an agent by its ID. + Delete an agent by its ID and its associated sessions and turns. Args: extra_headers: Send extra headers diff --git a/src/llama_stack_client/resources/agents/session.py b/src/llama_stack_client/resources/agents/session.py index 0aec7449..ebdde5c6 100644 --- a/src/llama_stack_client/resources/agents/session.py +++ b/src/llama_stack_client/resources/agents/session.py @@ -137,7 +137,7 @@ def delete( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ - Delete an agent session by its ID. + Delete an agent session by its ID and its associated turns. Args: extra_headers: Send extra headers @@ -277,7 +277,7 @@ async def delete( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ - Delete an agent session by its ID. + Delete an agent session by its ID and its associated turns. Args: extra_headers: Send extra headers diff --git a/src/llama_stack_client/resources/benchmarks.py b/src/llama_stack_client/resources/benchmarks.py index ff6af994..f22865cb 100644 --- a/src/llama_stack_client/resources/benchmarks.py +++ b/src/llama_stack_client/resources/benchmarks.py @@ -57,6 +57,8 @@ def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Benchmark: """ + Get a benchmark by its ID. + Args: extra_headers: Send extra headers @@ -86,6 +88,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> BenchmarkListResponse: + """List all benchmarks.""" return self._get( "/v1/eval/benchmarks", options=make_request_options( @@ -115,7 +118,21 @@ def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Register a benchmark. + Args: + benchmark_id: The ID of the benchmark to register. + + dataset_id: The ID of the dataset to use for the benchmark. + + scoring_functions: The scoring functions to use for the benchmark. + + metadata: The metadata to use for the benchmark. + + provider_benchmark_id: The ID of the provider benchmark to use for the benchmark. + + provider_id: The ID of the provider to use for the benchmark. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -177,6 +194,8 @@ async def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Benchmark: """ + Get a benchmark by its ID. 
+ Args: extra_headers: Send extra headers @@ -206,6 +225,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> BenchmarkListResponse: + """List all benchmarks.""" return await self._get( "/v1/eval/benchmarks", options=make_request_options( @@ -235,7 +255,21 @@ async def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Register a benchmark. + Args: + benchmark_id: The ID of the benchmark to register. + + dataset_id: The ID of the dataset to use for the benchmark. + + scoring_functions: The scoring functions to use for the benchmark. + + metadata: The metadata to use for the benchmark. + + provider_benchmark_id: The ID of the provider benchmark to use for the benchmark. + + provider_id: The ID of the provider to use for the benchmark. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/chat/completions.py b/src/llama_stack_client/resources/chat/completions.py index 7c449d41..1ab543c5 100644 --- a/src/llama_stack_client/resources/chat/completions.py +++ b/src/llama_stack_client/resources/chat/completions.py @@ -18,10 +18,12 @@ async_to_streamed_response_wrapper, ) from ..._streaming import Stream, AsyncStream -from ...types.chat import completion_create_params +from ...types.chat import completion_list_params, completion_create_params from ..._base_client import make_request_options from ...types.chat_completion_chunk import ChatCompletionChunk +from ...types.chat.completion_list_response import CompletionListResponse from ...types.chat.completion_create_response import CompletionCreateResponse +from ...types.chat.completion_retrieve_response import CompletionRetrieveResponse __all__ = ["CompletionsResource", "AsyncCompletionsResource"] @@ -87,52 +89,52 @@ def create( specified model. Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - function_call: (Optional) The function call to use + function_call: (Optional) The function call to use. - functions: (Optional) List of functions to use + functions: (Optional) List of functions to use. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_completion_tokens: (Optional) The maximum number of tokens to generate + max_completion_tokens: (Optional) The maximum number of tokens to generate. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. - n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. - parallel_tool_calls: (Optional) Whether to parallelize tool calls + parallel_tool_calls: (Optional) Whether to parallelize tool calls. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. - response_format: (Optional) The response format to use + response_format: (Optional) The response format to use. - seed: (Optional) The seed to use + seed: (Optional) The seed to use. 
- stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - tool_choice: (Optional) The tool choice to use + tool_choice: (Optional) The tool choice to use. - tools: (Optional) The tools to use + tools: (Optional) The tools to use. - top_logprobs: (Optional) The top log probabilities to use + top_logprobs: (Optional) The top log probabilities to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers @@ -185,52 +187,52 @@ def create( specified model. Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - function_call: (Optional) The function call to use + function_call: (Optional) The function call to use. - functions: (Optional) List of functions to use + functions: (Optional) List of functions to use. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_completion_tokens: (Optional) The maximum number of tokens to generate + max_completion_tokens: (Optional) The maximum number of tokens to generate. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. - n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. - parallel_tool_calls: (Optional) Whether to parallelize tool calls + parallel_tool_calls: (Optional) Whether to parallelize tool calls. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. - response_format: (Optional) The response format to use + response_format: (Optional) The response format to use. - seed: (Optional) The seed to use + seed: (Optional) The seed to use. - stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - tool_choice: (Optional) The tool choice to use + tool_choice: (Optional) The tool choice to use. - tools: (Optional) The tools to use + tools: (Optional) The tools to use. - top_logprobs: (Optional) The top log probabilities to use + top_logprobs: (Optional) The top log probabilities to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers @@ -283,52 +285,52 @@ def create( specified model. 
Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - function_call: (Optional) The function call to use + function_call: (Optional) The function call to use. - functions: (Optional) List of functions to use + functions: (Optional) List of functions to use. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_completion_tokens: (Optional) The maximum number of tokens to generate + max_completion_tokens: (Optional) The maximum number of tokens to generate. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. - n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. - parallel_tool_calls: (Optional) Whether to parallelize tool calls + parallel_tool_calls: (Optional) Whether to parallelize tool calls. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. - response_format: (Optional) The response format to use + response_format: (Optional) The response format to use. - seed: (Optional) The seed to use + seed: (Optional) The seed to use. - stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - tool_choice: (Optional) The tool choice to use + tool_choice: (Optional) The tool choice to use. - tools: (Optional) The tools to use + tools: (Optional) The tools to use. - top_logprobs: (Optional) The top log probabilities to use + top_logprobs: (Optional) The top log probabilities to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers @@ -418,6 +420,93 @@ def create( stream_cls=Stream[ChatCompletionChunk], ) + def retrieve( + self, + completion_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CompletionRetrieveResponse: + """ + Describe a chat completion by its ID. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return self._get( + f"/v1/openai/v1/chat/completions/{completion_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=CompletionRetrieveResponse, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CompletionListResponse: + """ + List all chat completions. + + Args: + after: The ID of the last chat completion to return. + + limit: The maximum number of chat completions to return. + + model: The model to filter by. + + order: The order to sort the chat completions by: "asc" or "desc". Defaults to "desc". + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get( + "/v1/openai/v1/chat/completions", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "model": model, + "order": order, + }, + completion_list_params.CompletionListParams, + ), + ), + cast_to=CompletionListResponse, + ) + class AsyncCompletionsResource(AsyncAPIResource): @cached_property @@ -480,52 +569,52 @@ async def create( specified model. Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - function_call: (Optional) The function call to use + function_call: (Optional) The function call to use. - functions: (Optional) List of functions to use + functions: (Optional) List of functions to use. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_completion_tokens: (Optional) The maximum number of tokens to generate + max_completion_tokens: (Optional) The maximum number of tokens to generate. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. - n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. 
- parallel_tool_calls: (Optional) Whether to parallelize tool calls + parallel_tool_calls: (Optional) Whether to parallelize tool calls. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. - response_format: (Optional) The response format to use + response_format: (Optional) The response format to use. - seed: (Optional) The seed to use + seed: (Optional) The seed to use. - stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - tool_choice: (Optional) The tool choice to use + tool_choice: (Optional) The tool choice to use. - tools: (Optional) The tools to use + tools: (Optional) The tools to use. - top_logprobs: (Optional) The top log probabilities to use + top_logprobs: (Optional) The top log probabilities to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers @@ -578,52 +667,52 @@ async def create( specified model. Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - function_call: (Optional) The function call to use + function_call: (Optional) The function call to use. - functions: (Optional) List of functions to use + functions: (Optional) List of functions to use. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_completion_tokens: (Optional) The maximum number of tokens to generate + max_completion_tokens: (Optional) The maximum number of tokens to generate. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. - n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. - parallel_tool_calls: (Optional) Whether to parallelize tool calls + parallel_tool_calls: (Optional) Whether to parallelize tool calls. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. - response_format: (Optional) The response format to use + response_format: (Optional) The response format to use. - seed: (Optional) The seed to use + seed: (Optional) The seed to use. - stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - tool_choice: (Optional) The tool choice to use + tool_choice: (Optional) The tool choice to use. 
- tools: (Optional) The tools to use + tools: (Optional) The tools to use. - top_logprobs: (Optional) The top log probabilities to use + top_logprobs: (Optional) The top log probabilities to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers @@ -676,52 +765,52 @@ async def create( specified model. Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - function_call: (Optional) The function call to use + function_call: (Optional) The function call to use. - functions: (Optional) List of functions to use + functions: (Optional) List of functions to use. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_completion_tokens: (Optional) The maximum number of tokens to generate + max_completion_tokens: (Optional) The maximum number of tokens to generate. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. - n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. - parallel_tool_calls: (Optional) Whether to parallelize tool calls + parallel_tool_calls: (Optional) Whether to parallelize tool calls. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. - response_format: (Optional) The response format to use + response_format: (Optional) The response format to use. - seed: (Optional) The seed to use + seed: (Optional) The seed to use. - stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - tool_choice: (Optional) The tool choice to use + tool_choice: (Optional) The tool choice to use. - tools: (Optional) The tools to use + tools: (Optional) The tools to use. - top_logprobs: (Optional) The top log probabilities to use + top_logprobs: (Optional) The top log probabilities to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers @@ -811,6 +900,93 @@ async def create( stream_cls=AsyncStream[ChatCompletionChunk], ) + async def retrieve( + self, + completion_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CompletionRetrieveResponse: + """ + Describe a chat completion by its ID. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return await self._get( + f"/v1/openai/v1/chat/completions/{completion_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=CompletionRetrieveResponse, + ) + + async def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CompletionListResponse: + """ + List all chat completions. + + Args: + after: The ID of the last chat completion to return. + + limit: The maximum number of chat completions to return. + + model: The model to filter by. + + order: The order to sort the chat completions by: "asc" or "desc". Defaults to "desc". + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._get( + "/v1/openai/v1/chat/completions", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform( + { + "after": after, + "limit": limit, + "model": model, + "order": order, + }, + completion_list_params.CompletionListParams, + ), + ), + cast_to=CompletionListResponse, + ) + class CompletionsResourceWithRawResponse: def __init__(self, completions: CompletionsResource) -> None: @@ -819,6 +995,12 @@ def __init__(self, completions: CompletionsResource) -> None: self.create = to_raw_response_wrapper( completions.create, ) + self.retrieve = to_raw_response_wrapper( + completions.retrieve, + ) + self.list = to_raw_response_wrapper( + completions.list, + ) class AsyncCompletionsResourceWithRawResponse: @@ -828,6 +1010,12 @@ def __init__(self, completions: AsyncCompletionsResource) -> None: self.create = async_to_raw_response_wrapper( completions.create, ) + self.retrieve = async_to_raw_response_wrapper( + completions.retrieve, + ) + self.list = async_to_raw_response_wrapper( + completions.list, + ) class CompletionsResourceWithStreamingResponse: @@ -837,6 +1025,12 @@ def __init__(self, completions: CompletionsResource) -> None: self.create = to_streamed_response_wrapper( completions.create, ) + self.retrieve = to_streamed_response_wrapper( + completions.retrieve, + ) + self.list = to_streamed_response_wrapper( + completions.list, + ) class AsyncCompletionsResourceWithStreamingResponse: @@ -846,3 +1040,9 @@ def __init__(self, completions: AsyncCompletionsResource) -> None: self.create = async_to_streamed_response_wrapper( completions.create, ) + self.retrieve = 
async_to_streamed_response_wrapper( + completions.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + completions.list, + ) diff --git a/src/llama_stack_client/resources/completions.py b/src/llama_stack_client/resources/completions.py index 8f57aeb4..dfdd80f4 100644 --- a/src/llama_stack_client/resources/completions.py +++ b/src/llama_stack_client/resources/completions.py @@ -83,37 +83,37 @@ def create( model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - prompt: The prompt to generate a completion for + prompt: The prompt to generate a completion for. - best_of: (Optional) The number of completions to generate + best_of: (Optional) The number of completions to generate. - echo: (Optional) Whether to echo the prompt + echo: (Optional) Whether to echo the prompt. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. - n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. - seed: (Optional) The seed to use + seed: (Optional) The seed to use. - stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers @@ -163,37 +163,37 @@ def create( model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - prompt: The prompt to generate a completion for + prompt: The prompt to generate a completion for. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - best_of: (Optional) The number of completions to generate + best_of: (Optional) The number of completions to generate. - echo: (Optional) Whether to echo the prompt + echo: (Optional) Whether to echo the prompt. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. - n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. 
- seed: (Optional) The seed to use + seed: (Optional) The seed to use. - stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers @@ -243,37 +243,37 @@ def create( model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - prompt: The prompt to generate a completion for + prompt: The prompt to generate a completion for. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - best_of: (Optional) The number of completions to generate + best_of: (Optional) The number of completions to generate. - echo: (Optional) Whether to echo the prompt + echo: (Optional) Whether to echo the prompt. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. - n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. - seed: (Optional) The seed to use + seed: (Optional) The seed to use. - stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers @@ -410,37 +410,37 @@ async def create( model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - prompt: The prompt to generate a completion for + prompt: The prompt to generate a completion for. - best_of: (Optional) The number of completions to generate + best_of: (Optional) The number of completions to generate. - echo: (Optional) Whether to echo the prompt + echo: (Optional) Whether to echo the prompt. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. - n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. 
- seed: (Optional) The seed to use + seed: (Optional) The seed to use. - stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers @@ -490,37 +490,37 @@ async def create( model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - prompt: The prompt to generate a completion for + prompt: The prompt to generate a completion for. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - best_of: (Optional) The number of completions to generate + best_of: (Optional) The number of completions to generate. - echo: (Optional) Whether to echo the prompt + echo: (Optional) Whether to echo the prompt. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. - n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. - seed: (Optional) The seed to use + seed: (Optional) The seed to use. - stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers @@ -570,37 +570,37 @@ async def create( model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - prompt: The prompt to generate a completion for + prompt: The prompt to generate a completion for. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - best_of: (Optional) The number of completions to generate + best_of: (Optional) The number of completions to generate. - echo: (Optional) Whether to echo the prompt + echo: (Optional) Whether to echo the prompt. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. 
- n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. - seed: (Optional) The seed to use + seed: (Optional) The seed to use. - stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers diff --git a/src/llama_stack_client/resources/datasets.py b/src/llama_stack_client/resources/datasets.py index 845f182b..447e2305 100644 --- a/src/llama_stack_client/resources/datasets.py +++ b/src/llama_stack_client/resources/datasets.py @@ -60,6 +60,8 @@ def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> DatasetRetrieveResponse: """ + Get a dataset by its ID. + Args: extra_headers: Send extra headers @@ -89,6 +91,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> DatasetListResponse: + """List all datasets.""" return self._get( "/v1/datasets", options=make_request_options( @@ -123,8 +126,8 @@ def iterrows( The response includes: - - data: List of items for the current page - - has_more: Whether there are more items available after this set + - data: List of items for the current page. + - has_more: Whether there are more items available after this set. Args: limit: The number of rows to get. @@ -178,7 +181,7 @@ def register( Args: purpose: The purpose of the dataset. - One of - "post-training/messages": The dataset + One of: - "post-training/messages": The dataset contains a messages column with list of messages for post-training. { "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] } - "eval/question-answer": The @@ -201,7 +204,7 @@ def register( dataset_id: The ID of the dataset. If not provided, an ID will be generated. - metadata: The metadata for the dataset. - E.g. {"description": "My dataset"} + metadata: The metadata for the dataset. - E.g. {"description": "My dataset"}. extra_headers: Send extra headers @@ -240,6 +243,8 @@ def unregister( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Unregister a dataset by its ID. + Args: extra_headers: Send extra headers @@ -293,6 +298,8 @@ async def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> DatasetRetrieveResponse: """ + Get a dataset by its ID. + Args: extra_headers: Send extra headers @@ -322,6 +329,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> DatasetListResponse: + """List all datasets.""" return await self._get( "/v1/datasets", options=make_request_options( @@ -356,8 +364,8 @@ async def iterrows( The response includes: - - data: List of items for the current page - - has_more: Whether there are more items available after this set + - data: List of items for the current page. + - has_more: Whether there are more items available after this set. Args: limit: The number of rows to get. @@ -411,7 +419,7 @@ async def register( Args: purpose: The purpose of the dataset. 
- One of - "post-training/messages": The dataset + One of: - "post-training/messages": The dataset contains a messages column with list of messages for post-training. { "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] } - "eval/question-answer": The @@ -434,7 +442,7 @@ async def register( dataset_id: The ID of the dataset. If not provided, an ID will be generated. - metadata: The metadata for the dataset. - E.g. {"description": "My dataset"} + metadata: The metadata for the dataset. - E.g. {"description": "My dataset"}. extra_headers: Send extra headers @@ -473,6 +481,8 @@ async def unregister( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Unregister a dataset by its ID. + Args: extra_headers: Send extra headers diff --git a/src/llama_stack_client/resources/inference.py b/src/llama_stack_client/resources/inference.py index be87eda4..a8bebdad 100644 --- a/src/llama_stack_client/resources/inference.py +++ b/src/llama_stack_client/resources/inference.py @@ -79,12 +79,24 @@ def batch_chat_completion( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> InferenceBatchChatCompletionResponse: """ + Generate chat completions for a batch of messages using the specified model. + Args: - response_format: Configuration for JSON schema-guided response generation. + messages_batch: The messages to generate completions for. + + model_id: The identifier of the model to use. The model must be registered with Llama + Stack and available via the /models endpoint. + + logprobs: (Optional) If specified, log probabilities for each token position will be + returned. + + response_format: (Optional) Grammar specification for guided (structured) decoding. + + sampling_params: (Optional) Parameters to control the sampling strategy. - sampling_params: Sampling parameters. + tool_config: (Optional) Configuration for tool use. - tool_config: Configuration for tool use. + tools: (Optional) List of tool definitions available to the model. extra_headers: Send extra headers @@ -130,10 +142,20 @@ def batch_completion( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> BatchCompletion: """ + Generate completions for a batch of content using the specified model. + Args: - response_format: Configuration for JSON schema-guided response generation. + content_batch: The content to generate completions for. + + model_id: The identifier of the model to use. The model must be registered with Llama + Stack and available via the /models endpoint. + + logprobs: (Optional) If specified, log probabilities for each token position will be + returned. + + response_format: (Optional) Grammar specification for guided (structured) decoding. - sampling_params: Sampling parameters. + sampling_params: (Optional) Parameters to control the sampling strategy. extra_headers: Send extra headers @@ -186,7 +208,7 @@ def chat_completion( Generate a chat completion for the given messages using the specified model. Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -199,7 +221,7 @@ def chat_completion( providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF grammar. This format is more flexible, but not all providers support it. 
- sampling_params: Parameters to control the sampling strategy + sampling_params: Parameters to control the sampling strategy. stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False. @@ -217,7 +239,7 @@ def chat_completion( are output as Python syntax -- a list of function calls. .. deprecated:: Use tool_config instead. - tools: (Optional) List of tool definitions available to the model + tools: (Optional) List of tool definitions available to the model. extra_headers: Send extra headers @@ -254,7 +276,7 @@ def chat_completion( Generate a chat completion for the given messages using the specified model. Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -270,7 +292,7 @@ def chat_completion( providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF grammar. This format is more flexible, but not all providers support it. - sampling_params: Parameters to control the sampling strategy + sampling_params: Parameters to control the sampling strategy. tool_choice: (Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto. .. deprecated:: Use tool_config instead. @@ -285,7 +307,7 @@ def chat_completion( are output as Python syntax -- a list of function calls. .. deprecated:: Use tool_config instead. - tools: (Optional) List of tool definitions available to the model + tools: (Optional) List of tool definitions available to the model. extra_headers: Send extra headers @@ -322,7 +344,7 @@ def chat_completion( Generate a chat completion for the given messages using the specified model. Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -338,7 +360,7 @@ def chat_completion( providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF grammar. This format is more flexible, but not all providers support it. - sampling_params: Parameters to control the sampling strategy + sampling_params: Parameters to control the sampling strategy. tool_choice: (Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto. .. deprecated:: Use tool_config instead. @@ -353,7 +375,7 @@ def chat_completion( are output as Python syntax -- a list of function calls. .. deprecated:: Use tool_config instead. - tools: (Optional) List of tool definitions available to the model + tools: (Optional) List of tool definitions available to the model. extra_headers: Send extra headers @@ -436,7 +458,7 @@ def completion( Generate a completion for the given content using the specified model. Args: - content: The content to generate a completion for + content: The content to generate a completion for. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -444,9 +466,9 @@ def completion( logprobs: (Optional) If specified, log probabilities for each token position will be returned. - response_format: (Optional) Grammar specification for guided (structured) decoding + response_format: (Optional) Grammar specification for guided (structured) decoding. 
- sampling_params: (Optional) Parameters to control the sampling strategy + sampling_params: (Optional) Parameters to control the sampling strategy. stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False. @@ -482,7 +504,7 @@ def completion( Generate a completion for the given content using the specified model. Args: - content: The content to generate a completion for + content: The content to generate a completion for. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -493,9 +515,9 @@ def completion( logprobs: (Optional) If specified, log probabilities for each token position will be returned. - response_format: (Optional) Grammar specification for guided (structured) decoding + response_format: (Optional) Grammar specification for guided (structured) decoding. - sampling_params: (Optional) Parameters to control the sampling strategy + sampling_params: (Optional) Parameters to control the sampling strategy. extra_headers: Send extra headers @@ -528,7 +550,7 @@ def completion( Generate a completion for the given content using the specified model. Args: - content: The content to generate a completion for + content: The content to generate a completion for. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -539,9 +561,9 @@ def completion( logprobs: (Optional) If specified, log probabilities for each token position will be returned. - response_format: (Optional) Grammar specification for guided (structured) decoding + response_format: (Optional) Grammar specification for guided (structured) decoding. - sampling_params: (Optional) Parameters to control the sampling strategy + sampling_params: (Optional) Parameters to control the sampling strategy. extra_headers: Send extra headers @@ -695,12 +717,24 @@ async def batch_chat_completion( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> InferenceBatchChatCompletionResponse: """ + Generate chat completions for a batch of messages using the specified model. + Args: - response_format: Configuration for JSON schema-guided response generation. + messages_batch: The messages to generate completions for. + + model_id: The identifier of the model to use. The model must be registered with Llama + Stack and available via the /models endpoint. + + logprobs: (Optional) If specified, log probabilities for each token position will be + returned. + + response_format: (Optional) Grammar specification for guided (structured) decoding. + + sampling_params: (Optional) Parameters to control the sampling strategy. - sampling_params: Sampling parameters. + tool_config: (Optional) Configuration for tool use. - tool_config: Configuration for tool use. + tools: (Optional) List of tool definitions available to the model. extra_headers: Send extra headers @@ -746,10 +780,20 @@ async def batch_completion( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> BatchCompletion: """ + Generate completions for a batch of content using the specified model. + Args: - response_format: Configuration for JSON schema-guided response generation. + content_batch: The content to generate completions for. + + model_id: The identifier of the model to use. The model must be registered with Llama + Stack and available via the /models endpoint. + + logprobs: (Optional) If specified, log probabilities for each token position will be + returned. 
+ + response_format: (Optional) Grammar specification for guided (structured) decoding. - sampling_params: Sampling parameters. + sampling_params: (Optional) Parameters to control the sampling strategy. extra_headers: Send extra headers @@ -802,7 +846,7 @@ async def chat_completion( Generate a chat completion for the given messages using the specified model. Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -815,7 +859,7 @@ async def chat_completion( providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF grammar. This format is more flexible, but not all providers support it. - sampling_params: Parameters to control the sampling strategy + sampling_params: Parameters to control the sampling strategy. stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False. @@ -833,7 +877,7 @@ async def chat_completion( are output as Python syntax -- a list of function calls. .. deprecated:: Use tool_config instead. - tools: (Optional) List of tool definitions available to the model + tools: (Optional) List of tool definitions available to the model. extra_headers: Send extra headers @@ -870,7 +914,7 @@ async def chat_completion( Generate a chat completion for the given messages using the specified model. Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -886,7 +930,7 @@ async def chat_completion( providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF grammar. This format is more flexible, but not all providers support it. - sampling_params: Parameters to control the sampling strategy + sampling_params: Parameters to control the sampling strategy. tool_choice: (Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto. .. deprecated:: Use tool_config instead. @@ -901,7 +945,7 @@ async def chat_completion( are output as Python syntax -- a list of function calls. .. deprecated:: Use tool_config instead. - tools: (Optional) List of tool definitions available to the model + tools: (Optional) List of tool definitions available to the model. extra_headers: Send extra headers @@ -938,7 +982,7 @@ async def chat_completion( Generate a chat completion for the given messages using the specified model. Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -954,7 +998,7 @@ async def chat_completion( providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF grammar. This format is more flexible, but not all providers support it. - sampling_params: Parameters to control the sampling strategy + sampling_params: Parameters to control the sampling strategy. tool_choice: (Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto. .. deprecated:: Use tool_config instead. @@ -969,7 +1013,7 @@ async def chat_completion( are output as Python syntax -- a list of function calls. .. deprecated:: Use tool_config instead. 
- tools: (Optional) List of tool definitions available to the model + tools: (Optional) List of tool definitions available to the model. extra_headers: Send extra headers @@ -1052,7 +1096,7 @@ async def completion( Generate a completion for the given content using the specified model. Args: - content: The content to generate a completion for + content: The content to generate a completion for. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -1060,9 +1104,9 @@ async def completion( logprobs: (Optional) If specified, log probabilities for each token position will be returned. - response_format: (Optional) Grammar specification for guided (structured) decoding + response_format: (Optional) Grammar specification for guided (structured) decoding. - sampling_params: (Optional) Parameters to control the sampling strategy + sampling_params: (Optional) Parameters to control the sampling strategy. stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False. @@ -1098,7 +1142,7 @@ async def completion( Generate a completion for the given content using the specified model. Args: - content: The content to generate a completion for + content: The content to generate a completion for. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -1109,9 +1153,9 @@ async def completion( logprobs: (Optional) If specified, log probabilities for each token position will be returned. - response_format: (Optional) Grammar specification for guided (structured) decoding + response_format: (Optional) Grammar specification for guided (structured) decoding. - sampling_params: (Optional) Parameters to control the sampling strategy + sampling_params: (Optional) Parameters to control the sampling strategy. extra_headers: Send extra headers @@ -1144,7 +1188,7 @@ async def completion( Generate a completion for the given content using the specified model. Args: - content: The content to generate a completion for + content: The content to generate a completion for. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -1155,9 +1199,9 @@ async def completion( logprobs: (Optional) If specified, log probabilities for each token position will be returned. - response_format: (Optional) Grammar specification for guided (structured) decoding + response_format: (Optional) Grammar specification for guided (structured) decoding. - sampling_params: (Optional) Parameters to control the sampling strategy + sampling_params: (Optional) Parameters to control the sampling strategy. 
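As a minimal usage sketch of the inference endpoints whose docstrings are standardized above: this assumes the package's `LlamaStackClient` entry point, that the resource is exposed as `client.inference` (inferred from the file path), and placeholder server URL, model id, and message shape.

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

# Chat completion: a list of role/content messages plus a registered model_id.
chat = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.2-3B-Instruct",  # placeholder; must be registered via /models
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)

# Plain completion: raw content plus a model_id.
text = client.inference.completion(
    model_id="meta-llama/Llama-3.2-3B-Instruct",
    content="The capital of France is",
)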
extra_headers: Send extra headers diff --git a/src/llama_stack_client/resources/inspect.py b/src/llama_stack_client/resources/inspect.py index 86fe1e43..dd9044e3 100644 --- a/src/llama_stack_client/resources/inspect.py +++ b/src/llama_stack_client/resources/inspect.py @@ -50,6 +50,7 @@ def health( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> HealthInfo: + """Get the health of the service.""" return self._get( "/v1/health", options=make_request_options( @@ -68,6 +69,7 @@ def version( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> VersionInfo: + """Get the version of the service.""" return self._get( "/v1/version", options=make_request_options( @@ -107,6 +109,7 @@ async def health( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> HealthInfo: + """Get the health of the service.""" return await self._get( "/v1/health", options=make_request_options( @@ -125,6 +128,7 @@ async def version( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> VersionInfo: + """Get the version of the service.""" return await self._get( "/v1/version", options=make_request_options( diff --git a/src/llama_stack_client/resources/models.py b/src/llama_stack_client/resources/models.py index 02458d3b..b3fcaa69 100644 --- a/src/llama_stack_client/resources/models.py +++ b/src/llama_stack_client/resources/models.py @@ -58,6 +58,8 @@ def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Model: """ + Get a model by its identifier. + Args: extra_headers: Send extra headers @@ -87,6 +89,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ModelListResponse: + """List all models.""" return self._get( "/v1/models", options=make_request_options( @@ -115,7 +118,19 @@ def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Model: """ + Register a model. + Args: + model_id: The identifier of the model to register. + + metadata: Any additional metadata for this model. + + model_type: The type of model to register. + + provider_id: The identifier of the provider. + + provider_model_id: The identifier of the model in the provider. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -154,6 +169,8 @@ def unregister( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Unregister a model. + Args: extra_headers: Send extra headers @@ -207,6 +224,8 @@ async def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Model: """ + Get a model by its identifier. + Args: extra_headers: Send extra headers @@ -236,6 +255,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ModelListResponse: + """List all models.""" return await self._get( "/v1/models", options=make_request_options( @@ -264,7 +284,19 @@ async def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Model: """ + Register a model. + Args: + model_id: The identifier of the model to register. + + metadata: Any additional metadata for this model. + + model_type: The type of model to register. + + provider_id: The identifier of the provider. + + provider_model_id: The identifier of the model in the provider. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -303,6 +335,8 @@ async def unregister( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Unregister a model. + Args: extra_headers: Send extra headers diff --git a/src/llama_stack_client/resources/post_training/job.py b/src/llama_stack_client/resources/post_training/job.py index a55ba7fa..51a6ccda 100644 --- a/src/llama_stack_client/resources/post_training/job.py +++ b/src/llama_stack_client/resources/post_training/job.py @@ -56,6 +56,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> List[Data]: + """Get all training jobs.""" return self._get( "/v1/post-training/jobs", options=make_request_options( @@ -80,7 +81,11 @@ def artifacts( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> JobArtifactsResponse: """ + Get the artifacts of a training job. + Args: + job_uuid: The UUID of the job to get the artifacts of. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -113,7 +118,11 @@ def cancel( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Cancel a training job. + Args: + job_uuid: The UUID of the job to cancel. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -144,7 +153,11 @@ def status( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> JobStatusResponse: """ + Get the status of a training job. + Args: + job_uuid: The UUID of the job to get the status of. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -196,6 +209,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> List[Data]: + """Get all training jobs.""" return await self._get( "/v1/post-training/jobs", options=make_request_options( @@ -220,7 +234,11 @@ async def artifacts( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> JobArtifactsResponse: """ + Get the artifacts of a training job. + Args: + job_uuid: The UUID of the job to get the artifacts of. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -253,7 +271,11 @@ async def cancel( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Cancel a training job. + Args: + job_uuid: The UUID of the job to cancel. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -284,7 +306,11 @@ async def status( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> JobStatusResponse: """ + Get the status of a training job. + Args: + job_uuid: The UUID of the job to get the status of. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/post_training/post_training.py b/src/llama_stack_client/resources/post_training/post_training.py index fe0d2b7b..6149edc3 100644 --- a/src/llama_stack_client/resources/post_training/post_training.py +++ b/src/llama_stack_client/resources/post_training/post_training.py @@ -76,7 +76,21 @@ def preference_optimize( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> PostTrainingJob: """ + Run preference optimization of a model. + Args: + algorithm_config: The algorithm configuration. + + finetuned_model: The model to fine-tune. 
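A rough sketch of the model-registry and training-job calls documented above, reusing the `client` from the earlier sketch; `client.models` and `client.post_training.job` are attribute names inferred from the file paths, and all identifiers are placeholders.

# Register a model against a specific provider, then list what is available.
model = client.models.register(
    model_id="my-model",              # placeholder identifier
    provider_id="ollama",             # placeholder provider
    provider_model_id="llama3.2:3b",  # placeholder provider-side name
)
print(client.models.list())

# Inspect a previously started training job by its UUID (placeholder value).
job_uuid = "00000000-0000-0000-0000-000000000000"
print(client.post_training.job.status(job_uuid=job_uuid))
print(client.post_training.job.artifacts(job_uuid=job_uuid))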
+ + hyperparam_search_config: The hyperparam search configuration. + + job_uuid: The UUID of the job to create. + + logger_config: The logger configuration. + + training_config: The training configuration. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -122,7 +136,23 @@ def supervised_fine_tune( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> PostTrainingJob: """ + Run supervised fine-tuning of a model. + Args: + hyperparam_search_config: The hyperparam search configuration. + + job_uuid: The UUID of the job to create. + + logger_config: The logger configuration. + + training_config: The training configuration. + + algorithm_config: The algorithm configuration. + + checkpoint_dir: The directory to save checkpoint(s) to. + + model: The model to fine-tune. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -193,7 +223,21 @@ async def preference_optimize( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> PostTrainingJob: """ + Run preference optimization of a model. + Args: + algorithm_config: The algorithm configuration. + + finetuned_model: The model to fine-tune. + + hyperparam_search_config: The hyperparam search configuration. + + job_uuid: The UUID of the job to create. + + logger_config: The logger configuration. + + training_config: The training configuration. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -239,7 +283,23 @@ async def supervised_fine_tune( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> PostTrainingJob: """ + Run supervised fine-tuning of a model. + Args: + hyperparam_search_config: The hyperparam search configuration. + + job_uuid: The UUID of the job to create. + + logger_config: The logger configuration. + + training_config: The training configuration. + + algorithm_config: The algorithm configuration. + + checkpoint_dir: The directory to save checkpoint(s) to. + + model: The model to fine-tune. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/providers.py b/src/llama_stack_client/resources/providers.py index d87960ac..a1c092eb 100644 --- a/src/llama_stack_client/resources/providers.py +++ b/src/llama_stack_client/resources/providers.py @@ -55,6 +55,8 @@ def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ProviderInfo: """ + Get detailed information about a specific provider. + Args: extra_headers: Send extra headers @@ -84,6 +86,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ProviderListResponse: + """List all available providers.""" return self._get( "/v1/providers", options=make_request_options( @@ -129,6 +132,8 @@ async def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ProviderInfo: """ + Get detailed information about a specific provider. 
+ Args: extra_headers: Send extra headers @@ -158,6 +163,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ProviderListResponse: + """List all available providers.""" return await self._get( "/v1/providers", options=make_request_options( diff --git a/src/llama_stack_client/resources/responses.py b/src/llama_stack_client/resources/responses.py index 5e7bd4c3..bb7fcdc6 100644 --- a/src/llama_stack_client/resources/responses.py +++ b/src/llama_stack_client/resources/responses.py @@ -52,9 +52,11 @@ def create( *, input: Union[str, Iterable[response_create_params.InputUnionMember1]], model: str, + instructions: str | NotGiven = NOT_GIVEN, previous_response_id: str | NotGiven = NOT_GIVEN, store: bool | NotGiven = NOT_GIVEN, stream: Literal[False] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -92,8 +94,10 @@ def create( input: Union[str, Iterable[response_create_params.InputUnionMember1]], model: str, stream: Literal[True], + instructions: str | NotGiven = NOT_GIVEN, previous_response_id: str | NotGiven = NOT_GIVEN, store: bool | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -131,8 +135,10 @@ def create( input: Union[str, Iterable[response_create_params.InputUnionMember1]], model: str, stream: bool, + instructions: str | NotGiven = NOT_GIVEN, previous_response_id: str | NotGiven = NOT_GIVEN, store: bool | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -169,9 +175,11 @@ def create( *, input: Union[str, Iterable[response_create_params.InputUnionMember1]], model: str, + instructions: str | NotGiven = NOT_GIVEN, previous_response_id: str | NotGiven = NOT_GIVEN, store: bool | NotGiven = NOT_GIVEN, stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -186,9 +194,11 @@ def create( { "input": input, "model": model, + "instructions": instructions, "previous_response_id": previous_response_id, "store": store, "stream": stream, + "temperature": temperature, "tools": tools, }, response_create_params.ResponseCreateParamsStreaming @@ -205,7 +215,7 @@ def create( def retrieve( self, - id: str, + response_id: str, *, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
# The extra values given here take precedence over values defined on the client or passed to this method. @@ -226,10 +236,10 @@ def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ - if not id: - raise ValueError(f"Expected a non-empty value for `id` but received {id!r}") + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") return self._get( - f"/v1/openai/v1/responses/{id}", + f"/v1/openai/v1/responses/{response_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -263,9 +273,11 @@ async def create( *, input: Union[str, Iterable[response_create_params.InputUnionMember1]], model: str, + instructions: str | NotGiven = NOT_GIVEN, previous_response_id: str | NotGiven = NOT_GIVEN, store: bool | NotGiven = NOT_GIVEN, stream: Literal[False] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -303,8 +315,10 @@ async def create( input: Union[str, Iterable[response_create_params.InputUnionMember1]], model: str, stream: Literal[True], + instructions: str | NotGiven = NOT_GIVEN, previous_response_id: str | NotGiven = NOT_GIVEN, store: bool | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -342,8 +356,10 @@ async def create( input: Union[str, Iterable[response_create_params.InputUnionMember1]], model: str, stream: bool, + instructions: str | NotGiven = NOT_GIVEN, previous_response_id: str | NotGiven = NOT_GIVEN, store: bool | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -380,9 +396,11 @@ async def create( *, input: Union[str, Iterable[response_create_params.InputUnionMember1]], model: str, + instructions: str | NotGiven = NOT_GIVEN, previous_response_id: str | NotGiven = NOT_GIVEN, store: bool | NotGiven = NOT_GIVEN, stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
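To illustrate the `instructions` and `temperature` parameters added to the responses API here, and the retrieve-by-`response_id` rename: a sketch reusing the earlier `client`, where the model name is a placeholder and the returned object is assumed to carry an `id` field.

response = client.responses.create(
    model="meta-llama/Llama-3.2-3B-Instruct",            # placeholder
    input="Summarize the Llama Stack project in one line.",
    instructions="Answer in a single short sentence.",   # new parameter in this patch
    temperature=0.2,                                      # new parameter in this patch
    store=True,
)

# retrieve() now takes `response_id` instead of `id`.
fetched = client.responses.retrieve(response.id)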
@@ -397,9 +415,11 @@ async def create( { "input": input, "model": model, + "instructions": instructions, "previous_response_id": previous_response_id, "store": store, "stream": stream, + "temperature": temperature, "tools": tools, }, response_create_params.ResponseCreateParamsStreaming @@ -416,7 +436,7 @@ async def create( async def retrieve( self, - id: str, + response_id: str, *, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -437,10 +457,10 @@ async def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ - if not id: - raise ValueError(f"Expected a non-empty value for `id` but received {id!r}") + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") return await self._get( - f"/v1/openai/v1/responses/{id}", + f"/v1/openai/v1/responses/{response_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), diff --git a/src/llama_stack_client/resources/routes.py b/src/llama_stack_client/resources/routes.py index a0aefdfa..d7c7cfff 100644 --- a/src/llama_stack_client/resources/routes.py +++ b/src/llama_stack_client/resources/routes.py @@ -52,6 +52,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> RouteListResponse: + """List all routes.""" return self._get( "/v1/inspect/routes", options=make_request_options( @@ -95,6 +96,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> RouteListResponse: + """List all routes.""" return await self._get( "/v1/inspect/routes", options=make_request_options( diff --git a/src/llama_stack_client/resources/safety.py b/src/llama_stack_client/resources/safety.py index 66646102..abd2e775 100644 --- a/src/llama_stack_client/resources/safety.py +++ b/src/llama_stack_client/resources/safety.py @@ -58,7 +58,15 @@ def run_shield( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> RunShieldResponse: """ + Run a shield. + Args: + messages: The messages to run the shield on. + + params: The parameters of the shield. + + shield_id: The identifier of the shield to run. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -118,7 +126,15 @@ async def run_shield( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> RunShieldResponse: """ + Run a shield. + Args: + messages: The messages to run the shield on. + + params: The parameters of the shield. + + shield_id: The identifier of the shield to run. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/scoring.py b/src/llama_stack_client/resources/scoring.py index 33ee8969..dd650a82 100644 --- a/src/llama_stack_client/resources/scoring.py +++ b/src/llama_stack_client/resources/scoring.py @@ -102,7 +102,15 @@ def score_batch( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ScoringScoreBatchResponse: """ + Score a batch of rows. + Args: + dataset_id: The ID of the dataset to score. + + save_results_dataset: Whether to save the results to a dataset. + + scoring_functions: The scoring functions to use for the scoring. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -205,7 +213,15 @@ async def score_batch( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ScoringScoreBatchResponse: """ + Score a batch of rows. + Args: + dataset_id: The ID of the dataset to score. + + save_results_dataset: Whether to save the results to a dataset. + + scoring_functions: The scoring functions to use for the scoring. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/scoring_functions.py b/src/llama_stack_client/resources/scoring_functions.py index f01ff17b..07d9586f 100644 --- a/src/llama_stack_client/resources/scoring_functions.py +++ b/src/llama_stack_client/resources/scoring_functions.py @@ -59,6 +59,8 @@ def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ScoringFn: """ + Get a scoring function by its ID. + Args: extra_headers: Send extra headers @@ -88,6 +90,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ScoringFunctionListResponse: + """List all scoring functions.""" return self._get( "/v1/scoring-functions", options=make_request_options( @@ -117,7 +120,20 @@ def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Register a scoring function. + Args: + description: The description of the scoring function. + + scoring_fn_id: The ID of the scoring function to register. + + params: The parameters for the scoring function for benchmark eval, these can be + overridden for app eval. + + provider_id: The ID of the provider to use for the scoring function. + + provider_scoring_fn_id: The ID of the provider scoring function to use for the scoring function. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -179,6 +195,8 @@ async def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ScoringFn: """ + Get a scoring function by its ID. + Args: extra_headers: Send extra headers @@ -208,6 +226,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ScoringFunctionListResponse: + """List all scoring functions.""" return await self._get( "/v1/scoring-functions", options=make_request_options( @@ -237,7 +256,20 @@ async def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Register a scoring function. + Args: + description: The description of the scoring function. + + scoring_fn_id: The ID of the scoring function to register. + + params: The parameters for the scoring function for benchmark eval, these can be + overridden for app eval. + + provider_id: The ID of the provider to use for the scoring function. + + provider_scoring_fn_id: The ID of the provider scoring function to use for the scoring function. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/shields.py b/src/llama_stack_client/resources/shields.py index 4ef88ac7..5982298a 100644 --- a/src/llama_stack_client/resources/shields.py +++ b/src/llama_stack_client/resources/shields.py @@ -57,6 +57,8 @@ def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Shield: """ + Get a shield by its identifier. 
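Sketching the shield and batch-scoring calls whose arguments are documented above, again reusing the earlier `client`; the shield id, dataset id, and the scoring-function mapping are placeholders, and the exact shape of `params` is an assumption (left empty here).

verdict = client.safety.run_shield(
    shield_id="llama-guard",   # placeholder shield identifier
    messages=[{"role": "user", "content": "How do I bake a cake?"}],
    params={},                 # shield-specific parameters; none in this sketch
)

scores = client.scoring.score_batch(
    dataset_id="my-eval-dataset",                  # placeholder dataset
    scoring_functions={"basic::equality": None},   # placeholder scoring-function spec
    save_results_dataset=False,
)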
+ Args: extra_headers: Send extra headers @@ -86,6 +88,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ShieldListResponse: + """List all shields.""" return self._get( "/v1/shields", options=make_request_options( @@ -113,7 +116,17 @@ def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Shield: """ + Register a shield. + Args: + shield_id: The identifier of the shield to register. + + params: The parameters of the shield. + + provider_id: The identifier of the provider. + + provider_shield_id: The identifier of the shield in the provider. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -172,6 +185,8 @@ async def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Shield: """ + Get a shield by its identifier. + Args: extra_headers: Send extra headers @@ -201,6 +216,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ShieldListResponse: + """List all shields.""" return await self._get( "/v1/shields", options=make_request_options( @@ -228,7 +244,17 @@ async def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Shield: """ + Register a shield. + Args: + shield_id: The identifier of the shield to register. + + params: The parameters of the shield. + + provider_id: The identifier of the provider. + + provider_shield_id: The identifier of the shield in the provider. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/telemetry.py b/src/llama_stack_client/resources/telemetry.py index 12261eee..96af4528 100644 --- a/src/llama_stack_client/resources/telemetry.py +++ b/src/llama_stack_client/resources/telemetry.py @@ -69,6 +69,8 @@ def get_span( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> TelemetryGetSpanResponse: """ + Get a span by its ID. + Args: extra_headers: Send extra headers @@ -104,7 +106,13 @@ def get_span_tree( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> TelemetryGetSpanTreeResponse: """ + Get a span tree by its ID. + Args: + attributes_to_return: The attributes to return in the tree. + + max_depth: The maximum depth of the tree. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -146,6 +154,8 @@ def get_trace( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Trace: """ + Get a trace by its ID. + Args: extra_headers: Send extra headers @@ -178,7 +188,13 @@ def log_event( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Log an event. + Args: + event: The event to log. + + ttl_seconds: The time to live of the event. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -217,7 +233,15 @@ def query_spans( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> TelemetryQuerySpansResponse: """ + Query spans. + Args: + attribute_filters: The attribute filters to apply to the spans. + + attributes_to_return: The attributes to return in the spans. + + max_depth: The maximum depth of the tree. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -261,7 +285,17 @@ def query_traces( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> TelemetryQueryTracesResponse: """ + Query traces. 
+ Args: + attribute_filters: The attribute filters to apply to the traces. + + limit: The limit of traces to return. + + offset: The offset of the traces to return. + + order_by: The order by of the traces to return. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -306,7 +340,17 @@ def save_spans_to_dataset( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Save spans to a dataset. + Args: + attribute_filters: The attribute filters to apply to the spans. + + attributes_to_save: The attributes to save to the dataset. + + dataset_id: The ID of the dataset to save the spans to. + + max_depth: The maximum depth of the tree. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -367,6 +411,8 @@ async def get_span( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> TelemetryGetSpanResponse: """ + Get a span by its ID. + Args: extra_headers: Send extra headers @@ -402,7 +448,13 @@ async def get_span_tree( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> TelemetryGetSpanTreeResponse: """ + Get a span tree by its ID. + Args: + attributes_to_return: The attributes to return in the tree. + + max_depth: The maximum depth of the tree. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -444,6 +496,8 @@ async def get_trace( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Trace: """ + Get a trace by its ID. + Args: extra_headers: Send extra headers @@ -476,7 +530,13 @@ async def log_event( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Log an event. + Args: + event: The event to log. + + ttl_seconds: The time to live of the event. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -515,7 +575,15 @@ async def query_spans( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> TelemetryQuerySpansResponse: """ + Query spans. + Args: + attribute_filters: The attribute filters to apply to the spans. + + attributes_to_return: The attributes to return in the spans. + + max_depth: The maximum depth of the tree. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -559,7 +627,17 @@ async def query_traces( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> TelemetryQueryTracesResponse: """ + Query traces. + Args: + attribute_filters: The attribute filters to apply to the traces. + + limit: The limit of traces to return. + + offset: The offset of the traces to return. + + order_by: The order by of the traces to return. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -604,7 +682,17 @@ async def save_spans_to_dataset( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Save spans to a dataset. + Args: + attribute_filters: The attribute filters to apply to the spans. + + attributes_to_save: The attributes to save to the dataset. + + dataset_id: The ID of the dataset to save the spans to. + + max_depth: The maximum depth of the tree. 
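A short sketch of the telemetry query calls documented above, reusing the earlier `client`; attribute names are placeholders, and the element shape of `attribute_filters` is not shown in this hunk, so the filter list is left empty.

traces = client.telemetry.query_traces(limit=10, offset=0)
print(traces)

spans = client.telemetry.query_spans(
    attribute_filters=[],               # no filtering in this sketch
    attributes_to_return=["model_id"],  # placeholder attribute name
)
print(spans)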
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/tool_runtime/rag_tool.py b/src/llama_stack_client/resources/tool_runtime/rag_tool.py index 048ea980..0ed84418 100644 --- a/src/llama_stack_client/resources/tool_runtime/rag_tool.py +++ b/src/llama_stack_client/resources/tool_runtime/rag_tool.py @@ -107,6 +107,8 @@ def query( Args: content: A image content item + query_config: Configuration for the RAG query generation. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -213,6 +215,8 @@ async def query( Args: content: A image content item + query_config: Configuration for the RAG query generation. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/tool_runtime/tool_runtime.py b/src/llama_stack_client/resources/tool_runtime/tool_runtime.py index dda3f661..1df58598 100644 --- a/src/llama_stack_client/resources/tool_runtime/tool_runtime.py +++ b/src/llama_stack_client/resources/tool_runtime/tool_runtime.py @@ -70,9 +70,13 @@ def invoke_tool( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ToolInvocationResult: """ - Run a tool with the given arguments + Run a tool with the given arguments. Args: + kwargs: A dictionary of arguments to pass to the tool. + + tool_name: The name of the tool to invoke. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -109,7 +113,13 @@ def list_tools( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ToolRuntimeListToolsResponse: """ + List all tools in the runtime. + Args: + mcp_endpoint: The MCP endpoint to use for the tool group. + + tool_group_id: The ID of the tool group to list tools for. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -175,9 +185,13 @@ async def invoke_tool( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ToolInvocationResult: """ - Run a tool with the given arguments + Run a tool with the given arguments. Args: + kwargs: A dictionary of arguments to pass to the tool. + + tool_name: The name of the tool to invoke. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -214,7 +228,13 @@ async def list_tools( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ToolRuntimeListToolsResponse: """ + List all tools in the runtime. + Args: + mcp_endpoint: The MCP endpoint to use for the tool group. + + tool_group_id: The ID of the tool group to list tools for. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/toolgroups.py b/src/llama_stack_client/resources/toolgroups.py index d882a6eb..a618657d 100644 --- a/src/llama_stack_client/resources/toolgroups.py +++ b/src/llama_stack_client/resources/toolgroups.py @@ -55,7 +55,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ToolgroupListResponse: - """List tool groups with optional provider""" + """List tool groups with optional provider.""" return self._get( "/v1/toolgroups", options=make_request_options( @@ -80,6 +80,8 @@ def get( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ToolGroup: """ + Get a tool group by its ID. 
+ Args: extra_headers: Send extra headers @@ -114,9 +116,17 @@ def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ - Register a tool group + Register a tool group. Args: + provider_id: The ID of the provider to use for the tool group. + + toolgroup_id: The ID of the tool group to register. + + args: A dictionary of arguments to pass to the tool group. + + mcp_endpoint: The MCP endpoint to use for the tool group. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -155,7 +165,7 @@ def unregister( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ - Unregister a tool group + Unregister a tool group. Args: extra_headers: Send extra headers @@ -208,7 +218,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ToolgroupListResponse: - """List tool groups with optional provider""" + """List tool groups with optional provider.""" return await self._get( "/v1/toolgroups", options=make_request_options( @@ -233,6 +243,8 @@ async def get( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ToolGroup: """ + Get a tool group by its ID. + Args: extra_headers: Send extra headers @@ -267,9 +279,17 @@ async def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ - Register a tool group + Register a tool group. Args: + provider_id: The ID of the provider to use for the tool group. + + toolgroup_id: The ID of the tool group to register. + + args: A dictionary of arguments to pass to the tool group. + + mcp_endpoint: The MCP endpoint to use for the tool group. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -308,7 +328,7 @@ async def unregister( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ - Unregister a tool group + Unregister a tool group. Args: extra_headers: Send extra headers diff --git a/src/llama_stack_client/resources/tools.py b/src/llama_stack_client/resources/tools.py index 8a9b91e8..c9fd3808 100644 --- a/src/llama_stack_client/resources/tools.py +++ b/src/llama_stack_client/resources/tools.py @@ -57,9 +57,11 @@ def list( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ToolListResponse: """ - List tools with optional tool group + List tools with optional tool group. Args: + toolgroup_id: The ID of the tool group to list tools for. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -93,6 +95,8 @@ def get( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Tool: """ + Get a tool by its name. + Args: extra_headers: Send extra headers @@ -145,9 +149,11 @@ async def list( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ToolListResponse: """ - List tools with optional tool group + List tools with optional tool group. Args: + toolgroup_id: The ID of the tool group to list tools for. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -181,6 +187,8 @@ async def get( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Tool: """ + Get a tool by its name. 
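A sketch of registering a tool group and invoking one of its tools, per the arguments documented above and reusing the earlier `client`; the group, provider, and tool names are placeholders taken from common Llama Stack examples, not from this patch.

client.toolgroups.register(
    toolgroup_id="builtin::websearch",  # placeholder tool group id
    provider_id="tavily-search",        # placeholder provider id
)

print(client.tools.list(toolgroup_id="builtin::websearch"))

result = client.tool_runtime.invoke_tool(
    tool_name="web_search",             # placeholder tool name
    kwargs={"query": "latest Llama Stack release"},
)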
+ Args: extra_headers: Send extra headers diff --git a/src/llama_stack_client/resources/vector_dbs.py b/src/llama_stack_client/resources/vector_dbs.py index c75d261d..c1ad232d 100644 --- a/src/llama_stack_client/resources/vector_dbs.py +++ b/src/llama_stack_client/resources/vector_dbs.py @@ -58,6 +58,8 @@ def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> VectorDBRetrieveResponse: """ + Get a vector database by its identifier. + Args: extra_headers: Send extra headers @@ -87,6 +89,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> VectorDBListResponse: + """List all vector databases.""" return self._get( "/v1/vector-dbs", options=make_request_options( @@ -115,7 +118,19 @@ def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> VectorDBRegisterResponse: """ + Register a vector database. + Args: + embedding_model: The embedding model to use. + + vector_db_id: The identifier of the vector database to register. + + embedding_dimension: The dimension of the embedding model. + + provider_id: The identifier of the provider. + + provider_vector_db_id: The identifier of the vector database in the provider. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -154,6 +169,8 @@ def unregister( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Unregister a vector database. + Args: extra_headers: Send extra headers @@ -207,6 +224,8 @@ async def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> VectorDBRetrieveResponse: """ + Get a vector database by its identifier. + Args: extra_headers: Send extra headers @@ -236,6 +255,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> VectorDBListResponse: + """List all vector databases.""" return await self._get( "/v1/vector-dbs", options=make_request_options( @@ -264,7 +284,19 @@ async def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> VectorDBRegisterResponse: """ + Register a vector database. + Args: + embedding_model: The embedding model to use. + + vector_db_id: The identifier of the vector database to register. + + embedding_dimension: The dimension of the embedding model. + + provider_id: The identifier of the provider. + + provider_vector_db_id: The identifier of the vector database in the provider. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -303,6 +335,8 @@ async def unregister( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Unregister a vector database. + Args: extra_headers: Send extra headers diff --git a/src/llama_stack_client/resources/vector_io.py b/src/llama_stack_client/resources/vector_io.py index 9b1e8822..7ec4691e 100644 --- a/src/llama_stack_client/resources/vector_io.py +++ b/src/llama_stack_client/resources/vector_io.py @@ -58,7 +58,15 @@ def insert( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Insert chunks into a vector database. + Args: + chunks: The chunks to insert. + + vector_db_id: The identifier of the vector database to insert the chunks into. + + ttl_seconds: The time to live of the chunks. 
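Finally, a sketch of registering a vector database and inserting/querying chunks, based on the parameters documented above and reusing the earlier `client`; the embedding model, dimension, and especially the chunk dict shape are assumptions, since the chunk schema is not part of this hunk.

client.vector_dbs.register(
    vector_db_id="docs",                 # placeholder identifier
    embedding_model="all-MiniLM-L6-v2",  # placeholder embedding model
    embedding_dimension=384,             # placeholder dimension
)

client.vector_io.insert(
    vector_db_id="docs",
    chunks=[{"content": "Llama Stack is a set of composable APIs.", "metadata": {"source": "readme"}}],  # assumed chunk shape
)

hits = client.vector_io.query(vector_db_id="docs", query="What is Llama Stack?")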
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -98,8 +106,14 @@ def query( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> QueryChunksResponse: """ + Query chunks from a vector database. + Args: - query: A image content item + query: The query to search for. + + vector_db_id: The identifier of the vector database to query. + + params: The parameters of the query. extra_headers: Send extra headers @@ -160,7 +174,15 @@ async def insert( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Insert chunks into a vector database. + Args: + chunks: The chunks to insert. + + vector_db_id: The identifier of the vector database to insert the chunks into. + + ttl_seconds: The time to live of the chunks. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -200,8 +222,14 @@ async def query( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> QueryChunksResponse: """ + Query chunks from a vector database. + Args: - query: A image content item + query: The query to search for. + + vector_db_id: The identifier of the vector database to query. + + params: The parameters of the query. extra_headers: Send extra headers diff --git a/src/llama_stack_client/types/benchmark.py b/src/llama_stack_client/types/benchmark.py index 3af66f6a..e0b1ce9e 100644 --- a/src/llama_stack_client/types/benchmark.py +++ b/src/llama_stack_client/types/benchmark.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import Dict, List, Union +from typing import Dict, List, Union, Optional from typing_extensions import Literal from .._models import BaseModel @@ -17,8 +17,8 @@ class Benchmark(BaseModel): provider_id: str - provider_resource_id: str - scoring_functions: List[str] type: Literal["benchmark"] + + provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/benchmark_register_params.py b/src/llama_stack_client/types/benchmark_register_params.py index def970a1..0fa9d508 100644 --- a/src/llama_stack_client/types/benchmark_register_params.py +++ b/src/llama_stack_client/types/benchmark_register_params.py @@ -10,13 +10,19 @@ class BenchmarkRegisterParams(TypedDict, total=False): benchmark_id: Required[str] + """The ID of the benchmark to register.""" dataset_id: Required[str] + """The ID of the dataset to use for the benchmark.""" scoring_functions: Required[List[str]] + """The scoring functions to use for the benchmark.""" metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + """The metadata to use for the benchmark.""" provider_benchmark_id: str + """The ID of the provider benchmark to use for the benchmark.""" provider_id: str + """The ID of the provider to use for the benchmark.""" diff --git a/src/llama_stack_client/types/chat/__init__.py b/src/llama_stack_client/types/chat/__init__.py index 9384ac14..27720e7f 100644 --- a/src/llama_stack_client/types/chat/__init__.py +++ b/src/llama_stack_client/types/chat/__init__.py @@ -2,5 +2,8 @@ from __future__ import annotations +from .completion_list_params import CompletionListParams as CompletionListParams from .completion_create_params import CompletionCreateParams as CompletionCreateParams +from .completion_list_response import CompletionListResponse as CompletionListResponse from .completion_create_response import CompletionCreateResponse as CompletionCreateResponse +from .completion_retrieve_response import 
CompletionRetrieveResponse as CompletionRetrieveResponse diff --git a/src/llama_stack_client/types/chat/completion_create_params.py b/src/llama_stack_client/types/chat/completion_create_params.py index 0281420b..2c9d26f7 100644 --- a/src/llama_stack_client/types/chat/completion_create_params.py +++ b/src/llama_stack_client/types/chat/completion_create_params.py @@ -47,7 +47,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): messages: Required[Iterable[Message]] - """List of messages in the conversation""" + """List of messages in the conversation.""" model: Required[str] """The identifier of the model to use. @@ -57,64 +57,64 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ frequency_penalty: float - """(Optional) The penalty for repeated tokens""" + """(Optional) The penalty for repeated tokens.""" function_call: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - """(Optional) The function call to use""" + """(Optional) The function call to use.""" functions: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - """(Optional) List of functions to use""" + """(Optional) List of functions to use.""" logit_bias: Dict[str, float] - """(Optional) The logit bias to use""" + """(Optional) The logit bias to use.""" logprobs: bool - """(Optional) The log probabilities to use""" + """(Optional) The log probabilities to use.""" max_completion_tokens: int - """(Optional) The maximum number of tokens to generate""" + """(Optional) The maximum number of tokens to generate.""" max_tokens: int - """(Optional) The maximum number of tokens to generate""" + """(Optional) The maximum number of tokens to generate.""" n: int - """(Optional) The number of completions to generate""" + """(Optional) The number of completions to generate.""" parallel_tool_calls: bool - """(Optional) Whether to parallelize tool calls""" + """(Optional) Whether to parallelize tool calls.""" presence_penalty: float - """(Optional) The penalty for repeated tokens""" + """(Optional) The penalty for repeated tokens.""" response_format: ResponseFormat - """(Optional) The response format to use""" + """(Optional) The response format to use.""" seed: int - """(Optional) The seed to use""" + """(Optional) The seed to use.""" stop: Union[str, List[str]] - """(Optional) The stop tokens to use""" + """(Optional) The stop tokens to use.""" stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - """(Optional) The stream options to use""" + """(Optional) The stream options to use.""" temperature: float - """(Optional) The temperature to use""" + """(Optional) The temperature to use.""" tool_choice: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - """(Optional) The tool choice to use""" + """(Optional) The tool choice to use.""" tools: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - """(Optional) The tools to use""" + """(Optional) The tools to use.""" top_logprobs: int - """(Optional) The top log probabilities to use""" + """(Optional) The top log probabilities to use.""" top_p: float - """(Optional) The top p to use""" + """(Optional) The top p to use.""" user: str - """(Optional) The user to use""" + """(Optional) The user to use.""" class MessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(TypedDict, total=False): @@ -390,12 +390,12 @@ class ResponseFormatOpenAIResponseFormatJsonObject(TypedDict, total=False): class 
CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False): stream: Literal[False] - """(Optional) Whether to stream the response""" + """(Optional) Whether to stream the response.""" class CompletionCreateParamsStreaming(CompletionCreateParamsBase): stream: Required[Literal[True]] - """(Optional) Whether to stream the response""" + """(Optional) Whether to stream the response.""" CompletionCreateParams = Union[CompletionCreateParamsNonStreaming, CompletionCreateParamsStreaming] diff --git a/src/llama_stack_client/types/chat/completion_list_params.py b/src/llama_stack_client/types/chat/completion_list_params.py new file mode 100644 index 00000000..5fb77c2c --- /dev/null +++ b/src/llama_stack_client/types/chat/completion_list_params.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["CompletionListParams"] + + +class CompletionListParams(TypedDict, total=False): + after: str + """The ID of the last chat completion to return.""" + + limit: int + """The maximum number of chat completions to return.""" + + model: str + """The model to filter by.""" + + order: Literal["asc", "desc"] + """The order to sort the chat completions by: "asc" or "desc". Defaults to "desc".""" diff --git a/src/llama_stack_client/types/chat/completion_list_response.py b/src/llama_stack_client/types/chat/completion_list_response.py new file mode 100644 index 00000000..d3b580a1 --- /dev/null +++ b/src/llama_stack_client/types/chat/completion_list_response.py @@ -0,0 +1,667 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = [ + "CompletionListResponse", + "Data", + "DataChoice", + "DataChoiceMessage", + "DataChoiceMessageOpenAIUserMessageParam", + "DataChoiceMessageOpenAIUserMessageParamContentUnionMember1", + "DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "DataChoiceMessageOpenAISystemMessageParam", + "DataChoiceMessageOpenAISystemMessageParamContentUnionMember1", + "DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "DataChoiceMessageOpenAIAssistantMessageParam", + "DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1", + "DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "DataChoiceMessageOpenAIAssistantMessageParamToolCall", + "DataChoiceMessageOpenAIAssistantMessageParamToolCallFunction", + "DataChoiceMessageOpenAIToolMessageParam", + 
"DataChoiceMessageOpenAIToolMessageParamContentUnionMember1", + "DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "DataChoiceMessageOpenAIDeveloperMessageParam", + "DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1", + "DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "DataChoiceLogprobs", + "DataChoiceLogprobsContent", + "DataChoiceLogprobsContentTopLogprob", + "DataChoiceLogprobsRefusal", + "DataChoiceLogprobsRefusalTopLogprob", + "DataInputMessage", + "DataInputMessageOpenAIUserMessageParam", + "DataInputMessageOpenAIUserMessageParamContentUnionMember1", + "DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "DataInputMessageOpenAISystemMessageParam", + "DataInputMessageOpenAISystemMessageParamContentUnionMember1", + "DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "DataInputMessageOpenAIAssistantMessageParam", + "DataInputMessageOpenAIAssistantMessageParamContentUnionMember1", + "DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "DataInputMessageOpenAIAssistantMessageParamToolCall", + "DataInputMessageOpenAIAssistantMessageParamToolCallFunction", + "DataInputMessageOpenAIToolMessageParam", + "DataInputMessageOpenAIToolMessageParamContentUnionMember1", + "DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "DataInputMessageOpenAIDeveloperMessageParam", + "DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1", + "DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", +] + + +class DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class 
DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ( + DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +DataChoiceMessageOpenAIUserMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class DataChoiceMessageOpenAIUserMessageParam(BaseModel): + content: Union[str, List[DataChoiceMessageOpenAIUserMessageParamContentUnionMember1]] + """The content of the message, which can include text and other media""" + + role: Literal["user"] + """Must be "user" to identify this as a user message""" + + name: Optional[str] = None + """(Optional) The name of the user message participant.""" + + +class DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ( + DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +DataChoiceMessageOpenAISystemMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class DataChoiceMessageOpenAISystemMessageParam(BaseModel): + content: Union[str, List[DataChoiceMessageOpenAISystemMessageParamContentUnionMember1]] + """The content of the "system prompt". + + If multiple system messages are provided, they are concatenated. The underlying + Llama Stack code may also add other system messages (for example, for formatting + tool definitions). 
+ """ + + role: Literal["system"] + """Must be "system" to identify this as a system message""" + + name: Optional[str] = None + """(Optional) The name of the system message participant.""" + + +class DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam( + BaseModel +): + text: str + + type: Literal["text"] + + +class DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( + BaseModel +): + image_url: ( + DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class DataChoiceMessageOpenAIAssistantMessageParamToolCallFunction(BaseModel): + arguments: Optional[str] = None + + name: Optional[str] = None + + +class DataChoiceMessageOpenAIAssistantMessageParamToolCall(BaseModel): + type: Literal["function"] + + id: Optional[str] = None + + function: Optional[DataChoiceMessageOpenAIAssistantMessageParamToolCallFunction] = None + + index: Optional[int] = None + + +class DataChoiceMessageOpenAIAssistantMessageParam(BaseModel): + role: Literal["assistant"] + """Must be "assistant" to identify this as the model's response""" + + content: Union[str, List[DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1], None] = None + """The content of the model's response""" + + name: Optional[str] = None + """(Optional) The name of the assistant message participant.""" + + tool_calls: Optional[List[DataChoiceMessageOpenAIAssistantMessageParamToolCall]] = None + """List of tool calls. 
Each tool call is an OpenAIChatCompletionToolCall object.""" + + +class DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ( + DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +DataChoiceMessageOpenAIToolMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class DataChoiceMessageOpenAIToolMessageParam(BaseModel): + content: Union[str, List[DataChoiceMessageOpenAIToolMessageParamContentUnionMember1]] + """The response content from the tool""" + + role: Literal["tool"] + """Must be "tool" to identify this as a tool response""" + + tool_call_id: str + """Unique identifier for the tool call this response is for""" + + +class DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam( + BaseModel +): + text: str + + type: Literal["text"] + + +class DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( + BaseModel +): + image_url: ( + DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class DataChoiceMessageOpenAIDeveloperMessageParam(BaseModel): + content: Union[str, List[DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1]] + """The content of the developer message""" + + role: Literal["developer"] + """Must be "developer" to identify this as a developer message""" + + name: Optional[str] = None + """(Optional) The name of the developer message participant.""" + + +DataChoiceMessage: TypeAlias = Annotated[ + Union[ + DataChoiceMessageOpenAIUserMessageParam, + DataChoiceMessageOpenAISystemMessageParam, + DataChoiceMessageOpenAIAssistantMessageParam, + DataChoiceMessageOpenAIToolMessageParam, + DataChoiceMessageOpenAIDeveloperMessageParam, + ], + PropertyInfo(discriminator="role"), +] + + +class DataChoiceLogprobsContentTopLogprob(BaseModel): + token: str + + logprob: float + + bytes: Optional[List[int]] = None + + +class DataChoiceLogprobsContent(BaseModel): + token: str + + logprob: float + + top_logprobs: List[DataChoiceLogprobsContentTopLogprob] + + bytes: Optional[List[int]] = None + + +class DataChoiceLogprobsRefusalTopLogprob(BaseModel): + token: str + + logprob: float + + bytes: 
Optional[List[int]] = None + + +class DataChoiceLogprobsRefusal(BaseModel): + token: str + + logprob: float + + top_logprobs: List[DataChoiceLogprobsRefusalTopLogprob] + + bytes: Optional[List[int]] = None + + +class DataChoiceLogprobs(BaseModel): + content: Optional[List[DataChoiceLogprobsContent]] = None + """(Optional) The log probabilities for the tokens in the message""" + + refusal: Optional[List[DataChoiceLogprobsRefusal]] = None + """(Optional) The log probabilities for the tokens in the message""" + + +class DataChoice(BaseModel): + finish_reason: str + """The reason the model stopped generating""" + + index: int + """The index of the choice""" + + message: DataChoiceMessage + """The message from the model""" + + logprobs: Optional[DataChoiceLogprobs] = None + """(Optional) The log probabilities for the tokens in the message""" + + +class DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ( + DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +DataInputMessageOpenAIUserMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class DataInputMessageOpenAIUserMessageParam(BaseModel): + content: Union[str, List[DataInputMessageOpenAIUserMessageParamContentUnionMember1]] + """The content of the message, which can include text and other media""" + + role: Literal["user"] + """Must be "user" to identify this as a user message""" + + name: Optional[str] = None + """(Optional) The name of the user message participant.""" + + +class DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ( + DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +DataInputMessageOpenAISystemMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class DataInputMessageOpenAISystemMessageParam(BaseModel): + content: Union[str, List[DataInputMessageOpenAISystemMessageParamContentUnionMember1]] + """The content of the "system prompt". + + If multiple system messages are provided, they are concatenated. 
The underlying + Llama Stack code may also add other system messages (for example, for formatting + tool definitions). + """ + + role: Literal["system"] + """Must be "system" to identify this as a system message""" + + name: Optional[str] = None + """(Optional) The name of the system message participant.""" + + +class DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( + BaseModel +): + image_url: ( + DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +DataInputMessageOpenAIAssistantMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class DataInputMessageOpenAIAssistantMessageParamToolCallFunction(BaseModel): + arguments: Optional[str] = None + + name: Optional[str] = None + + +class DataInputMessageOpenAIAssistantMessageParamToolCall(BaseModel): + type: Literal["function"] + + id: Optional[str] = None + + function: Optional[DataInputMessageOpenAIAssistantMessageParamToolCallFunction] = None + + index: Optional[int] = None + + +class DataInputMessageOpenAIAssistantMessageParam(BaseModel): + role: Literal["assistant"] + """Must be "assistant" to identify this as the model's response""" + + content: Union[str, List[DataInputMessageOpenAIAssistantMessageParamContentUnionMember1], None] = None + """The content of the model's response""" + + name: Optional[str] = None + """(Optional) The name of the assistant message participant.""" + + tool_calls: Optional[List[DataInputMessageOpenAIAssistantMessageParamToolCall]] = None + """List of tool calls. 
Each tool call is an OpenAIChatCompletionToolCall object.""" + + +class DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ( + DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +DataInputMessageOpenAIToolMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class DataInputMessageOpenAIToolMessageParam(BaseModel): + content: Union[str, List[DataInputMessageOpenAIToolMessageParamContentUnionMember1]] + """The response content from the tool""" + + role: Literal["tool"] + """Must be "tool" to identify this as a tool response""" + + tool_call_id: str + """Unique identifier for the tool call this response is for""" + + +class DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( + BaseModel +): + image_url: ( + DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class DataInputMessageOpenAIDeveloperMessageParam(BaseModel): + content: Union[str, List[DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1]] + """The content of the developer message""" + + role: Literal["developer"] + """Must be "developer" to identify this as a developer message""" + + name: Optional[str] = None + """(Optional) The name of the developer message participant.""" + + +DataInputMessage: TypeAlias = Annotated[ + Union[ + DataInputMessageOpenAIUserMessageParam, + DataInputMessageOpenAISystemMessageParam, + DataInputMessageOpenAIAssistantMessageParam, + DataInputMessageOpenAIToolMessageParam, + DataInputMessageOpenAIDeveloperMessageParam, + ], + PropertyInfo(discriminator="role"), +] + + +class Data(BaseModel): + id: str + """The ID of the chat completion""" + + choices: List[DataChoice] + """List of choices""" + + created: int + """The Unix timestamp in seconds when the chat completion was created""" + + input_messages: List[DataInputMessage] + + model: str + """The model that was used to generate the chat completion""" + + object: Literal["chat.completion"] + """The object type, which will be "chat.completion" 
""" + + +class CompletionListResponse(BaseModel): + data: List[Data] + + first_id: str + + has_more: bool + + last_id: str + + object: Literal["list"] diff --git a/src/llama_stack_client/types/chat/completion_retrieve_response.py b/src/llama_stack_client/types/chat/completion_retrieve_response.py new file mode 100644 index 00000000..330c752d --- /dev/null +++ b/src/llama_stack_client/types/chat/completion_retrieve_response.py @@ -0,0 +1,626 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = [ + "CompletionRetrieveResponse", + "Choice", + "ChoiceMessage", + "ChoiceMessageOpenAIUserMessageParam", + "ChoiceMessageOpenAIUserMessageParamContentUnionMember1", + "ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "ChoiceMessageOpenAISystemMessageParam", + "ChoiceMessageOpenAISystemMessageParamContentUnionMember1", + "ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "ChoiceMessageOpenAIAssistantMessageParam", + "ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1", + "ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "ChoiceMessageOpenAIAssistantMessageParamToolCall", + "ChoiceMessageOpenAIAssistantMessageParamToolCallFunction", + "ChoiceMessageOpenAIToolMessageParam", + "ChoiceMessageOpenAIToolMessageParamContentUnionMember1", + "ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "ChoiceMessageOpenAIDeveloperMessageParam", + "ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1", + "ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "ChoiceLogprobs", + "ChoiceLogprobsContent", + "ChoiceLogprobsContentTopLogprob", + "ChoiceLogprobsRefusal", + "ChoiceLogprobsRefusalTopLogprob", + "InputMessage", + "InputMessageOpenAIUserMessageParam", + "InputMessageOpenAIUserMessageParamContentUnionMember1", + "InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + 
"InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "InputMessageOpenAISystemMessageParam", + "InputMessageOpenAISystemMessageParamContentUnionMember1", + "InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "InputMessageOpenAIAssistantMessageParam", + "InputMessageOpenAIAssistantMessageParamContentUnionMember1", + "InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "InputMessageOpenAIAssistantMessageParamToolCall", + "InputMessageOpenAIAssistantMessageParamToolCallFunction", + "InputMessageOpenAIToolMessageParam", + "InputMessageOpenAIToolMessageParamContentUnionMember1", + "InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "InputMessageOpenAIDeveloperMessageParam", + "InputMessageOpenAIDeveloperMessageParamContentUnionMember1", + "InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", +] + + +class ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + + type: Literal["image_url"] + + +ChoiceMessageOpenAIUserMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class ChoiceMessageOpenAIUserMessageParam(BaseModel): + content: Union[str, List[ChoiceMessageOpenAIUserMessageParamContentUnionMember1]] + """The content of the message, which can include text and other media""" + + role: Literal["user"] + """Must be "user" to identify this as a user message""" + + name: Optional[str] = None + """(Optional) The name of the user message participant.""" + + +class ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: 
Optional[str] = None + + +class ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + + type: Literal["image_url"] + + +ChoiceMessageOpenAISystemMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class ChoiceMessageOpenAISystemMessageParam(BaseModel): + content: Union[str, List[ChoiceMessageOpenAISystemMessageParamContentUnionMember1]] + """The content of the "system prompt". + + If multiple system messages are provided, they are concatenated. The underlying + Llama Stack code may also add other system messages (for example, for formatting + tool definitions). + """ + + role: Literal["system"] + """Must be "system" to identify this as a system message""" + + name: Optional[str] = None + """(Optional) The name of the system message participant.""" + + +class ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ( + ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class ChoiceMessageOpenAIAssistantMessageParamToolCallFunction(BaseModel): + arguments: Optional[str] = None + + name: Optional[str] = None + + +class ChoiceMessageOpenAIAssistantMessageParamToolCall(BaseModel): + type: Literal["function"] + + id: Optional[str] = None + + function: Optional[ChoiceMessageOpenAIAssistantMessageParamToolCallFunction] = None + + index: Optional[int] = None + + +class ChoiceMessageOpenAIAssistantMessageParam(BaseModel): + role: Literal["assistant"] + """Must be "assistant" to identify this as the model's response""" + + content: Union[str, List[ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1], None] = None + """The content of the model's response""" + + name: Optional[str] = None + """(Optional) The name of the assistant message participant.""" + + tool_calls: Optional[List[ChoiceMessageOpenAIAssistantMessageParamToolCall]] = None + """List of tool calls. 
Each tool call is an OpenAIChatCompletionToolCall object.""" + + +class ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + + type: Literal["image_url"] + + +ChoiceMessageOpenAIToolMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class ChoiceMessageOpenAIToolMessageParam(BaseModel): + content: Union[str, List[ChoiceMessageOpenAIToolMessageParamContentUnionMember1]] + """The response content from the tool""" + + role: Literal["tool"] + """Must be "tool" to identify this as a tool response""" + + tool_call_id: str + """Unique identifier for the tool call this response is for""" + + +class ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ( + ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class ChoiceMessageOpenAIDeveloperMessageParam(BaseModel): + content: Union[str, List[ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1]] + """The content of the developer message""" + + role: Literal["developer"] + """Must be "developer" to identify this as a developer message""" + + name: Optional[str] = None + """(Optional) The name of the developer message participant.""" + + +ChoiceMessage: TypeAlias = Annotated[ + Union[ + ChoiceMessageOpenAIUserMessageParam, + ChoiceMessageOpenAISystemMessageParam, + ChoiceMessageOpenAIAssistantMessageParam, + ChoiceMessageOpenAIToolMessageParam, + ChoiceMessageOpenAIDeveloperMessageParam, + ], + PropertyInfo(discriminator="role"), +] + + +class ChoiceLogprobsContentTopLogprob(BaseModel): + token: str + + logprob: float + + bytes: Optional[List[int]] = None + + +class ChoiceLogprobsContent(BaseModel): + token: str + + logprob: float + + top_logprobs: List[ChoiceLogprobsContentTopLogprob] + + bytes: Optional[List[int]] = None + + +class ChoiceLogprobsRefusalTopLogprob(BaseModel): + token: str + + logprob: float + + bytes: Optional[List[int]] = None + + +class ChoiceLogprobsRefusal(BaseModel): + token: str + + logprob: float + + top_logprobs: 
List[ChoiceLogprobsRefusalTopLogprob] + + bytes: Optional[List[int]] = None + + +class ChoiceLogprobs(BaseModel): + content: Optional[List[ChoiceLogprobsContent]] = None + """(Optional) The log probabilities for the tokens in the message""" + + refusal: Optional[List[ChoiceLogprobsRefusal]] = None + """(Optional) The log probabilities for the tokens in the message""" + + +class Choice(BaseModel): + finish_reason: str + """The reason the model stopped generating""" + + index: int + """The index of the choice""" + + message: ChoiceMessage + """The message from the model""" + + logprobs: Optional[ChoiceLogprobs] = None + """(Optional) The log probabilities for the tokens in the message""" + + +class InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL(BaseModel): + url: str + + detail: Optional[str] = None + + +class InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + + type: Literal["image_url"] + + +InputMessageOpenAIUserMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class InputMessageOpenAIUserMessageParam(BaseModel): + content: Union[str, List[InputMessageOpenAIUserMessageParamContentUnionMember1]] + """The content of the message, which can include text and other media""" + + role: Literal["user"] + """Must be "user" to identify this as a user message""" + + name: Optional[str] = None + """(Optional) The name of the user message participant.""" + + +class InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + + type: Literal["image_url"] + + +InputMessageOpenAISystemMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class InputMessageOpenAISystemMessageParam(BaseModel): + content: Union[str, List[InputMessageOpenAISystemMessageParamContentUnionMember1]] + """The content of the "system prompt". + + If multiple system messages are provided, they are concatenated. The underlying + Llama Stack code may also add other system messages (for example, for formatting + tool definitions). 
+ """ + + role: Literal["system"] + """Must be "system" to identify this as a system message""" + + name: Optional[str] = None + """(Optional) The name of the system message participant.""" + + +class InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ( + InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +InputMessageOpenAIAssistantMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class InputMessageOpenAIAssistantMessageParamToolCallFunction(BaseModel): + arguments: Optional[str] = None + + name: Optional[str] = None + + +class InputMessageOpenAIAssistantMessageParamToolCall(BaseModel): + type: Literal["function"] + + id: Optional[str] = None + + function: Optional[InputMessageOpenAIAssistantMessageParamToolCallFunction] = None + + index: Optional[int] = None + + +class InputMessageOpenAIAssistantMessageParam(BaseModel): + role: Literal["assistant"] + """Must be "assistant" to identify this as the model's response""" + + content: Union[str, List[InputMessageOpenAIAssistantMessageParamContentUnionMember1], None] = None + """The content of the model's response""" + + name: Optional[str] = None + """(Optional) The name of the assistant message participant.""" + + tool_calls: Optional[List[InputMessageOpenAIAssistantMessageParamToolCall]] = None + """List of tool calls. 
Each tool call is an OpenAIChatCompletionToolCall object.""" + + +class InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL(BaseModel): + url: str + + detail: Optional[str] = None + + +class InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + + type: Literal["image_url"] + + +InputMessageOpenAIToolMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class InputMessageOpenAIToolMessageParam(BaseModel): + content: Union[str, List[InputMessageOpenAIToolMessageParamContentUnionMember1]] + """The response content from the tool""" + + role: Literal["tool"] + """Must be "tool" to identify this as a tool response""" + + tool_call_id: str + """Unique identifier for the tool call this response is for""" + + +class InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ( + InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +InputMessageOpenAIDeveloperMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class InputMessageOpenAIDeveloperMessageParam(BaseModel): + content: Union[str, List[InputMessageOpenAIDeveloperMessageParamContentUnionMember1]] + """The content of the developer message""" + + role: Literal["developer"] + """Must be "developer" to identify this as a developer message""" + + name: Optional[str] = None + """(Optional) The name of the developer message participant.""" + + +InputMessage: TypeAlias = Annotated[ + Union[ + InputMessageOpenAIUserMessageParam, + InputMessageOpenAISystemMessageParam, + InputMessageOpenAIAssistantMessageParam, + InputMessageOpenAIToolMessageParam, + InputMessageOpenAIDeveloperMessageParam, + ], + PropertyInfo(discriminator="role"), +] + + +class CompletionRetrieveResponse(BaseModel): + id: str + """The ID of the chat completion""" + + choices: List[Choice] + """List of choices""" + + created: int + """The Unix timestamp in seconds when the chat completion was created""" + + input_messages: List[InputMessage] + + model: str + """The model that was used to generate the chat completion""" + + object: Literal["chat.completion"] + """The object type, which will be "chat.completion" """ diff --git a/src/llama_stack_client/types/completion_create_params.py 
b/src/llama_stack_client/types/completion_create_params.py index a92b733e..e43f05e4 100644 --- a/src/llama_stack_client/types/completion_create_params.py +++ b/src/llama_stack_client/types/completion_create_params.py @@ -17,63 +17,63 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ prompt: Required[Union[str, List[str], Iterable[int], Iterable[Iterable[int]]]] - """The prompt to generate a completion for""" + """The prompt to generate a completion for.""" best_of: int - """(Optional) The number of completions to generate""" + """(Optional) The number of completions to generate.""" echo: bool - """(Optional) Whether to echo the prompt""" + """(Optional) Whether to echo the prompt.""" frequency_penalty: float - """(Optional) The penalty for repeated tokens""" + """(Optional) The penalty for repeated tokens.""" guided_choice: List[str] logit_bias: Dict[str, float] - """(Optional) The logit bias to use""" + """(Optional) The logit bias to use.""" logprobs: bool - """(Optional) The log probabilities to use""" + """(Optional) The log probabilities to use.""" max_tokens: int - """(Optional) The maximum number of tokens to generate""" + """(Optional) The maximum number of tokens to generate.""" n: int - """(Optional) The number of completions to generate""" + """(Optional) The number of completions to generate.""" presence_penalty: float - """(Optional) The penalty for repeated tokens""" + """(Optional) The penalty for repeated tokens.""" prompt_logprobs: int seed: int - """(Optional) The seed to use""" + """(Optional) The seed to use.""" stop: Union[str, List[str]] - """(Optional) The stop tokens to use""" + """(Optional) The stop tokens to use.""" stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - """(Optional) The stream options to use""" + """(Optional) The stream options to use.""" temperature: float - """(Optional) The temperature to use""" + """(Optional) The temperature to use.""" top_p: float - """(Optional) The top p to use""" + """(Optional) The top p to use.""" user: str - """(Optional) The user to use""" + """(Optional) The user to use.""" class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False): stream: Literal[False] - """(Optional) Whether to stream the response""" + """(Optional) Whether to stream the response.""" class CompletionCreateParamsStreaming(CompletionCreateParamsBase): stream: Required[Literal[True]] - """(Optional) Whether to stream the response""" + """(Optional) Whether to stream the response.""" CompletionCreateParams = Union[CompletionCreateParamsNonStreaming, CompletionCreateParamsStreaming] diff --git a/src/llama_stack_client/types/dataset_list_response.py b/src/llama_stack_client/types/dataset_list_response.py index 902c6274..42b27ab4 100644 --- a/src/llama_stack_client/types/dataset_list_response.py +++ b/src/llama_stack_client/types/dataset_list_response.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import Dict, List, Union +from typing import Dict, List, Union, Optional from typing_extensions import Literal, Annotated, TypeAlias from .._utils import PropertyInfo @@ -50,8 +50,6 @@ class DatasetListResponseItem(BaseModel): provider_id: str - provider_resource_id: str - purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"] """Purpose of the dataset. 
Each purpose has a required input data schema.""" @@ -60,5 +58,7 @@ class DatasetListResponseItem(BaseModel): type: Literal["dataset"] + provider_resource_id: Optional[str] = None + DatasetListResponse: TypeAlias = List[DatasetListResponseItem] diff --git a/src/llama_stack_client/types/dataset_register_params.py b/src/llama_stack_client/types/dataset_register_params.py index 824dd0a9..6fd5db3f 100644 --- a/src/llama_stack_client/types/dataset_register_params.py +++ b/src/llama_stack_client/types/dataset_register_params.py @@ -12,7 +12,7 @@ class DatasetRegisterParams(TypedDict, total=False): purpose: Required[Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"]] """The purpose of the dataset. - One of - "post-training/messages": The dataset contains a messages column with + One of: - "post-training/messages": The dataset contains a messages column with list of messages for post-training. { "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset contains a question column and an answer @@ -41,7 +41,7 @@ class DatasetRegisterParams(TypedDict, total=False): """The ID of the dataset. If not provided, an ID will be generated.""" metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - """The metadata for the dataset. - E.g. {"description": "My dataset"}""" + """The metadata for the dataset. - E.g. {"description": "My dataset"}.""" class SourceUriDataSource(TypedDict, total=False): diff --git a/src/llama_stack_client/types/dataset_register_response.py b/src/llama_stack_client/types/dataset_register_response.py index 8038b192..a79367bb 100644 --- a/src/llama_stack_client/types/dataset_register_response.py +++ b/src/llama_stack_client/types/dataset_register_response.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import Dict, List, Union +from typing import Dict, List, Union, Optional from typing_extensions import Literal, Annotated, TypeAlias from .._utils import PropertyInfo @@ -41,8 +41,6 @@ class DatasetRegisterResponse(BaseModel): provider_id: str - provider_resource_id: str - purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"] """Purpose of the dataset. Each purpose has a required input data schema.""" @@ -50,3 +48,5 @@ class DatasetRegisterResponse(BaseModel): """A dataset that can be obtained from a URI.""" type: Literal["dataset"] + + provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/dataset_retrieve_response.py b/src/llama_stack_client/types/dataset_retrieve_response.py index debce418..ab96c387 100644 --- a/src/llama_stack_client/types/dataset_retrieve_response.py +++ b/src/llama_stack_client/types/dataset_retrieve_response.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import Dict, List, Union +from typing import Dict, List, Union, Optional from typing_extensions import Literal, Annotated, TypeAlias from .._utils import PropertyInfo @@ -41,8 +41,6 @@ class DatasetRetrieveResponse(BaseModel): provider_id: str - provider_resource_id: str - purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"] """Purpose of the dataset. 
Each purpose has a required input data schema.""" @@ -50,3 +48,5 @@ class DatasetRetrieveResponse(BaseModel): """A dataset that can be obtained from a URI.""" type: Literal["dataset"] + + provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/inference_batch_chat_completion_params.py b/src/llama_stack_client/types/inference_batch_chat_completion_params.py index ca53fdbf..b5da0f0e 100644 --- a/src/llama_stack_client/types/inference_batch_chat_completion_params.py +++ b/src/llama_stack_client/types/inference_batch_chat_completion_params.py @@ -15,21 +15,32 @@ class InferenceBatchChatCompletionParams(TypedDict, total=False): messages_batch: Required[Iterable[Iterable[Message]]] + """The messages to generate completions for.""" model_id: Required[str] + """The identifier of the model to use. + + The model must be registered with Llama Stack and available via the /models + endpoint. + """ logprobs: Logprobs + """ + (Optional) If specified, log probabilities for each token position will be + returned. + """ response_format: ResponseFormat - """Configuration for JSON schema-guided response generation.""" + """(Optional) Grammar specification for guided (structured) decoding.""" sampling_params: SamplingParams - """Sampling parameters.""" + """(Optional) Parameters to control the sampling strategy.""" tool_config: ToolConfig - """Configuration for tool use.""" + """(Optional) Configuration for tool use.""" tools: Iterable[Tool] + """(Optional) List of tool definitions available to the model.""" class Logprobs(TypedDict, total=False): diff --git a/src/llama_stack_client/types/inference_batch_completion_params.py b/src/llama_stack_client/types/inference_batch_completion_params.py index cbeb9309..d3db8e13 100644 --- a/src/llama_stack_client/types/inference_batch_completion_params.py +++ b/src/llama_stack_client/types/inference_batch_completion_params.py @@ -14,16 +14,26 @@ class InferenceBatchCompletionParams(TypedDict, total=False): content_batch: Required[List[InterleavedContent]] + """The content to generate completions for.""" model_id: Required[str] + """The identifier of the model to use. + + The model must be registered with Llama Stack and available via the /models + endpoint. + """ logprobs: Logprobs + """ + (Optional) If specified, log probabilities for each token position will be + returned. + """ response_format: ResponseFormat - """Configuration for JSON schema-guided response generation.""" + """(Optional) Grammar specification for guided (structured) decoding.""" sampling_params: SamplingParams - """Sampling parameters.""" + """(Optional) Parameters to control the sampling strategy.""" class Logprobs(TypedDict, total=False): diff --git a/src/llama_stack_client/types/inference_chat_completion_params.py b/src/llama_stack_client/types/inference_chat_completion_params.py index 18eb1bf7..746d3dee 100644 --- a/src/llama_stack_client/types/inference_chat_completion_params.py +++ b/src/llama_stack_client/types/inference_chat_completion_params.py @@ -22,7 +22,7 @@ class InferenceChatCompletionParamsBase(TypedDict, total=False): messages: Required[Iterable[Message]] - """List of messages in the conversation""" + """List of messages in the conversation.""" model_id: Required[str] """The identifier of the model to use. 
@@ -47,7 +47,7 @@ class InferenceChatCompletionParamsBase(TypedDict, total=False): """ sampling_params: SamplingParams - """Parameters to control the sampling strategy""" + """Parameters to control the sampling strategy.""" tool_choice: Literal["auto", "required", "none"] """(Optional) Whether tool use is required or automatic. @@ -70,7 +70,7 @@ class InferenceChatCompletionParamsBase(TypedDict, total=False): """ tools: Iterable[Tool] - """(Optional) List of tool definitions available to the model""" + """(Optional) List of tool definitions available to the model.""" class Logprobs(TypedDict, total=False): diff --git a/src/llama_stack_client/types/inference_completion_params.py b/src/llama_stack_client/types/inference_completion_params.py index 60ccefce..c122f017 100644 --- a/src/llama_stack_client/types/inference_completion_params.py +++ b/src/llama_stack_client/types/inference_completion_params.py @@ -19,7 +19,7 @@ class InferenceCompletionParamsBase(TypedDict, total=False): content: Required[InterleavedContent] - """The content to generate a completion for""" + """The content to generate a completion for.""" model_id: Required[str] """The identifier of the model to use. @@ -35,10 +35,10 @@ class InferenceCompletionParamsBase(TypedDict, total=False): """ response_format: ResponseFormat - """(Optional) Grammar specification for guided (structured) decoding""" + """(Optional) Grammar specification for guided (structured) decoding.""" sampling_params: SamplingParams - """(Optional) Parameters to control the sampling strategy""" + """(Optional) Parameters to control the sampling strategy.""" class Logprobs(TypedDict, total=False): diff --git a/src/llama_stack_client/types/inference_step.py b/src/llama_stack_client/types/inference_step.py index bfd92166..2aecb193 100644 --- a/src/llama_stack_client/types/inference_step.py +++ b/src/llama_stack_client/types/inference_step.py @@ -20,6 +20,7 @@ class InferenceStep(BaseModel): """The ID of the step.""" step_type: Literal["inference"] + """Type of the step in an agent turn.""" turn_id: str """The ID of the turn.""" diff --git a/src/llama_stack_client/types/memory_retrieval_step.py b/src/llama_stack_client/types/memory_retrieval_step.py index bd8ab735..887e9986 100644 --- a/src/llama_stack_client/types/memory_retrieval_step.py +++ b/src/llama_stack_client/types/memory_retrieval_step.py @@ -18,6 +18,7 @@ class MemoryRetrievalStep(BaseModel): """The ID of the step.""" step_type: Literal["memory_retrieval"] + """Type of the step in an agent turn.""" turn_id: str """The ID of the turn.""" diff --git a/src/llama_stack_client/types/model.py b/src/llama_stack_client/types/model.py index afa12810..dea24d53 100644 --- a/src/llama_stack_client/types/model.py +++ b/src/llama_stack_client/types/model.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-from typing import Dict, List, Union +from typing import Dict, List, Union, Optional from typing_extensions import Literal from pydantic import Field as FieldInfo @@ -19,6 +19,6 @@ class Model(BaseModel): provider_id: str - provider_resource_id: str - type: Literal["model"] + + provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/model_register_params.py b/src/llama_stack_client/types/model_register_params.py index be5d72cc..fb1d9fb6 100644 --- a/src/llama_stack_client/types/model_register_params.py +++ b/src/llama_stack_client/types/model_register_params.py @@ -10,11 +10,16 @@ class ModelRegisterParams(TypedDict, total=False): model_id: Required[str] + """The identifier of the model to register.""" metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + """Any additional metadata for this model.""" model_type: Literal["llm", "embedding"] + """The type of model to register.""" provider_id: str + """The identifier of the provider.""" provider_model_id: str + """The identifier of the model in the provider.""" diff --git a/src/llama_stack_client/types/post_training/job_artifacts_params.py b/src/llama_stack_client/types/post_training/job_artifacts_params.py index 4f75a133..851ebf5f 100644 --- a/src/llama_stack_client/types/post_training/job_artifacts_params.py +++ b/src/llama_stack_client/types/post_training/job_artifacts_params.py @@ -9,3 +9,4 @@ class JobArtifactsParams(TypedDict, total=False): job_uuid: Required[str] + """The UUID of the job to get the artifacts of.""" diff --git a/src/llama_stack_client/types/post_training/job_cancel_params.py b/src/llama_stack_client/types/post_training/job_cancel_params.py index c9c30d84..3a976e87 100644 --- a/src/llama_stack_client/types/post_training/job_cancel_params.py +++ b/src/llama_stack_client/types/post_training/job_cancel_params.py @@ -9,3 +9,4 @@ class JobCancelParams(TypedDict, total=False): job_uuid: Required[str] + """The UUID of the job to cancel.""" diff --git a/src/llama_stack_client/types/post_training/job_status_params.py b/src/llama_stack_client/types/post_training/job_status_params.py index 8cf17b03..d5e040e0 100644 --- a/src/llama_stack_client/types/post_training/job_status_params.py +++ b/src/llama_stack_client/types/post_training/job_status_params.py @@ -9,3 +9,4 @@ class JobStatusParams(TypedDict, total=False): job_uuid: Required[str] + """The UUID of the job to get the status of.""" diff --git a/src/llama_stack_client/types/post_training_preference_optimize_params.py b/src/llama_stack_client/types/post_training_preference_optimize_params.py index 11392907..f7d998eb 100644 --- a/src/llama_stack_client/types/post_training_preference_optimize_params.py +++ b/src/llama_stack_client/types/post_training_preference_optimize_params.py @@ -17,16 +17,22 @@ class PostTrainingPreferenceOptimizeParams(TypedDict, total=False): algorithm_config: Required[AlgorithmConfig] + """The algorithm configuration.""" finetuned_model: Required[str] + """The model to fine-tune.""" hyperparam_search_config: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + """The hyperparam search configuration.""" job_uuid: Required[str] + """The UUID of the job to create.""" logger_config: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + """The logger configuration.""" training_config: Required[TrainingConfig] + """The training configuration.""" class AlgorithmConfig(TypedDict, total=False): diff --git 
a/src/llama_stack_client/types/post_training_supervised_fine_tune_params.py b/src/llama_stack_client/types/post_training_supervised_fine_tune_params.py index ad298817..596ec18b 100644 --- a/src/llama_stack_client/types/post_training_supervised_fine_tune_params.py +++ b/src/llama_stack_client/types/post_training_supervised_fine_tune_params.py @@ -18,18 +18,25 @@ class PostTrainingSupervisedFineTuneParams(TypedDict, total=False): hyperparam_search_config: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + """The hyperparam search configuration.""" job_uuid: Required[str] + """The UUID of the job to create.""" logger_config: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + """The logger configuration.""" training_config: Required[TrainingConfig] + """The training configuration.""" algorithm_config: AlgorithmConfigParam + """The algorithm configuration.""" checkpoint_dir: str + """The directory to save checkpoint(s) to.""" model: str + """The model to fine-tune.""" class TrainingConfigDataConfig(TypedDict, total=False): diff --git a/src/llama_stack_client/types/response_create_params.py b/src/llama_stack_client/types/response_create_params.py index 5e5072f5..53cb6b62 100644 --- a/src/llama_stack_client/types/response_create_params.py +++ b/src/llama_stack_client/types/response_create_params.py @@ -2,16 +2,30 @@ from __future__ import annotations -from typing import Union, Iterable +from typing import Dict, List, Union, Iterable from typing_extensions import Literal, Required, TypeAlias, TypedDict __all__ = [ "ResponseCreateParamsBase", "InputUnionMember1", - "InputUnionMember1ContentUnionMember1", - "InputUnionMember1ContentUnionMember1OpenAIResponseInputMessageContentText", - "InputUnionMember1ContentUnionMember1OpenAIResponseInputMessageContentImage", + "InputUnionMember1OpenAIResponseOutputMessageWebSearchToolCall", + "InputUnionMember1OpenAIResponseOutputMessageFunctionToolCall", + "InputUnionMember1OpenAIResponseInputFunctionToolCallOutput", + "InputUnionMember1OpenAIResponseMessage", + "InputUnionMember1OpenAIResponseMessageContentUnionMember1", + "InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", + "InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", + "InputUnionMember1OpenAIResponseMessageContentUnionMember2", "Tool", + "ToolOpenAIResponseInputToolWebSearch", + "ToolOpenAIResponseInputToolFileSearch", + "ToolOpenAIResponseInputToolFileSearchRankingOptions", + "ToolOpenAIResponseInputToolFunction", + "ToolOpenAIResponseInputToolMcp", + "ToolOpenAIResponseInputToolMcpRequireApproval", + "ToolOpenAIResponseInputToolMcpRequireApprovalApprovalFilter", + "ToolOpenAIResponseInputToolMcpAllowedTools", + "ToolOpenAIResponseInputToolMcpAllowedToolsAllowedToolsFilter", "ResponseCreateParamsNonStreaming", "ResponseCreateParamsStreaming", ] @@ -24,6 +38,8 @@ class ResponseCreateParamsBase(TypedDict, total=False): model: Required[str] """The underlying LLM used for completions.""" + instructions: str + previous_response_id: str """ (Optional) if specified, the new response will be a continuation of the previous @@ -33,16 +49,56 @@ class ResponseCreateParamsBase(TypedDict, total=False): store: bool + temperature: float + tools: Iterable[Tool] -class InputUnionMember1ContentUnionMember1OpenAIResponseInputMessageContentText(TypedDict, total=False): +class InputUnionMember1OpenAIResponseOutputMessageWebSearchToolCall(TypedDict, total=False): + id: 
Required[str] + + status: Required[str] + + type: Required[Literal["web_search_call"]] + + +class InputUnionMember1OpenAIResponseOutputMessageFunctionToolCall(TypedDict, total=False): + id: Required[str] + + arguments: Required[str] + + call_id: Required[str] + + name: Required[str] + + status: Required[str] + + type: Required[Literal["function_call"]] + + +class InputUnionMember1OpenAIResponseInputFunctionToolCallOutput(TypedDict, total=False): + call_id: Required[str] + + output: Required[str] + + type: Required[Literal["function_call_output"]] + + id: str + + status: str + + +class InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText( + TypedDict, total=False +): text: Required[str] type: Required[Literal["input_text"]] -class InputUnionMember1ContentUnionMember1OpenAIResponseInputMessageContentImage(TypedDict, total=False): +class InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage( + TypedDict, total=False +): detail: Required[Literal["low", "high", "auto"]] type: Required[Literal["input_image"]] @@ -50,26 +106,118 @@ class InputUnionMember1ContentUnionMember1OpenAIResponseInputMessageContentImage image_url: str -InputUnionMember1ContentUnionMember1: TypeAlias = Union[ - InputUnionMember1ContentUnionMember1OpenAIResponseInputMessageContentText, - InputUnionMember1ContentUnionMember1OpenAIResponseInputMessageContentImage, +InputUnionMember1OpenAIResponseMessageContentUnionMember1: TypeAlias = Union[ + InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText, + InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage, ] -class InputUnionMember1(TypedDict, total=False): - content: Required[Union[str, Iterable[InputUnionMember1ContentUnionMember1]]] +class InputUnionMember1OpenAIResponseMessageContentUnionMember2(TypedDict, total=False): + text: Required[str] + + type: Required[Literal["output_text"]] + + +class InputUnionMember1OpenAIResponseMessage(TypedDict, total=False): + content: Required[ + Union[ + str, + Iterable[InputUnionMember1OpenAIResponseMessageContentUnionMember1], + Iterable[InputUnionMember1OpenAIResponseMessageContentUnionMember2], + ] + ] role: Required[Literal["system", "developer", "user", "assistant"]] - type: Literal["message"] + type: Required[Literal["message"]] + + id: str + status: str -class Tool(TypedDict, total=False): + +InputUnionMember1: TypeAlias = Union[ + InputUnionMember1OpenAIResponseOutputMessageWebSearchToolCall, + InputUnionMember1OpenAIResponseOutputMessageFunctionToolCall, + InputUnionMember1OpenAIResponseInputFunctionToolCallOutput, + InputUnionMember1OpenAIResponseMessage, +] + + +class ToolOpenAIResponseInputToolWebSearch(TypedDict, total=False): type: Required[Literal["web_search", "web_search_preview_2025_03_11"]] search_context_size: str +class ToolOpenAIResponseInputToolFileSearchRankingOptions(TypedDict, total=False): + ranker: str + + score_threshold: float + + +class ToolOpenAIResponseInputToolFileSearch(TypedDict, total=False): + type: Required[Literal["file_search"]] + + vector_store_id: Required[List[str]] + + ranking_options: ToolOpenAIResponseInputToolFileSearchRankingOptions + + +class ToolOpenAIResponseInputToolFunction(TypedDict, total=False): + name: Required[str] + + type: Required[Literal["function"]] + + description: str + + parameters: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + + strict: bool + + +class 
ToolOpenAIResponseInputToolMcpRequireApprovalApprovalFilter(TypedDict, total=False): + always: List[str] + + never: List[str] + + +ToolOpenAIResponseInputToolMcpRequireApproval: TypeAlias = Union[ + Literal["always", "never"], ToolOpenAIResponseInputToolMcpRequireApprovalApprovalFilter +] + + +class ToolOpenAIResponseInputToolMcpAllowedToolsAllowedToolsFilter(TypedDict, total=False): + tool_names: List[str] + + +ToolOpenAIResponseInputToolMcpAllowedTools: TypeAlias = Union[ + List[str], ToolOpenAIResponseInputToolMcpAllowedToolsAllowedToolsFilter +] + + +class ToolOpenAIResponseInputToolMcp(TypedDict, total=False): + require_approval: Required[ToolOpenAIResponseInputToolMcpRequireApproval] + + server_label: Required[str] + + server_url: Required[str] + + type: Required[Literal["mcp"]] + + allowed_tools: ToolOpenAIResponseInputToolMcpAllowedTools + + headers: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + + +Tool: TypeAlias = Union[ + ToolOpenAIResponseInputToolWebSearch, + ToolOpenAIResponseInputToolFileSearch, + ToolOpenAIResponseInputToolFunction, + ToolOpenAIResponseInputToolMcp, +] + + class ResponseCreateParamsNonStreaming(ResponseCreateParamsBase, total=False): stream: Literal[False] diff --git a/src/llama_stack_client/types/response_object.py b/src/llama_stack_client/types/response_object.py index 6e1161e2..7a56af88 100644 --- a/src/llama_stack_client/types/response_object.py +++ b/src/llama_stack_client/types/response_object.py @@ -9,30 +9,59 @@ __all__ = [ "ResponseObject", "Output", - "OutputOpenAIResponseOutputMessage", - "OutputOpenAIResponseOutputMessageContent", + "OutputOpenAIResponseMessage", + "OutputOpenAIResponseMessageContentUnionMember1", + "OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", + "OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", + "OutputOpenAIResponseMessageContentUnionMember2", "OutputOpenAIResponseOutputMessageWebSearchToolCall", + "OutputOpenAIResponseOutputMessageFunctionToolCall", "Error", ] -class OutputOpenAIResponseOutputMessageContent(BaseModel): +class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText(BaseModel): text: str - type: Literal["output_text"] + type: Literal["input_text"] -class OutputOpenAIResponseOutputMessage(BaseModel): - id: str +class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage(BaseModel): + detail: Literal["low", "high", "auto"] - content: List[OutputOpenAIResponseOutputMessageContent] + type: Literal["input_image"] - role: Literal["assistant"] + image_url: Optional[str] = None - status: str + +OutputOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[ + Union[ + OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText, + OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage, + ], + PropertyInfo(discriminator="type"), +] + + +class OutputOpenAIResponseMessageContentUnionMember2(BaseModel): + text: str + + type: Literal["output_text"] + + +class OutputOpenAIResponseMessage(BaseModel): + content: Union[ + str, List[OutputOpenAIResponseMessageContentUnionMember1], List[OutputOpenAIResponseMessageContentUnionMember2] + ] + + role: Literal["system", "developer", "user", "assistant"] type: Literal["message"] + id: Optional[str] = None + + status: Optional[str] = None + class OutputOpenAIResponseOutputMessageWebSearchToolCall(BaseModel): id: str @@ -42,8 +71,26 @@ class 
OutputOpenAIResponseOutputMessageWebSearchToolCall(BaseModel): type: Literal["web_search_call"] +class OutputOpenAIResponseOutputMessageFunctionToolCall(BaseModel): + id: str + + arguments: str + + call_id: str + + name: str + + status: str + + type: Literal["function_call"] + + Output: TypeAlias = Annotated[ - Union[OutputOpenAIResponseOutputMessage, OutputOpenAIResponseOutputMessageWebSearchToolCall], + Union[ + OutputOpenAIResponseMessage, + OutputOpenAIResponseOutputMessageWebSearchToolCall, + OutputOpenAIResponseOutputMessageFunctionToolCall, + ], PropertyInfo(discriminator="type"), ] @@ -55,6 +102,16 @@ class Error(BaseModel): class ResponseObject(BaseModel): + + def output_text(self) -> str: + texts: List[str] = [] + for output in self.output: + if output.type == "message": + for content in output.content: + if content.type == "output_text": + texts.append(content.text) + return "".join(texts) + id: str created_at: int diff --git a/src/llama_stack_client/types/safety_run_shield_params.py b/src/llama_stack_client/types/safety_run_shield_params.py index d2eab4c0..764be674 100644 --- a/src/llama_stack_client/types/safety_run_shield_params.py +++ b/src/llama_stack_client/types/safety_run_shield_params.py @@ -12,7 +12,10 @@ class SafetyRunShieldParams(TypedDict, total=False): messages: Required[Iterable[Message]] + """The messages to run the shield on.""" params: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + """The parameters of the shield.""" shield_id: Required[str] + """The identifier of the shield to run.""" diff --git a/src/llama_stack_client/types/scoring_fn.py b/src/llama_stack_client/types/scoring_fn.py index 459f090b..3569cb44 100644 --- a/src/llama_stack_client/types/scoring_fn.py +++ b/src/llama_stack_client/types/scoring_fn.py @@ -17,8 +17,6 @@ class ScoringFn(BaseModel): provider_id: str - provider_resource_id: str - return_type: ReturnType type: Literal["scoring_function"] @@ -26,3 +24,5 @@ class ScoringFn(BaseModel): description: Optional[str] = None params: Optional[ScoringFnParams] = None + + provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/scoring_fn_params.py b/src/llama_stack_client/types/scoring_fn_params.py index 6f4a62b0..a46b46f5 100644 --- a/src/llama_stack_client/types/scoring_fn_params.py +++ b/src/llama_stack_client/types/scoring_fn_params.py @@ -10,35 +10,29 @@ class LlmAsJudgeScoringFnParams(BaseModel): - judge_model: str + aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - type: Literal["llm_as_judge"] + judge_model: str - aggregation_functions: Optional[ - List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - ] = None + judge_score_regexes: List[str] - judge_score_regexes: Optional[List[str]] = None + type: Literal["llm_as_judge"] prompt_template: Optional[str] = None class RegexParserScoringFnParams(BaseModel): - type: Literal["regex_parser"] + aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - aggregation_functions: Optional[ - List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - ] = None + parsing_regexes: List[str] - parsing_regexes: Optional[List[str]] = None + type: Literal["regex_parser"] class BasicScoringFnParams(BaseModel): - type: Literal["basic"] + aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - aggregation_functions: 
Optional[ - List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - ] = None + type: Literal["basic"] ScoringFnParams: TypeAlias = Annotated[ diff --git a/src/llama_stack_client/types/scoring_fn_params_param.py b/src/llama_stack_client/types/scoring_fn_params_param.py index 4c255b52..b404bc89 100644 --- a/src/llama_stack_client/types/scoring_fn_params_param.py +++ b/src/llama_stack_client/types/scoring_fn_params_param.py @@ -9,29 +9,35 @@ class LlmAsJudgeScoringFnParams(TypedDict, total=False): - judge_model: Required[str] + aggregation_functions: Required[ + List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] + ] - type: Required[Literal["llm_as_judge"]] + judge_model: Required[str] - aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] + judge_score_regexes: Required[List[str]] - judge_score_regexes: List[str] + type: Required[Literal["llm_as_judge"]] prompt_template: str class RegexParserScoringFnParams(TypedDict, total=False): - type: Required[Literal["regex_parser"]] + aggregation_functions: Required[ + List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] + ] - aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] + parsing_regexes: Required[List[str]] - parsing_regexes: List[str] + type: Required[Literal["regex_parser"]] class BasicScoringFnParams(TypedDict, total=False): - type: Required[Literal["basic"]] + aggregation_functions: Required[ + List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] + ] - aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] + type: Required[Literal["basic"]] ScoringFnParamsParam: TypeAlias = Union[LlmAsJudgeScoringFnParams, RegexParserScoringFnParams, BasicScoringFnParams] diff --git a/src/llama_stack_client/types/scoring_function_register_params.py b/src/llama_stack_client/types/scoring_function_register_params.py index 6c9cb1da..c9932710 100644 --- a/src/llama_stack_client/types/scoring_function_register_params.py +++ b/src/llama_stack_client/types/scoring_function_register_params.py @@ -12,13 +12,21 @@ class ScoringFunctionRegisterParams(TypedDict, total=False): description: Required[str] + """The description of the scoring function.""" return_type: Required[ReturnType] scoring_fn_id: Required[str] + """The ID of the scoring function to register.""" params: ScoringFnParamsParam + """ + The parameters for the scoring function for benchmark eval, these can be + overridden for app eval. 
+ """ provider_id: str + """The ID of the provider to use for the scoring function.""" provider_scoring_fn_id: str + """The ID of the provider scoring function to use for the scoring function.""" diff --git a/src/llama_stack_client/types/scoring_score_batch_params.py b/src/llama_stack_client/types/scoring_score_batch_params.py index aa12ac8c..28dfa86e 100644 --- a/src/llama_stack_client/types/scoring_score_batch_params.py +++ b/src/llama_stack_client/types/scoring_score_batch_params.py @@ -12,7 +12,10 @@ class ScoringScoreBatchParams(TypedDict, total=False): dataset_id: Required[str] + """The ID of the dataset to score.""" save_results_dataset: Required[bool] + """Whether to save the results to a dataset.""" scoring_functions: Required[Dict[str, Optional[ScoringFnParamsParam]]] + """The scoring functions to use for the scoring.""" diff --git a/src/llama_stack_client/types/shared/query_config.py b/src/llama_stack_client/types/shared/query_config.py index 679f7dcb..5695bc20 100644 --- a/src/llama_stack_client/types/shared/query_config.py +++ b/src/llama_stack_client/types/shared/query_config.py @@ -1,5 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from typing import Optional + from ..._models import BaseModel from .query_generator_config import QueryGeneratorConfig @@ -7,8 +9,22 @@ class QueryConfig(BaseModel): + chunk_template: str + """Template for formatting each retrieved chunk in the context. + + Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk + content string), {metadata} (chunk metadata dict). Default: "Result + {index}\nContent: {chunk.content}\nMetadata: {metadata}\n" + """ + max_chunks: int + """Maximum number of chunks to retrieve.""" max_tokens_in_context: int + """Maximum number of tokens in the context.""" query_generator_config: QueryGeneratorConfig + """Configuration for the query generator.""" + + mode: Optional[str] = None + """Search mode for retrieval—either "vector" or "keyword". Default "vector".""" diff --git a/src/llama_stack_client/types/shared_params/query_config.py b/src/llama_stack_client/types/shared_params/query_config.py index 94cb1b8c..ded8ff9e 100644 --- a/src/llama_stack_client/types/shared_params/query_config.py +++ b/src/llama_stack_client/types/shared_params/query_config.py @@ -10,8 +10,22 @@ class QueryConfig(TypedDict, total=False): + chunk_template: Required[str] + """Template for formatting each retrieved chunk in the context. + + Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk + content string), {metadata} (chunk metadata dict). Default: "Result + {index}\nContent: {chunk.content}\nMetadata: {metadata}\n" + """ + max_chunks: Required[int] + """Maximum number of chunks to retrieve.""" max_tokens_in_context: Required[int] + """Maximum number of tokens in the context.""" query_generator_config: Required[QueryGeneratorConfig] + """Configuration for the query generator.""" + + mode: str + """Search mode for retrieval—either "vector" or "keyword". 
Default "vector".""" diff --git a/src/llama_stack_client/types/shield.py b/src/llama_stack_client/types/shield.py index 978bd255..ff5f01f1 100644 --- a/src/llama_stack_client/types/shield.py +++ b/src/llama_stack_client/types/shield.py @@ -13,8 +13,8 @@ class Shield(BaseModel): provider_id: str - provider_resource_id: str - type: Literal["shield"] params: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None + + provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/shield_call_step.py b/src/llama_stack_client/types/shield_call_step.py index 38ca0b5c..e19734c6 100644 --- a/src/llama_stack_client/types/shield_call_step.py +++ b/src/llama_stack_client/types/shield_call_step.py @@ -15,6 +15,7 @@ class ShieldCallStep(BaseModel): """The ID of the step.""" step_type: Literal["shield_call"] + """Type of the step in an agent turn.""" turn_id: str """The ID of the turn.""" diff --git a/src/llama_stack_client/types/shield_register_params.py b/src/llama_stack_client/types/shield_register_params.py index 2a51fd04..7ae0b2c1 100644 --- a/src/llama_stack_client/types/shield_register_params.py +++ b/src/llama_stack_client/types/shield_register_params.py @@ -10,9 +10,13 @@ class ShieldRegisterParams(TypedDict, total=False): shield_id: Required[str] + """The identifier of the shield to register.""" params: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + """The parameters of the shield.""" provider_id: str + """The identifier of the provider.""" provider_shield_id: str + """The identifier of the shield in the provider.""" diff --git a/src/llama_stack_client/types/telemetry_get_span_tree_params.py b/src/llama_stack_client/types/telemetry_get_span_tree_params.py index d9d647dd..7d309d3e 100644 --- a/src/llama_stack_client/types/telemetry_get_span_tree_params.py +++ b/src/llama_stack_client/types/telemetry_get_span_tree_params.py @@ -10,5 +10,7 @@ class TelemetryGetSpanTreeParams(TypedDict, total=False): attributes_to_return: List[str] + """The attributes to return in the tree.""" max_depth: int + """The maximum depth of the tree.""" diff --git a/src/llama_stack_client/types/telemetry_log_event_params.py b/src/llama_stack_client/types/telemetry_log_event_params.py index ef536792..246b6526 100644 --- a/src/llama_stack_client/types/telemetry_log_event_params.py +++ b/src/llama_stack_client/types/telemetry_log_event_params.py @@ -11,5 +11,7 @@ class TelemetryLogEventParams(TypedDict, total=False): event: Required[EventParam] + """The event to log.""" ttl_seconds: Required[int] + """The time to live of the event.""" diff --git a/src/llama_stack_client/types/telemetry_query_spans_params.py b/src/llama_stack_client/types/telemetry_query_spans_params.py index 897a024a..6429c08f 100644 --- a/src/llama_stack_client/types/telemetry_query_spans_params.py +++ b/src/llama_stack_client/types/telemetry_query_spans_params.py @@ -12,7 +12,10 @@ class TelemetryQuerySpansParams(TypedDict, total=False): attribute_filters: Required[Iterable[QueryConditionParam]] + """The attribute filters to apply to the spans.""" attributes_to_return: Required[List[str]] + """The attributes to return in the spans.""" max_depth: int + """The maximum depth of the tree.""" diff --git a/src/llama_stack_client/types/telemetry_query_traces_params.py b/src/llama_stack_client/types/telemetry_query_traces_params.py index 425b1a00..7c82ef14 100644 --- a/src/llama_stack_client/types/telemetry_query_traces_params.py +++ b/src/llama_stack_client/types/telemetry_query_traces_params.py 
@@ -12,9 +12,13 @@ class TelemetryQueryTracesParams(TypedDict, total=False): attribute_filters: Iterable[QueryConditionParam] + """The attribute filters to apply to the traces.""" limit: int + """The limit of traces to return.""" offset: int + """The offset of the traces to return.""" order_by: List[str] + """The order by of the traces to return.""" diff --git a/src/llama_stack_client/types/telemetry_save_spans_to_dataset_params.py b/src/llama_stack_client/types/telemetry_save_spans_to_dataset_params.py index 5dc8419c..bb96f8e3 100644 --- a/src/llama_stack_client/types/telemetry_save_spans_to_dataset_params.py +++ b/src/llama_stack_client/types/telemetry_save_spans_to_dataset_params.py @@ -12,9 +12,13 @@ class TelemetrySaveSpansToDatasetParams(TypedDict, total=False): attribute_filters: Required[Iterable[QueryConditionParam]] + """The attribute filters to apply to the spans.""" attributes_to_save: Required[List[str]] + """The attributes to save to the dataset.""" dataset_id: Required[str] + """The ID of the dataset to save the spans to.""" max_depth: int + """The maximum depth of the tree.""" diff --git a/src/llama_stack_client/types/tool.py b/src/llama_stack_client/types/tool.py index c1c0cce7..59e9300f 100644 --- a/src/llama_stack_client/types/tool.py +++ b/src/llama_stack_client/types/tool.py @@ -29,8 +29,6 @@ class Tool(BaseModel): provider_id: str - provider_resource_id: str - tool_host: Literal["distribution", "client", "model_context_protocol"] toolgroup_id: str @@ -38,3 +36,5 @@ class Tool(BaseModel): type: Literal["tool"] metadata: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None + + provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/tool_execution_step.py b/src/llama_stack_client/types/tool_execution_step.py index 5eb2c23b..f68115fc 100644 --- a/src/llama_stack_client/types/tool_execution_step.py +++ b/src/llama_stack_client/types/tool_execution_step.py @@ -16,6 +16,7 @@ class ToolExecutionStep(BaseModel): """The ID of the step.""" step_type: Literal["tool_execution"] + """Type of the step in an agent turn.""" tool_calls: List[ToolCall] """The tool calls to execute.""" diff --git a/src/llama_stack_client/types/tool_group.py b/src/llama_stack_client/types/tool_group.py index 480d1942..3389395a 100644 --- a/src/llama_stack_client/types/tool_group.py +++ b/src/llama_stack_client/types/tool_group.py @@ -17,10 +17,10 @@ class ToolGroup(BaseModel): provider_id: str - provider_resource_id: str - type: Literal["tool_group"] args: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None mcp_endpoint: Optional[McpEndpoint] = None + + provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/tool_list_params.py b/src/llama_stack_client/types/tool_list_params.py index c0953896..38f4bf73 100644 --- a/src/llama_stack_client/types/tool_list_params.py +++ b/src/llama_stack_client/types/tool_list_params.py @@ -9,3 +9,4 @@ class ToolListParams(TypedDict, total=False): toolgroup_id: str + """The ID of the tool group to list tools for.""" diff --git a/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py b/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py index 08208b77..4599c693 100644 --- a/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py +++ b/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py @@ -18,3 +18,4 @@ class RagToolQueryParams(TypedDict, total=False): vector_db_ids: Required[List[str]] query_config: QueryConfig 
+ """Configuration for the RAG query generation.""" diff --git a/src/llama_stack_client/types/tool_runtime_invoke_tool_params.py b/src/llama_stack_client/types/tool_runtime_invoke_tool_params.py index 68b40462..03df2d40 100644 --- a/src/llama_stack_client/types/tool_runtime_invoke_tool_params.py +++ b/src/llama_stack_client/types/tool_runtime_invoke_tool_params.py @@ -10,5 +10,7 @@ class ToolRuntimeInvokeToolParams(TypedDict, total=False): kwargs: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + """A dictionary of arguments to pass to the tool.""" tool_name: Required[str] + """The name of the tool to invoke.""" diff --git a/src/llama_stack_client/types/tool_runtime_list_tools_params.py b/src/llama_stack_client/types/tool_runtime_list_tools_params.py index 99da7533..539e176d 100644 --- a/src/llama_stack_client/types/tool_runtime_list_tools_params.py +++ b/src/llama_stack_client/types/tool_runtime_list_tools_params.py @@ -9,8 +9,10 @@ class ToolRuntimeListToolsParams(TypedDict, total=False): mcp_endpoint: McpEndpoint + """The MCP endpoint to use for the tool group.""" tool_group_id: str + """The ID of the tool group to list tools for.""" class McpEndpoint(TypedDict, total=False): diff --git a/src/llama_stack_client/types/toolgroup_register_params.py b/src/llama_stack_client/types/toolgroup_register_params.py index 8cb7af7f..a50c14c4 100644 --- a/src/llama_stack_client/types/toolgroup_register_params.py +++ b/src/llama_stack_client/types/toolgroup_register_params.py @@ -10,12 +10,16 @@ class ToolgroupRegisterParams(TypedDict, total=False): provider_id: Required[str] + """The ID of the provider to use for the tool group.""" toolgroup_id: Required[str] + """The ID of the tool group to register.""" args: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + """A dictionary of arguments to pass to the tool group.""" mcp_endpoint: McpEndpoint + """The MCP endpoint to use for the tool group.""" class McpEndpoint(TypedDict, total=False): diff --git a/src/llama_stack_client/types/vector_db_list_response.py b/src/llama_stack_client/types/vector_db_list_response.py index 0a110e2b..39161431 100644 --- a/src/llama_stack_client/types/vector_db_list_response.py +++ b/src/llama_stack_client/types/vector_db_list_response.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-from typing import List +from typing import List, Optional from typing_extensions import Literal, TypeAlias from .._models import BaseModel @@ -17,9 +17,9 @@ class VectorDBListResponseItem(BaseModel): provider_id: str - provider_resource_id: str - type: Literal["vector_db"] + provider_resource_id: Optional[str] = None + VectorDBListResponse: TypeAlias = List[VectorDBListResponseItem] diff --git a/src/llama_stack_client/types/vector_db_register_params.py b/src/llama_stack_client/types/vector_db_register_params.py index 6083bbce..734659a6 100644 --- a/src/llama_stack_client/types/vector_db_register_params.py +++ b/src/llama_stack_client/types/vector_db_register_params.py @@ -9,11 +9,16 @@ class VectorDBRegisterParams(TypedDict, total=False): embedding_model: Required[str] + """The embedding model to use.""" vector_db_id: Required[str] + """The identifier of the vector database to register.""" embedding_dimension: int + """The dimension of the embedding model.""" provider_id: str + """The identifier of the provider.""" provider_vector_db_id: str + """The identifier of the vector database in the provider.""" diff --git a/src/llama_stack_client/types/vector_db_register_response.py b/src/llama_stack_client/types/vector_db_register_response.py index cc4c201a..9c7a3166 100644 --- a/src/llama_stack_client/types/vector_db_register_response.py +++ b/src/llama_stack_client/types/vector_db_register_response.py @@ -1,5 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from typing import Optional from typing_extensions import Literal from .._models import BaseModel @@ -16,6 +17,6 @@ class VectorDBRegisterResponse(BaseModel): provider_id: str - provider_resource_id: str - type: Literal["vector_db"] + + provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/vector_db_retrieve_response.py b/src/llama_stack_client/types/vector_db_retrieve_response.py index 3bea2236..fb3597a5 100644 --- a/src/llama_stack_client/types/vector_db_retrieve_response.py +++ b/src/llama_stack_client/types/vector_db_retrieve_response.py @@ -1,5 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+from typing import Optional from typing_extensions import Literal from .._models import BaseModel @@ -16,6 +17,6 @@ class VectorDBRetrieveResponse(BaseModel): provider_id: str - provider_resource_id: str - type: Literal["vector_db"] + + provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/vector_io_insert_params.py b/src/llama_stack_client/types/vector_io_insert_params.py index 5ac67c10..adc1642e 100644 --- a/src/llama_stack_client/types/vector_io_insert_params.py +++ b/src/llama_stack_client/types/vector_io_insert_params.py @@ -12,10 +12,13 @@ class VectorIoInsertParams(TypedDict, total=False): chunks: Required[Iterable[Chunk]] + """The chunks to insert.""" vector_db_id: Required[str] + """The identifier of the vector database to insert the chunks into.""" ttl_seconds: int + """The time to live of the chunks.""" class Chunk(TypedDict, total=False): diff --git a/src/llama_stack_client/types/vector_io_query_params.py b/src/llama_stack_client/types/vector_io_query_params.py index 2fe675d1..f0569a58 100644 --- a/src/llama_stack_client/types/vector_io_query_params.py +++ b/src/llama_stack_client/types/vector_io_query_params.py @@ -12,8 +12,10 @@ class VectorIoQueryParams(TypedDict, total=False): query: Required[InterleavedContent] - """A image content item""" + """The query to search for.""" vector_db_id: Required[str] + """The identifier of the vector database to query.""" params: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + """The parameters of the query.""" diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py index 5c3d96c3..ff450202 100644 --- a/tests/api_resources/chat/test_completions.py +++ b/tests/api_resources/chat/test_completions.py @@ -9,7 +9,11 @@ from tests.utils import assert_matches_type from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types.chat import CompletionCreateResponse +from llama_stack_client.types.chat import ( + CompletionListResponse, + CompletionCreateResponse, + CompletionRetrieveResponse, +) base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -187,6 +191,79 @@ def test_streaming_response_create_overload_2(self, client: LlamaStackClient) -> assert cast(Any, response.is_closed) is True + @parametrize + def test_method_retrieve(self, client: LlamaStackClient) -> None: + completion = client.chat.completions.retrieve( + "completion_id", + ) + assert_matches_type(CompletionRetrieveResponse, completion, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: + response = client.chat.completions.with_raw_response.retrieve( + "completion_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(CompletionRetrieveResponse, completion, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: + with client.chat.completions.with_streaming_response.retrieve( + "completion_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = response.parse() + assert_matches_type(CompletionRetrieveResponse, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: LlamaStackClient) -> None: + with 
pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + client.chat.completions.with_raw_response.retrieve( + "", + ) + + @parametrize + def test_method_list(self, client: LlamaStackClient) -> None: + completion = client.chat.completions.list() + assert_matches_type(CompletionListResponse, completion, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: LlamaStackClient) -> None: + completion = client.chat.completions.list( + after="after", + limit=0, + model="model", + order="asc", + ) + assert_matches_type(CompletionListResponse, completion, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: LlamaStackClient) -> None: + response = client.chat.completions.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(CompletionListResponse, completion, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: LlamaStackClient) -> None: + with client.chat.completions.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = response.parse() + assert_matches_type(CompletionListResponse, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + class TestAsyncCompletions: parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @@ -360,3 +437,76 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncLla await stream.close() assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: + completion = await async_client.chat.completions.retrieve( + "completion_id", + ) + assert_matches_type(CompletionRetrieveResponse, completion, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: + response = await async_client.chat.completions.with_raw_response.retrieve( + "completion_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = await response.parse() + assert_matches_type(CompletionRetrieveResponse, completion, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: + async with async_client.chat.completions.with_streaming_response.retrieve( + "completion_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = await response.parse() + assert_matches_type(CompletionRetrieveResponse, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + await async_client.chat.completions.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: + completion = await async_client.chat.completions.list() + assert_matches_type(CompletionListResponse, completion, path=["response"]) + + @parametrize + async def 
test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: + completion = await async_client.chat.completions.list( + after="after", + limit=0, + model="model", + order="asc", + ) + assert_matches_type(CompletionListResponse, completion, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None: + response = await async_client.chat.completions.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = await response.parse() + assert_matches_type(CompletionListResponse, completion, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None: + async with async_client.chat.completions.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = await response.parse() + assert_matches_type(CompletionListResponse, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_eval.py b/tests/api_resources/test_eval.py index c519056b..8d04c104 100644 --- a/tests/api_resources/test_eval.py +++ b/tests/api_resources/test_eval.py @@ -32,7 +32,9 @@ def test_method_evaluate_rows(self, client: LlamaStackClient) -> None: }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -63,10 +65,10 @@ def test_method_evaluate_rows_with_all_params(self, client: LlamaStackClient) -> }, "scoring_params": { "foo": { - "judge_model": "judge_model", - "type": "llm_as_judge", "aggregation_functions": ["average"], + "judge_model": "judge_model", "judge_score_regexes": ["string"], + "type": "llm_as_judge", "prompt_template": "prompt_template", } }, @@ -89,7 +91,9 @@ def test_raw_response_evaluate_rows(self, client: LlamaStackClient) -> None: }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -115,7 +119,9 @@ def test_streaming_response_evaluate_rows(self, client: LlamaStackClient) -> Non }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -144,7 +150,9 @@ def test_path_params_evaluate_rows(self, client: LlamaStackClient) -> None: }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -165,7 +173,9 @@ def test_method_evaluate_rows_alpha(self, client: LlamaStackClient) -> None: }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -196,10 +206,10 @@ def test_method_evaluate_rows_alpha_with_all_params(self, client: LlamaStackClie }, "scoring_params": { "foo": { - "judge_model": "judge_model", - "type": "llm_as_judge", "aggregation_functions": ["average"], + "judge_model": "judge_model", "judge_score_regexes": ["string"], + "type": "llm_as_judge", "prompt_template": "prompt_template", } }, @@ -222,7 +232,9 @@ def test_raw_response_evaluate_rows_alpha(self, client: LlamaStackClient) -> Non }, "scoring_params": { "foo": { + 
"aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -248,7 +260,9 @@ def test_streaming_response_evaluate_rows_alpha(self, client: LlamaStackClient) }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -277,7 +291,9 @@ def test_path_params_evaluate_rows_alpha(self, client: LlamaStackClient) -> None }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -298,7 +314,9 @@ def test_method_run_eval(self, client: LlamaStackClient) -> None: }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -327,10 +345,10 @@ def test_method_run_eval_with_all_params(self, client: LlamaStackClient) -> None }, "scoring_params": { "foo": { - "judge_model": "judge_model", - "type": "llm_as_judge", "aggregation_functions": ["average"], + "judge_model": "judge_model", "judge_score_regexes": ["string"], + "type": "llm_as_judge", "prompt_template": "prompt_template", } }, @@ -351,7 +369,9 @@ def test_raw_response_run_eval(self, client: LlamaStackClient) -> None: }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -375,7 +395,9 @@ def test_streaming_response_run_eval(self, client: LlamaStackClient) -> None: }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -402,7 +424,9 @@ def test_path_params_run_eval(self, client: LlamaStackClient) -> None: }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -421,7 +445,9 @@ def test_method_run_eval_alpha(self, client: LlamaStackClient) -> None: }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -450,10 +476,10 @@ def test_method_run_eval_alpha_with_all_params(self, client: LlamaStackClient) - }, "scoring_params": { "foo": { - "judge_model": "judge_model", - "type": "llm_as_judge", "aggregation_functions": ["average"], + "judge_model": "judge_model", "judge_score_regexes": ["string"], + "type": "llm_as_judge", "prompt_template": "prompt_template", } }, @@ -474,7 +500,9 @@ def test_raw_response_run_eval_alpha(self, client: LlamaStackClient) -> None: }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -498,7 +526,9 @@ def test_streaming_response_run_eval_alpha(self, client: LlamaStackClient) -> No }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -525,7 +555,9 @@ def test_path_params_run_eval_alpha(self, client: LlamaStackClient) -> None: }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -548,7 +580,9 @@ async def 
test_method_evaluate_rows(self, async_client: AsyncLlamaStackClient) - }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -579,10 +613,10 @@ async def test_method_evaluate_rows_with_all_params(self, async_client: AsyncLla }, "scoring_params": { "foo": { - "judge_model": "judge_model", - "type": "llm_as_judge", "aggregation_functions": ["average"], + "judge_model": "judge_model", "judge_score_regexes": ["string"], + "type": "llm_as_judge", "prompt_template": "prompt_template", } }, @@ -605,7 +639,9 @@ async def test_raw_response_evaluate_rows(self, async_client: AsyncLlamaStackCli }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -631,7 +667,9 @@ async def test_streaming_response_evaluate_rows(self, async_client: AsyncLlamaSt }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -660,7 +698,9 @@ async def test_path_params_evaluate_rows(self, async_client: AsyncLlamaStackClie }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -681,7 +721,9 @@ async def test_method_evaluate_rows_alpha(self, async_client: AsyncLlamaStackCli }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -712,10 +754,10 @@ async def test_method_evaluate_rows_alpha_with_all_params(self, async_client: As }, "scoring_params": { "foo": { - "judge_model": "judge_model", - "type": "llm_as_judge", "aggregation_functions": ["average"], + "judge_model": "judge_model", "judge_score_regexes": ["string"], + "type": "llm_as_judge", "prompt_template": "prompt_template", } }, @@ -738,7 +780,9 @@ async def test_raw_response_evaluate_rows_alpha(self, async_client: AsyncLlamaSt }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -764,7 +808,9 @@ async def test_streaming_response_evaluate_rows_alpha(self, async_client: AsyncL }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -793,7 +839,9 @@ async def test_path_params_evaluate_rows_alpha(self, async_client: AsyncLlamaSta }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -814,7 +862,9 @@ async def test_method_run_eval(self, async_client: AsyncLlamaStackClient) -> Non }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -843,10 +893,10 @@ async def test_method_run_eval_with_all_params(self, async_client: AsyncLlamaSta }, "scoring_params": { "foo": { - "judge_model": "judge_model", - "type": "llm_as_judge", "aggregation_functions": ["average"], + "judge_model": "judge_model", "judge_score_regexes": ["string"], + "type": "llm_as_judge", "prompt_template": "prompt_template", } }, @@ -867,7 +917,9 @@ async def test_raw_response_run_eval(self, 
async_client: AsyncLlamaStackClient) }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -891,7 +943,9 @@ async def test_streaming_response_run_eval(self, async_client: AsyncLlamaStackCl }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -918,7 +972,9 @@ async def test_path_params_run_eval(self, async_client: AsyncLlamaStackClient) - }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -937,7 +993,9 @@ async def test_method_run_eval_alpha(self, async_client: AsyncLlamaStackClient) }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -966,10 +1024,10 @@ async def test_method_run_eval_alpha_with_all_params(self, async_client: AsyncLl }, "scoring_params": { "foo": { - "judge_model": "judge_model", - "type": "llm_as_judge", "aggregation_functions": ["average"], + "judge_model": "judge_model", "judge_score_regexes": ["string"], + "type": "llm_as_judge", "prompt_template": "prompt_template", } }, @@ -990,7 +1048,9 @@ async def test_raw_response_run_eval_alpha(self, async_client: AsyncLlamaStackCl }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -1014,7 +1074,9 @@ async def test_streaming_response_run_eval_alpha(self, async_client: AsyncLlamaS }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -1041,7 +1103,9 @@ async def test_path_params_run_eval_alpha(self, async_client: AsyncLlamaStackCli }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, diff --git a/tests/api_resources/test_responses.py b/tests/api_resources/test_responses.py index 0a2a0929..74e13e8d 100644 --- a/tests/api_resources/test_responses.py +++ b/tests/api_resources/test_responses.py @@ -30,9 +30,11 @@ def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient response = client.responses.create( input="string", model="model", + instructions="instructions", previous_response_id="previous_response_id", store=True, stream=False, + temperature=0, tools=[ { "type": "web_search", @@ -83,8 +85,10 @@ def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient input="string", model="model", stream=True, + instructions="instructions", previous_response_id="previous_response_id", store=True, + temperature=0, tools=[ { "type": "web_search", @@ -124,14 +128,14 @@ def test_streaming_response_create_overload_2(self, client: LlamaStackClient) -> @parametrize def test_method_retrieve(self, client: LlamaStackClient) -> None: response = client.responses.retrieve( - "id", + "response_id", ) assert_matches_type(ResponseObject, response, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: http_response = client.responses.with_raw_response.retrieve( - "id", + "response_id", ) assert http_response.is_closed is True @@ -142,7 +146,7 @@ def 
test_raw_response_retrieve(self, client: LlamaStackClient) -> None: @parametrize def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: with client.responses.with_streaming_response.retrieve( - "id", + "response_id", ) as http_response: assert not http_response.is_closed assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -154,7 +158,7 @@ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: @parametrize def test_path_params_retrieve(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `id` but received ''"): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): client.responses.with_raw_response.retrieve( "", ) @@ -176,9 +180,11 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn response = await async_client.responses.create( input="string", model="model", + instructions="instructions", previous_response_id="previous_response_id", store=True, stream=False, + temperature=0, tools=[ { "type": "web_search", @@ -229,8 +235,10 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn input="string", model="model", stream=True, + instructions="instructions", previous_response_id="previous_response_id", store=True, + temperature=0, tools=[ { "type": "web_search", @@ -270,14 +278,14 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncLla @parametrize async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.responses.retrieve( - "id", + "response_id", ) assert_matches_type(ResponseObject, response, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: http_response = await async_client.responses.with_raw_response.retrieve( - "id", + "response_id", ) assert http_response.is_closed is True @@ -288,7 +296,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.responses.with_streaming_response.retrieve( - "id", + "response_id", ) as http_response: assert not http_response.is_closed assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -300,7 +308,7 @@ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCl @parametrize async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `id` but received ''"): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): await async_client.responses.with_raw_response.retrieve( "", ) diff --git a/tests/api_resources/test_scoring.py b/tests/api_resources/test_scoring.py index da61ebcb..ca818363 100644 --- a/tests/api_resources/test_scoring.py +++ b/tests/api_resources/test_scoring.py @@ -26,7 +26,9 @@ def test_method_score(self, client: LlamaStackClient) -> None: input_rows=[{"foo": True}], scoring_functions={ "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -39,7 +41,9 @@ def test_raw_response_score(self, client: LlamaStackClient) -> None: input_rows=[{"foo": True}], scoring_functions={ "foo": { + "aggregation_functions": 
["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -56,7 +60,9 @@ def test_streaming_response_score(self, client: LlamaStackClient) -> None: input_rows=[{"foo": True}], scoring_functions={ "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -76,7 +82,9 @@ def test_method_score_batch(self, client: LlamaStackClient) -> None: save_results_dataset=True, scoring_functions={ "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -90,7 +98,9 @@ def test_raw_response_score_batch(self, client: LlamaStackClient) -> None: save_results_dataset=True, scoring_functions={ "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -108,7 +118,9 @@ def test_streaming_response_score_batch(self, client: LlamaStackClient) -> None: save_results_dataset=True, scoring_functions={ "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -131,7 +143,9 @@ async def test_method_score(self, async_client: AsyncLlamaStackClient) -> None: input_rows=[{"foo": True}], scoring_functions={ "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -144,7 +158,9 @@ async def test_raw_response_score(self, async_client: AsyncLlamaStackClient) -> input_rows=[{"foo": True}], scoring_functions={ "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -161,7 +177,9 @@ async def test_streaming_response_score(self, async_client: AsyncLlamaStackClien input_rows=[{"foo": True}], scoring_functions={ "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -181,7 +199,9 @@ async def test_method_score_batch(self, async_client: AsyncLlamaStackClient) -> save_results_dataset=True, scoring_functions={ "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -195,7 +215,9 @@ async def test_raw_response_score_batch(self, async_client: AsyncLlamaStackClien save_results_dataset=True, scoring_functions={ "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -213,7 +235,9 @@ async def test_streaming_response_score_batch(self, async_client: AsyncLlamaStac save_results_dataset=True, scoring_functions={ "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, diff --git a/tests/api_resources/test_scoring_functions.py b/tests/api_resources/test_scoring_functions.py index 5806bf59..d58d5c60 100644 --- a/tests/api_resources/test_scoring_functions.py +++ b/tests/api_resources/test_scoring_functions.py @@ -99,10 +99,10 @@ def test_method_register_with_all_params(self, client: LlamaStackClient) -> None return_type={"type": "string"}, scoring_fn_id="scoring_fn_id", params={ - "judge_model": "judge_model", - "type": "llm_as_judge", "aggregation_functions": ["average"], + "judge_model": "judge_model", 
"judge_score_regexes": ["string"], + "type": "llm_as_judge", "prompt_template": "prompt_template", }, provider_id="provider_id", @@ -221,10 +221,10 @@ async def test_method_register_with_all_params(self, async_client: AsyncLlamaSta return_type={"type": "string"}, scoring_fn_id="scoring_fn_id", params={ - "judge_model": "judge_model", - "type": "llm_as_judge", "aggregation_functions": ["average"], + "judge_model": "judge_model", "judge_score_regexes": ["string"], + "type": "llm_as_judge", "prompt_template": "prompt_template", }, provider_id="provider_id", diff --git a/tests/api_resources/tool_runtime/test_rag_tool.py b/tests/api_resources/tool_runtime/test_rag_tool.py index e687df20..4169e085 100644 --- a/tests/api_resources/tool_runtime/test_rag_tool.py +++ b/tests/api_resources/tool_runtime/test_rag_tool.py @@ -86,12 +86,14 @@ def test_method_query_with_all_params(self, client: LlamaStackClient) -> None: content="string", vector_db_ids=["string"], query_config={ + "chunk_template": "chunk_template", "max_chunks": 0, "max_tokens_in_context": 0, "query_generator_config": { "separator": "separator", "type": "default", }, + "mode": "mode", }, ) assert_matches_type(QueryResult, rag_tool, path=["response"]) @@ -195,12 +197,14 @@ async def test_method_query_with_all_params(self, async_client: AsyncLlamaStackC content="string", vector_db_ids=["string"], query_config={ + "chunk_template": "chunk_template", "max_chunks": 0, "max_tokens_in_context": 0, "query_generator_config": { "separator": "separator", "type": "default", }, + "mode": "mode", }, ) assert_matches_type(QueryResult, rag_tool, path=["response"]) diff --git a/tests/test_utils/test_proxy.py b/tests/test_utils/test_proxy.py index 9cefe4ea..76a29efd 100644 --- a/tests/test_utils/test_proxy.py +++ b/tests/test_utils/test_proxy.py @@ -21,3 +21,14 @@ def test_recursive_proxy() -> None: assert dir(proxy) == [] assert type(proxy).__name__ == "RecursiveLazyProxy" assert type(operator.attrgetter("name.foo.bar.baz")(proxy)).__name__ == "RecursiveLazyProxy" + + +def test_isinstance_does_not_error() -> None: + class AlwaysErrorProxy(LazyProxy[Any]): + @override + def __load__(self) -> Any: + raise RuntimeError("Mocking missing dependency") + + proxy = AlwaysErrorProxy() + assert not isinstance(proxy, dict) + assert isinstance(proxy, LazyProxy)