From 9c15910b9fdddf1549c4b886f535911657a165f6 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Fri, 23 May 2025 11:13:15 -0700 Subject: [PATCH] Sync updates from stainless branch: ashwinb/dev --- src/llama_stack_client/__init__.py | 5 + src/llama_stack_client/_utils/_proxy.py | 5 +- .../_utils/_resources_proxy.py | 24 + .../resources/agents/agents.py | 4 +- .../resources/agents/session.py | 4 +- .../resources/benchmarks.py | 34 + .../resources/chat/completions.py | 466 ++++++++---- .../resources/completions.py | 192 ++--- src/llama_stack_client/resources/datasets.py | 26 +- src/llama_stack_client/resources/inference.py | 136 ++-- src/llama_stack_client/resources/inspect.py | 4 + src/llama_stack_client/resources/models.py | 34 + .../resources/post_training/job.py | 26 + .../resources/post_training/post_training.py | 60 ++ src/llama_stack_client/resources/providers.py | 6 + src/llama_stack_client/resources/responses.py | 36 +- src/llama_stack_client/resources/routes.py | 2 + src/llama_stack_client/resources/safety.py | 16 + src/llama_stack_client/resources/scoring.py | 16 + .../resources/scoring_functions.py | 32 + src/llama_stack_client/resources/shields.py | 26 + src/llama_stack_client/resources/telemetry.py | 88 +++ .../resources/tool_runtime/rag_tool.py | 4 + .../resources/tool_runtime/tool_runtime.py | 24 +- .../resources/toolgroups.py | 32 +- src/llama_stack_client/resources/tools.py | 12 +- .../resources/vector_dbs.py | 34 + src/llama_stack_client/resources/vector_io.py | 32 +- src/llama_stack_client/types/benchmark.py | 6 +- .../types/benchmark_register_params.py | 6 + src/llama_stack_client/types/chat/__init__.py | 3 + .../types/chat/completion_create_params.py | 46 +- .../types/chat/completion_list_params.py | 21 + .../types/chat/completion_list_response.py | 667 ++++++++++++++++++ .../chat/completion_retrieve_response.py | 626 ++++++++++++++++ .../types/completion_create_params.py | 34 +- .../types/dataset_list_response.py | 6 +- .../types/dataset_register_params.py | 4 +- .../types/dataset_register_response.py | 6 +- .../types/dataset_retrieve_response.py | 6 +- .../inference_batch_chat_completion_params.py | 17 +- .../inference_batch_completion_params.py | 14 +- .../types/inference_chat_completion_params.py | 6 +- .../types/inference_completion_params.py | 6 +- .../types/inference_step.py | 1 + .../types/memory_retrieval_step.py | 1 + src/llama_stack_client/types/model.py | 6 +- .../types/model_register_params.py | 5 + .../post_training/job_artifacts_params.py | 1 + .../types/post_training/job_cancel_params.py | 1 + .../types/post_training/job_status_params.py | 1 + ...ost_training_preference_optimize_params.py | 6 + ...st_training_supervised_fine_tune_params.py | 7 + .../types/response_create_params.py | 174 ++++- .../types/response_object.py | 77 +- .../types/safety_run_shield_params.py | 3 + src/llama_stack_client/types/scoring_fn.py | 4 +- .../types/scoring_fn_params.py | 24 +- .../types/scoring_fn_params_param.py | 24 +- .../types/scoring_function_register_params.py | 8 + .../types/scoring_score_batch_params.py | 3 + .../types/shared/query_config.py | 16 + .../types/shared_params/query_config.py | 14 + src/llama_stack_client/types/shield.py | 4 +- .../types/shield_call_step.py | 1 + .../types/shield_register_params.py | 4 + .../types/telemetry_get_span_tree_params.py | 2 + .../types/telemetry_log_event_params.py | 2 + .../types/telemetry_query_spans_params.py | 3 + .../types/telemetry_query_traces_params.py | 4 + .../telemetry_save_spans_to_dataset_params.py | 4 + 
src/llama_stack_client/types/tool.py | 4 +- .../types/tool_execution_step.py | 1 + src/llama_stack_client/types/tool_group.py | 4 +- .../types/tool_list_params.py | 1 + .../tool_runtime/rag_tool_query_params.py | 1 + .../types/tool_runtime_invoke_tool_params.py | 2 + .../types/tool_runtime_list_tools_params.py | 2 + .../types/toolgroup_register_params.py | 4 + .../types/vector_db_list_response.py | 6 +- .../types/vector_db_register_params.py | 5 + .../types/vector_db_register_response.py | 5 +- .../types/vector_db_retrieve_response.py | 5 +- .../types/vector_io_insert_params.py | 3 + .../types/vector_io_query_params.py | 4 +- tests/api_resources/chat/test_completions.py | 152 +++- tests/api_resources/test_eval.py | 96 ++- tests/api_resources/test_responses.py | 24 +- tests/api_resources/test_scoring.py | 24 + tests/api_resources/test_scoring_functions.py | 8 +- .../tool_runtime/test_rag_tool.py | 4 + tests/test_utils/test_proxy.py | 11 + 92 files changed, 3152 insertions(+), 468 deletions(-) create mode 100644 src/llama_stack_client/_utils/_resources_proxy.py create mode 100644 src/llama_stack_client/types/chat/completion_list_params.py create mode 100644 src/llama_stack_client/types/chat/completion_list_response.py create mode 100644 src/llama_stack_client/types/chat/completion_retrieve_response.py diff --git a/src/llama_stack_client/__init__.py b/src/llama_stack_client/__init__.py index 70ef01a4..7dc65e78 100644 --- a/src/llama_stack_client/__init__.py +++ b/src/llama_stack_client/__init__.py @@ -1,5 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +import typing as _t + from . import types from ._types import NOT_GIVEN, Omit, NoneType, NotGiven, Transport, ProxiesTypes from ._utils import file_from_path @@ -84,6 +86,9 @@ "DefaultAsyncHttpxClient", ] +if not _t.TYPE_CHECKING: + from ._utils._resources_proxy import resources as resources + _setup_logging() # Update the __module__ attribute for exported symbols so that diff --git a/src/llama_stack_client/_utils/_proxy.py b/src/llama_stack_client/_utils/_proxy.py index ffd883e9..0f239a33 100644 --- a/src/llama_stack_client/_utils/_proxy.py +++ b/src/llama_stack_client/_utils/_proxy.py @@ -46,7 +46,10 @@ def __dir__(self) -> Iterable[str]: @property # type: ignore @override def __class__(self) -> type: # pyright: ignore - proxied = self.__get_proxied__() + try: + proxied = self.__get_proxied__() + except Exception: + return type(self) if issubclass(type(proxied), LazyProxy): return type(proxied) return proxied.__class__ diff --git a/src/llama_stack_client/_utils/_resources_proxy.py b/src/llama_stack_client/_utils/_resources_proxy.py new file mode 100644 index 00000000..bf0a876a --- /dev/null +++ b/src/llama_stack_client/_utils/_resources_proxy.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from typing import Any +from typing_extensions import override + +from ._proxy import LazyProxy + + +class ResourcesProxy(LazyProxy[Any]): + """A proxy for the `llama_stack_client.resources` module. 
+ + This is used so that we can lazily import `llama_stack_client.resources` only when + needed *and* so that users can just import `llama_stack_client` and reference `llama_stack_client.resources` + """ + + @override + def __load__(self) -> Any: + import importlib + + mod = importlib.import_module("llama_stack_client.resources") + return mod + + +resources = ResourcesProxy().__as_proxied__() diff --git a/src/llama_stack_client/resources/agents/agents.py b/src/llama_stack_client/resources/agents/agents.py index ed03dde5..9c06cbd1 100644 --- a/src/llama_stack_client/resources/agents/agents.py +++ b/src/llama_stack_client/resources/agents/agents.py @@ -124,7 +124,7 @@ def delete( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ - Delete an agent by its ID. + Delete an agent by its ID and its associated sessions and turns. Args: extra_headers: Send extra headers @@ -225,7 +225,7 @@ async def delete( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ - Delete an agent by its ID. + Delete an agent by its ID and its associated sessions and turns. Args: extra_headers: Send extra headers diff --git a/src/llama_stack_client/resources/agents/session.py b/src/llama_stack_client/resources/agents/session.py index 0aec7449..ebdde5c6 100644 --- a/src/llama_stack_client/resources/agents/session.py +++ b/src/llama_stack_client/resources/agents/session.py @@ -137,7 +137,7 @@ def delete( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ - Delete an agent session by its ID. + Delete an agent session by its ID and its associated turns. Args: extra_headers: Send extra headers @@ -277,7 +277,7 @@ async def delete( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ - Delete an agent session by its ID. + Delete an agent session by its ID and its associated turns. Args: extra_headers: Send extra headers diff --git a/src/llama_stack_client/resources/benchmarks.py b/src/llama_stack_client/resources/benchmarks.py index ff6af994..f22865cb 100644 --- a/src/llama_stack_client/resources/benchmarks.py +++ b/src/llama_stack_client/resources/benchmarks.py @@ -57,6 +57,8 @@ def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Benchmark: """ + Get a benchmark by its ID. + Args: extra_headers: Send extra headers @@ -86,6 +88,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> BenchmarkListResponse: + """List all benchmarks.""" return self._get( "/v1/eval/benchmarks", options=make_request_options( @@ -115,7 +118,21 @@ def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Register a benchmark. + Args: + benchmark_id: The ID of the benchmark to register. + + dataset_id: The ID of the dataset to use for the benchmark. + + scoring_functions: The scoring functions to use for the benchmark. + + metadata: The metadata to use for the benchmark. + + provider_benchmark_id: The ID of the provider benchmark to use for the benchmark. + + provider_id: The ID of the provider to use for the benchmark. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -177,6 +194,8 @@ async def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Benchmark: """ + Get a benchmark by its ID. 
+ Args: extra_headers: Send extra headers @@ -206,6 +225,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> BenchmarkListResponse: + """List all benchmarks.""" return await self._get( "/v1/eval/benchmarks", options=make_request_options( @@ -235,7 +255,21 @@ async def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Register a benchmark. + Args: + benchmark_id: The ID of the benchmark to register. + + dataset_id: The ID of the dataset to use for the benchmark. + + scoring_functions: The scoring functions to use for the benchmark. + + metadata: The metadata to use for the benchmark. + + provider_benchmark_id: The ID of the provider benchmark to use for the benchmark. + + provider_id: The ID of the provider to use for the benchmark. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/chat/completions.py b/src/llama_stack_client/resources/chat/completions.py index 7c449d41..1ab543c5 100644 --- a/src/llama_stack_client/resources/chat/completions.py +++ b/src/llama_stack_client/resources/chat/completions.py @@ -18,10 +18,12 @@ async_to_streamed_response_wrapper, ) from ..._streaming import Stream, AsyncStream -from ...types.chat import completion_create_params +from ...types.chat import completion_list_params, completion_create_params from ..._base_client import make_request_options from ...types.chat_completion_chunk import ChatCompletionChunk +from ...types.chat.completion_list_response import CompletionListResponse from ...types.chat.completion_create_response import CompletionCreateResponse +from ...types.chat.completion_retrieve_response import CompletionRetrieveResponse __all__ = ["CompletionsResource", "AsyncCompletionsResource"] @@ -87,52 +89,52 @@ def create( specified model. Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - function_call: (Optional) The function call to use + function_call: (Optional) The function call to use. - functions: (Optional) List of functions to use + functions: (Optional) List of functions to use. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_completion_tokens: (Optional) The maximum number of tokens to generate + max_completion_tokens: (Optional) The maximum number of tokens to generate. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. - n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. - parallel_tool_calls: (Optional) Whether to parallelize tool calls + parallel_tool_calls: (Optional) Whether to parallelize tool calls. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. - response_format: (Optional) The response format to use + response_format: (Optional) The response format to use. - seed: (Optional) The seed to use + seed: (Optional) The seed to use. 
- stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - tool_choice: (Optional) The tool choice to use + tool_choice: (Optional) The tool choice to use. - tools: (Optional) The tools to use + tools: (Optional) The tools to use. - top_logprobs: (Optional) The top log probabilities to use + top_logprobs: (Optional) The top log probabilities to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers @@ -185,52 +187,52 @@ def create( specified model. Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - function_call: (Optional) The function call to use + function_call: (Optional) The function call to use. - functions: (Optional) List of functions to use + functions: (Optional) List of functions to use. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_completion_tokens: (Optional) The maximum number of tokens to generate + max_completion_tokens: (Optional) The maximum number of tokens to generate. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. - n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. - parallel_tool_calls: (Optional) Whether to parallelize tool calls + parallel_tool_calls: (Optional) Whether to parallelize tool calls. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. - response_format: (Optional) The response format to use + response_format: (Optional) The response format to use. - seed: (Optional) The seed to use + seed: (Optional) The seed to use. - stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - tool_choice: (Optional) The tool choice to use + tool_choice: (Optional) The tool choice to use. - tools: (Optional) The tools to use + tools: (Optional) The tools to use. - top_logprobs: (Optional) The top log probabilities to use + top_logprobs: (Optional) The top log probabilities to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers @@ -283,52 +285,52 @@ def create( specified model. 
Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - function_call: (Optional) The function call to use + function_call: (Optional) The function call to use. - functions: (Optional) List of functions to use + functions: (Optional) List of functions to use. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_completion_tokens: (Optional) The maximum number of tokens to generate + max_completion_tokens: (Optional) The maximum number of tokens to generate. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. - n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. - parallel_tool_calls: (Optional) Whether to parallelize tool calls + parallel_tool_calls: (Optional) Whether to parallelize tool calls. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. - response_format: (Optional) The response format to use + response_format: (Optional) The response format to use. - seed: (Optional) The seed to use + seed: (Optional) The seed to use. - stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - tool_choice: (Optional) The tool choice to use + tool_choice: (Optional) The tool choice to use. - tools: (Optional) The tools to use + tools: (Optional) The tools to use. - top_logprobs: (Optional) The top log probabilities to use + top_logprobs: (Optional) The top log probabilities to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers @@ -418,6 +420,93 @@ def create( stream_cls=Stream[ChatCompletionChunk], ) + def retrieve( + self, + completion_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CompletionRetrieveResponse: + """ + Describe a chat completion by its ID. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return self._get( + f"/v1/openai/v1/chat/completions/{completion_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=CompletionRetrieveResponse, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CompletionListResponse: + """ + List all chat completions. + + Args: + after: The ID of the last chat completion to return. + + limit: The maximum number of chat completions to return. + + model: The model to filter by. + + order: The order to sort the chat completions by: "asc" or "desc". Defaults to "desc". + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get( + "/v1/openai/v1/chat/completions", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "model": model, + "order": order, + }, + completion_list_params.CompletionListParams, + ), + ), + cast_to=CompletionListResponse, + ) + class AsyncCompletionsResource(AsyncAPIResource): @cached_property @@ -480,52 +569,52 @@ async def create( specified model. Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - function_call: (Optional) The function call to use + function_call: (Optional) The function call to use. - functions: (Optional) List of functions to use + functions: (Optional) List of functions to use. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_completion_tokens: (Optional) The maximum number of tokens to generate + max_completion_tokens: (Optional) The maximum number of tokens to generate. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. - n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. 
- parallel_tool_calls: (Optional) Whether to parallelize tool calls + parallel_tool_calls: (Optional) Whether to parallelize tool calls. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. - response_format: (Optional) The response format to use + response_format: (Optional) The response format to use. - seed: (Optional) The seed to use + seed: (Optional) The seed to use. - stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - tool_choice: (Optional) The tool choice to use + tool_choice: (Optional) The tool choice to use. - tools: (Optional) The tools to use + tools: (Optional) The tools to use. - top_logprobs: (Optional) The top log probabilities to use + top_logprobs: (Optional) The top log probabilities to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers @@ -578,52 +667,52 @@ async def create( specified model. Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - function_call: (Optional) The function call to use + function_call: (Optional) The function call to use. - functions: (Optional) List of functions to use + functions: (Optional) List of functions to use. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_completion_tokens: (Optional) The maximum number of tokens to generate + max_completion_tokens: (Optional) The maximum number of tokens to generate. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. - n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. - parallel_tool_calls: (Optional) Whether to parallelize tool calls + parallel_tool_calls: (Optional) Whether to parallelize tool calls. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. - response_format: (Optional) The response format to use + response_format: (Optional) The response format to use. - seed: (Optional) The seed to use + seed: (Optional) The seed to use. - stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - tool_choice: (Optional) The tool choice to use + tool_choice: (Optional) The tool choice to use. 
- tools: (Optional) The tools to use + tools: (Optional) The tools to use. - top_logprobs: (Optional) The top log probabilities to use + top_logprobs: (Optional) The top log probabilities to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers @@ -676,52 +765,52 @@ async def create( specified model. Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - function_call: (Optional) The function call to use + function_call: (Optional) The function call to use. - functions: (Optional) List of functions to use + functions: (Optional) List of functions to use. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_completion_tokens: (Optional) The maximum number of tokens to generate + max_completion_tokens: (Optional) The maximum number of tokens to generate. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. - n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. - parallel_tool_calls: (Optional) Whether to parallelize tool calls + parallel_tool_calls: (Optional) Whether to parallelize tool calls. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. - response_format: (Optional) The response format to use + response_format: (Optional) The response format to use. - seed: (Optional) The seed to use + seed: (Optional) The seed to use. - stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - tool_choice: (Optional) The tool choice to use + tool_choice: (Optional) The tool choice to use. - tools: (Optional) The tools to use + tools: (Optional) The tools to use. - top_logprobs: (Optional) The top log probabilities to use + top_logprobs: (Optional) The top log probabilities to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers @@ -811,6 +900,93 @@ async def create( stream_cls=AsyncStream[ChatCompletionChunk], ) + async def retrieve( + self, + completion_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CompletionRetrieveResponse: + """ + Describe a chat completion by its ID. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return await self._get( + f"/v1/openai/v1/chat/completions/{completion_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=CompletionRetrieveResponse, + ) + + async def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> CompletionListResponse: + """ + List all chat completions. + + Args: + after: The ID of the last chat completion to return. + + limit: The maximum number of chat completions to return. + + model: The model to filter by. + + order: The order to sort the chat completions by: "asc" or "desc". Defaults to "desc". + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._get( + "/v1/openai/v1/chat/completions", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform( + { + "after": after, + "limit": limit, + "model": model, + "order": order, + }, + completion_list_params.CompletionListParams, + ), + ), + cast_to=CompletionListResponse, + ) + class CompletionsResourceWithRawResponse: def __init__(self, completions: CompletionsResource) -> None: @@ -819,6 +995,12 @@ def __init__(self, completions: CompletionsResource) -> None: self.create = to_raw_response_wrapper( completions.create, ) + self.retrieve = to_raw_response_wrapper( + completions.retrieve, + ) + self.list = to_raw_response_wrapper( + completions.list, + ) class AsyncCompletionsResourceWithRawResponse: @@ -828,6 +1010,12 @@ def __init__(self, completions: AsyncCompletionsResource) -> None: self.create = async_to_raw_response_wrapper( completions.create, ) + self.retrieve = async_to_raw_response_wrapper( + completions.retrieve, + ) + self.list = async_to_raw_response_wrapper( + completions.list, + ) class CompletionsResourceWithStreamingResponse: @@ -837,6 +1025,12 @@ def __init__(self, completions: CompletionsResource) -> None: self.create = to_streamed_response_wrapper( completions.create, ) + self.retrieve = to_streamed_response_wrapper( + completions.retrieve, + ) + self.list = to_streamed_response_wrapper( + completions.list, + ) class AsyncCompletionsResourceWithStreamingResponse: @@ -846,3 +1040,9 @@ def __init__(self, completions: AsyncCompletionsResource) -> None: self.create = async_to_streamed_response_wrapper( completions.create, ) + self.retrieve = 
async_to_streamed_response_wrapper( + completions.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + completions.list, + ) diff --git a/src/llama_stack_client/resources/completions.py b/src/llama_stack_client/resources/completions.py index 8f57aeb4..dfdd80f4 100644 --- a/src/llama_stack_client/resources/completions.py +++ b/src/llama_stack_client/resources/completions.py @@ -83,37 +83,37 @@ def create( model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - prompt: The prompt to generate a completion for + prompt: The prompt to generate a completion for. - best_of: (Optional) The number of completions to generate + best_of: (Optional) The number of completions to generate. - echo: (Optional) Whether to echo the prompt + echo: (Optional) Whether to echo the prompt. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. - n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. - seed: (Optional) The seed to use + seed: (Optional) The seed to use. - stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers @@ -163,37 +163,37 @@ def create( model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - prompt: The prompt to generate a completion for + prompt: The prompt to generate a completion for. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - best_of: (Optional) The number of completions to generate + best_of: (Optional) The number of completions to generate. - echo: (Optional) Whether to echo the prompt + echo: (Optional) Whether to echo the prompt. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. - n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. 
- seed: (Optional) The seed to use + seed: (Optional) The seed to use. - stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers @@ -243,37 +243,37 @@ def create( model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - prompt: The prompt to generate a completion for + prompt: The prompt to generate a completion for. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - best_of: (Optional) The number of completions to generate + best_of: (Optional) The number of completions to generate. - echo: (Optional) Whether to echo the prompt + echo: (Optional) Whether to echo the prompt. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. - n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. - seed: (Optional) The seed to use + seed: (Optional) The seed to use. - stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers @@ -410,37 +410,37 @@ async def create( model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - prompt: The prompt to generate a completion for + prompt: The prompt to generate a completion for. - best_of: (Optional) The number of completions to generate + best_of: (Optional) The number of completions to generate. - echo: (Optional) Whether to echo the prompt + echo: (Optional) Whether to echo the prompt. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. - n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. 
- seed: (Optional) The seed to use + seed: (Optional) The seed to use. - stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers @@ -490,37 +490,37 @@ async def create( model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - prompt: The prompt to generate a completion for + prompt: The prompt to generate a completion for. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - best_of: (Optional) The number of completions to generate + best_of: (Optional) The number of completions to generate. - echo: (Optional) Whether to echo the prompt + echo: (Optional) Whether to echo the prompt. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. - n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. - seed: (Optional) The seed to use + seed: (Optional) The seed to use. - stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers @@ -570,37 +570,37 @@ async def create( model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. - prompt: The prompt to generate a completion for + prompt: The prompt to generate a completion for. - stream: (Optional) Whether to stream the response + stream: (Optional) Whether to stream the response. - best_of: (Optional) The number of completions to generate + best_of: (Optional) The number of completions to generate. - echo: (Optional) Whether to echo the prompt + echo: (Optional) Whether to echo the prompt. - frequency_penalty: (Optional) The penalty for repeated tokens + frequency_penalty: (Optional) The penalty for repeated tokens. - logit_bias: (Optional) The logit bias to use + logit_bias: (Optional) The logit bias to use. - logprobs: (Optional) The log probabilities to use + logprobs: (Optional) The log probabilities to use. - max_tokens: (Optional) The maximum number of tokens to generate + max_tokens: (Optional) The maximum number of tokens to generate. 
- n: (Optional) The number of completions to generate + n: (Optional) The number of completions to generate. - presence_penalty: (Optional) The penalty for repeated tokens + presence_penalty: (Optional) The penalty for repeated tokens. - seed: (Optional) The seed to use + seed: (Optional) The seed to use. - stop: (Optional) The stop tokens to use + stop: (Optional) The stop tokens to use. - stream_options: (Optional) The stream options to use + stream_options: (Optional) The stream options to use. - temperature: (Optional) The temperature to use + temperature: (Optional) The temperature to use. - top_p: (Optional) The top p to use + top_p: (Optional) The top p to use. - user: (Optional) The user to use + user: (Optional) The user to use. extra_headers: Send extra headers diff --git a/src/llama_stack_client/resources/datasets.py b/src/llama_stack_client/resources/datasets.py index 845f182b..447e2305 100644 --- a/src/llama_stack_client/resources/datasets.py +++ b/src/llama_stack_client/resources/datasets.py @@ -60,6 +60,8 @@ def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> DatasetRetrieveResponse: """ + Get a dataset by its ID. + Args: extra_headers: Send extra headers @@ -89,6 +91,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> DatasetListResponse: + """List all datasets.""" return self._get( "/v1/datasets", options=make_request_options( @@ -123,8 +126,8 @@ def iterrows( The response includes: - - data: List of items for the current page - - has_more: Whether there are more items available after this set + - data: List of items for the current page. + - has_more: Whether there are more items available after this set. Args: limit: The number of rows to get. @@ -178,7 +181,7 @@ def register( Args: purpose: The purpose of the dataset. - One of - "post-training/messages": The dataset + One of: - "post-training/messages": The dataset contains a messages column with list of messages for post-training. { "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] } - "eval/question-answer": The @@ -201,7 +204,7 @@ def register( dataset_id: The ID of the dataset. If not provided, an ID will be generated. - metadata: The metadata for the dataset. - E.g. {"description": "My dataset"} + metadata: The metadata for the dataset. - E.g. {"description": "My dataset"}. extra_headers: Send extra headers @@ -240,6 +243,8 @@ def unregister( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Unregister a dataset by its ID. + Args: extra_headers: Send extra headers @@ -293,6 +298,8 @@ async def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> DatasetRetrieveResponse: """ + Get a dataset by its ID. + Args: extra_headers: Send extra headers @@ -322,6 +329,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> DatasetListResponse: + """List all datasets.""" return await self._get( "/v1/datasets", options=make_request_options( @@ -356,8 +364,8 @@ async def iterrows( The response includes: - - data: List of items for the current page - - has_more: Whether there are more items available after this set + - data: List of items for the current page. + - has_more: Whether there are more items available after this set. Args: limit: The number of rows to get. @@ -411,7 +419,7 @@ async def register( Args: purpose: The purpose of the dataset. 
- One of - "post-training/messages": The dataset + One of: - "post-training/messages": The dataset contains a messages column with list of messages for post-training. { "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] } - "eval/question-answer": The @@ -434,7 +442,7 @@ async def register( dataset_id: The ID of the dataset. If not provided, an ID will be generated. - metadata: The metadata for the dataset. - E.g. {"description": "My dataset"} + metadata: The metadata for the dataset. - E.g. {"description": "My dataset"}. extra_headers: Send extra headers @@ -473,6 +481,8 @@ async def unregister( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Unregister a dataset by its ID. + Args: extra_headers: Send extra headers diff --git a/src/llama_stack_client/resources/inference.py b/src/llama_stack_client/resources/inference.py index be87eda4..a8bebdad 100644 --- a/src/llama_stack_client/resources/inference.py +++ b/src/llama_stack_client/resources/inference.py @@ -79,12 +79,24 @@ def batch_chat_completion( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> InferenceBatchChatCompletionResponse: """ + Generate chat completions for a batch of messages using the specified model. + Args: - response_format: Configuration for JSON schema-guided response generation. + messages_batch: The messages to generate completions for. + + model_id: The identifier of the model to use. The model must be registered with Llama + Stack and available via the /models endpoint. + + logprobs: (Optional) If specified, log probabilities for each token position will be + returned. + + response_format: (Optional) Grammar specification for guided (structured) decoding. + + sampling_params: (Optional) Parameters to control the sampling strategy. - sampling_params: Sampling parameters. + tool_config: (Optional) Configuration for tool use. - tool_config: Configuration for tool use. + tools: (Optional) List of tool definitions available to the model. extra_headers: Send extra headers @@ -130,10 +142,20 @@ def batch_completion( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> BatchCompletion: """ + Generate completions for a batch of content using the specified model. + Args: - response_format: Configuration for JSON schema-guided response generation. + content_batch: The content to generate completions for. + + model_id: The identifier of the model to use. The model must be registered with Llama + Stack and available via the /models endpoint. + + logprobs: (Optional) If specified, log probabilities for each token position will be + returned. + + response_format: (Optional) Grammar specification for guided (structured) decoding. - sampling_params: Sampling parameters. + sampling_params: (Optional) Parameters to control the sampling strategy. extra_headers: Send extra headers @@ -186,7 +208,7 @@ def chat_completion( Generate a chat completion for the given messages using the specified model. Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -199,7 +221,7 @@ def chat_completion( providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF grammar. This format is more flexible, but not all providers support it. 
- sampling_params: Parameters to control the sampling strategy + sampling_params: Parameters to control the sampling strategy. stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False. @@ -217,7 +239,7 @@ def chat_completion( are output as Python syntax -- a list of function calls. .. deprecated:: Use tool_config instead. - tools: (Optional) List of tool definitions available to the model + tools: (Optional) List of tool definitions available to the model. extra_headers: Send extra headers @@ -254,7 +276,7 @@ def chat_completion( Generate a chat completion for the given messages using the specified model. Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -270,7 +292,7 @@ def chat_completion( providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF grammar. This format is more flexible, but not all providers support it. - sampling_params: Parameters to control the sampling strategy + sampling_params: Parameters to control the sampling strategy. tool_choice: (Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto. .. deprecated:: Use tool_config instead. @@ -285,7 +307,7 @@ def chat_completion( are output as Python syntax -- a list of function calls. .. deprecated:: Use tool_config instead. - tools: (Optional) List of tool definitions available to the model + tools: (Optional) List of tool definitions available to the model. extra_headers: Send extra headers @@ -322,7 +344,7 @@ def chat_completion( Generate a chat completion for the given messages using the specified model. Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -338,7 +360,7 @@ def chat_completion( providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF grammar. This format is more flexible, but not all providers support it. - sampling_params: Parameters to control the sampling strategy + sampling_params: Parameters to control the sampling strategy. tool_choice: (Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto. .. deprecated:: Use tool_config instead. @@ -353,7 +375,7 @@ def chat_completion( are output as Python syntax -- a list of function calls. .. deprecated:: Use tool_config instead. - tools: (Optional) List of tool definitions available to the model + tools: (Optional) List of tool definitions available to the model. extra_headers: Send extra headers @@ -436,7 +458,7 @@ def completion( Generate a completion for the given content using the specified model. Args: - content: The content to generate a completion for + content: The content to generate a completion for. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -444,9 +466,9 @@ def completion( logprobs: (Optional) If specified, log probabilities for each token position will be returned. - response_format: (Optional) Grammar specification for guided (structured) decoding + response_format: (Optional) Grammar specification for guided (structured) decoding. 
- sampling_params: (Optional) Parameters to control the sampling strategy + sampling_params: (Optional) Parameters to control the sampling strategy. stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False. @@ -482,7 +504,7 @@ def completion( Generate a completion for the given content using the specified model. Args: - content: The content to generate a completion for + content: The content to generate a completion for. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -493,9 +515,9 @@ def completion( logprobs: (Optional) If specified, log probabilities for each token position will be returned. - response_format: (Optional) Grammar specification for guided (structured) decoding + response_format: (Optional) Grammar specification for guided (structured) decoding. - sampling_params: (Optional) Parameters to control the sampling strategy + sampling_params: (Optional) Parameters to control the sampling strategy. extra_headers: Send extra headers @@ -528,7 +550,7 @@ def completion( Generate a completion for the given content using the specified model. Args: - content: The content to generate a completion for + content: The content to generate a completion for. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -539,9 +561,9 @@ def completion( logprobs: (Optional) If specified, log probabilities for each token position will be returned. - response_format: (Optional) Grammar specification for guided (structured) decoding + response_format: (Optional) Grammar specification for guided (structured) decoding. - sampling_params: (Optional) Parameters to control the sampling strategy + sampling_params: (Optional) Parameters to control the sampling strategy. extra_headers: Send extra headers @@ -695,12 +717,24 @@ async def batch_chat_completion( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> InferenceBatchChatCompletionResponse: """ + Generate chat completions for a batch of messages using the specified model. + Args: - response_format: Configuration for JSON schema-guided response generation. + messages_batch: The messages to generate completions for. + + model_id: The identifier of the model to use. The model must be registered with Llama + Stack and available via the /models endpoint. + + logprobs: (Optional) If specified, log probabilities for each token position will be + returned. + + response_format: (Optional) Grammar specification for guided (structured) decoding. + + sampling_params: (Optional) Parameters to control the sampling strategy. - sampling_params: Sampling parameters. + tool_config: (Optional) Configuration for tool use. - tool_config: Configuration for tool use. + tools: (Optional) List of tool definitions available to the model. extra_headers: Send extra headers @@ -746,10 +780,20 @@ async def batch_completion( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> BatchCompletion: """ + Generate completions for a batch of content using the specified model. + Args: - response_format: Configuration for JSON schema-guided response generation. + content_batch: The content to generate completions for. + + model_id: The identifier of the model to use. The model must be registered with Llama + Stack and available via the /models endpoint. + + logprobs: (Optional) If specified, log probabilities for each token position will be + returned. 
+ + response_format: (Optional) Grammar specification for guided (structured) decoding. - sampling_params: Sampling parameters. + sampling_params: (Optional) Parameters to control the sampling strategy. extra_headers: Send extra headers @@ -802,7 +846,7 @@ async def chat_completion( Generate a chat completion for the given messages using the specified model. Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -815,7 +859,7 @@ async def chat_completion( providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF grammar. This format is more flexible, but not all providers support it. - sampling_params: Parameters to control the sampling strategy + sampling_params: Parameters to control the sampling strategy. stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False. @@ -833,7 +877,7 @@ async def chat_completion( are output as Python syntax -- a list of function calls. .. deprecated:: Use tool_config instead. - tools: (Optional) List of tool definitions available to the model + tools: (Optional) List of tool definitions available to the model. extra_headers: Send extra headers @@ -870,7 +914,7 @@ async def chat_completion( Generate a chat completion for the given messages using the specified model. Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -886,7 +930,7 @@ async def chat_completion( providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF grammar. This format is more flexible, but not all providers support it. - sampling_params: Parameters to control the sampling strategy + sampling_params: Parameters to control the sampling strategy. tool_choice: (Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto. .. deprecated:: Use tool_config instead. @@ -901,7 +945,7 @@ async def chat_completion( are output as Python syntax -- a list of function calls. .. deprecated:: Use tool_config instead. - tools: (Optional) List of tool definitions available to the model + tools: (Optional) List of tool definitions available to the model. extra_headers: Send extra headers @@ -938,7 +982,7 @@ async def chat_completion( Generate a chat completion for the given messages using the specified model. Args: - messages: List of messages in the conversation + messages: List of messages in the conversation. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -954,7 +998,7 @@ async def chat_completion( providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF grammar. This format is more flexible, but not all providers support it. - sampling_params: Parameters to control the sampling strategy + sampling_params: Parameters to control the sampling strategy. tool_choice: (Optional) Whether tool use is required or automatic. Defaults to ToolChoice.auto. .. deprecated:: Use tool_config instead. @@ -969,7 +1013,7 @@ async def chat_completion( are output as Python syntax -- a list of function calls. .. deprecated:: Use tool_config instead. 
- tools: (Optional) List of tool definitions available to the model + tools: (Optional) List of tool definitions available to the model. extra_headers: Send extra headers @@ -1052,7 +1096,7 @@ async def completion( Generate a completion for the given content using the specified model. Args: - content: The content to generate a completion for + content: The content to generate a completion for. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -1060,9 +1104,9 @@ async def completion( logprobs: (Optional) If specified, log probabilities for each token position will be returned. - response_format: (Optional) Grammar specification for guided (structured) decoding + response_format: (Optional) Grammar specification for guided (structured) decoding. - sampling_params: (Optional) Parameters to control the sampling strategy + sampling_params: (Optional) Parameters to control the sampling strategy. stream: (Optional) If True, generate an SSE event stream of the response. Defaults to False. @@ -1098,7 +1142,7 @@ async def completion( Generate a completion for the given content using the specified model. Args: - content: The content to generate a completion for + content: The content to generate a completion for. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -1109,9 +1153,9 @@ async def completion( logprobs: (Optional) If specified, log probabilities for each token position will be returned. - response_format: (Optional) Grammar specification for guided (structured) decoding + response_format: (Optional) Grammar specification for guided (structured) decoding. - sampling_params: (Optional) Parameters to control the sampling strategy + sampling_params: (Optional) Parameters to control the sampling strategy. extra_headers: Send extra headers @@ -1144,7 +1188,7 @@ async def completion( Generate a completion for the given content using the specified model. Args: - content: The content to generate a completion for + content: The content to generate a completion for. model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint. @@ -1155,9 +1199,9 @@ async def completion( logprobs: (Optional) If specified, log probabilities for each token position will be returned. - response_format: (Optional) Grammar specification for guided (structured) decoding + response_format: (Optional) Grammar specification for guided (structured) decoding. - sampling_params: (Optional) Parameters to control the sampling strategy + sampling_params: (Optional) Parameters to control the sampling strategy. 
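As a minimal usage sketch of the inference endpoints whose docstrings are standardized above: this assumes the package's `LlamaStackClient` entry point, that the resource is exposed as `client.inference` (inferred from the file path), and placeholder server URL, model id, and message shape.

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

# Chat completion: a list of role/content messages plus a registered model_id.
chat = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.2-3B-Instruct",  # placeholder; must be registered via /models
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)

# Plain completion: raw content plus a model_id.
text = client.inference.completion(
    model_id="meta-llama/Llama-3.2-3B-Instruct",
    content="The capital of France is",
)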
extra_headers: Send extra headers diff --git a/src/llama_stack_client/resources/inspect.py b/src/llama_stack_client/resources/inspect.py index 86fe1e43..dd9044e3 100644 --- a/src/llama_stack_client/resources/inspect.py +++ b/src/llama_stack_client/resources/inspect.py @@ -50,6 +50,7 @@ def health( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> HealthInfo: + """Get the health of the service.""" return self._get( "/v1/health", options=make_request_options( @@ -68,6 +69,7 @@ def version( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> VersionInfo: + """Get the version of the service.""" return self._get( "/v1/version", options=make_request_options( @@ -107,6 +109,7 @@ async def health( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> HealthInfo: + """Get the health of the service.""" return await self._get( "/v1/health", options=make_request_options( @@ -125,6 +128,7 @@ async def version( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> VersionInfo: + """Get the version of the service.""" return await self._get( "/v1/version", options=make_request_options( diff --git a/src/llama_stack_client/resources/models.py b/src/llama_stack_client/resources/models.py index 02458d3b..b3fcaa69 100644 --- a/src/llama_stack_client/resources/models.py +++ b/src/llama_stack_client/resources/models.py @@ -58,6 +58,8 @@ def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Model: """ + Get a model by its identifier. + Args: extra_headers: Send extra headers @@ -87,6 +89,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ModelListResponse: + """List all models.""" return self._get( "/v1/models", options=make_request_options( @@ -115,7 +118,19 @@ def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Model: """ + Register a model. + Args: + model_id: The identifier of the model to register. + + metadata: Any additional metadata for this model. + + model_type: The type of model to register. + + provider_id: The identifier of the provider. + + provider_model_id: The identifier of the model in the provider. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -154,6 +169,8 @@ def unregister( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Unregister a model. + Args: extra_headers: Send extra headers @@ -207,6 +224,8 @@ async def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Model: """ + Get a model by its identifier. + Args: extra_headers: Send extra headers @@ -236,6 +255,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ModelListResponse: + """List all models.""" return await self._get( "/v1/models", options=make_request_options( @@ -264,7 +284,19 @@ async def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Model: """ + Register a model. + Args: + model_id: The identifier of the model to register. + + metadata: Any additional metadata for this model. + + model_type: The type of model to register. + + provider_id: The identifier of the provider. + + provider_model_id: The identifier of the model in the provider. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -303,6 +335,8 @@ async def unregister( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Unregister a model. + Args: extra_headers: Send extra headers diff --git a/src/llama_stack_client/resources/post_training/job.py b/src/llama_stack_client/resources/post_training/job.py index a55ba7fa..51a6ccda 100644 --- a/src/llama_stack_client/resources/post_training/job.py +++ b/src/llama_stack_client/resources/post_training/job.py @@ -56,6 +56,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> List[Data]: + """Get all training jobs.""" return self._get( "/v1/post-training/jobs", options=make_request_options( @@ -80,7 +81,11 @@ def artifacts( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> JobArtifactsResponse: """ + Get the artifacts of a training job. + Args: + job_uuid: The UUID of the job to get the artifacts of. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -113,7 +118,11 @@ def cancel( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Cancel a training job. + Args: + job_uuid: The UUID of the job to cancel. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -144,7 +153,11 @@ def status( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> JobStatusResponse: """ + Get the status of a training job. + Args: + job_uuid: The UUID of the job to get the status of. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -196,6 +209,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> List[Data]: + """Get all training jobs.""" return await self._get( "/v1/post-training/jobs", options=make_request_options( @@ -220,7 +234,11 @@ async def artifacts( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> JobArtifactsResponse: """ + Get the artifacts of a training job. + Args: + job_uuid: The UUID of the job to get the artifacts of. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -253,7 +271,11 @@ async def cancel( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Cancel a training job. + Args: + job_uuid: The UUID of the job to cancel. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -284,7 +306,11 @@ async def status( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> JobStatusResponse: """ + Get the status of a training job. + Args: + job_uuid: The UUID of the job to get the status of. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/post_training/post_training.py b/src/llama_stack_client/resources/post_training/post_training.py index fe0d2b7b..6149edc3 100644 --- a/src/llama_stack_client/resources/post_training/post_training.py +++ b/src/llama_stack_client/resources/post_training/post_training.py @@ -76,7 +76,21 @@ def preference_optimize( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> PostTrainingJob: """ + Run preference optimization of a model. + Args: + algorithm_config: The algorithm configuration. + + finetuned_model: The model to fine-tune. 
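A rough sketch of the model-registry and training-job calls documented above, reusing the `client` from the earlier sketch; `client.models` and `client.post_training.job` are attribute names inferred from the file paths, and all identifiers are placeholders.

# Register a model against a specific provider, then list what is available.
model = client.models.register(
    model_id="my-model",              # placeholder identifier
    provider_id="ollama",             # placeholder provider
    provider_model_id="llama3.2:3b",  # placeholder provider-side name
)
print(client.models.list())

# Inspect a previously started training job by its UUID (placeholder value).
job_uuid = "00000000-0000-0000-0000-000000000000"
print(client.post_training.job.status(job_uuid=job_uuid))
print(client.post_training.job.artifacts(job_uuid=job_uuid))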
+ + hyperparam_search_config: The hyperparam search configuration. + + job_uuid: The UUID of the job to create. + + logger_config: The logger configuration. + + training_config: The training configuration. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -122,7 +136,23 @@ def supervised_fine_tune( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> PostTrainingJob: """ + Run supervised fine-tuning of a model. + Args: + hyperparam_search_config: The hyperparam search configuration. + + job_uuid: The UUID of the job to create. + + logger_config: The logger configuration. + + training_config: The training configuration. + + algorithm_config: The algorithm configuration. + + checkpoint_dir: The directory to save checkpoint(s) to. + + model: The model to fine-tune. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -193,7 +223,21 @@ async def preference_optimize( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> PostTrainingJob: """ + Run preference optimization of a model. + Args: + algorithm_config: The algorithm configuration. + + finetuned_model: The model to fine-tune. + + hyperparam_search_config: The hyperparam search configuration. + + job_uuid: The UUID of the job to create. + + logger_config: The logger configuration. + + training_config: The training configuration. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -239,7 +283,23 @@ async def supervised_fine_tune( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> PostTrainingJob: """ + Run supervised fine-tuning of a model. + Args: + hyperparam_search_config: The hyperparam search configuration. + + job_uuid: The UUID of the job to create. + + logger_config: The logger configuration. + + training_config: The training configuration. + + algorithm_config: The algorithm configuration. + + checkpoint_dir: The directory to save checkpoint(s) to. + + model: The model to fine-tune. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/providers.py b/src/llama_stack_client/resources/providers.py index d87960ac..a1c092eb 100644 --- a/src/llama_stack_client/resources/providers.py +++ b/src/llama_stack_client/resources/providers.py @@ -55,6 +55,8 @@ def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ProviderInfo: """ + Get detailed information about a specific provider. + Args: extra_headers: Send extra headers @@ -84,6 +86,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ProviderListResponse: + """List all available providers.""" return self._get( "/v1/providers", options=make_request_options( @@ -129,6 +132,8 @@ async def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ProviderInfo: """ + Get detailed information about a specific provider. 
+ Args: extra_headers: Send extra headers @@ -158,6 +163,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ProviderListResponse: + """List all available providers.""" return await self._get( "/v1/providers", options=make_request_options( diff --git a/src/llama_stack_client/resources/responses.py b/src/llama_stack_client/resources/responses.py index 5e7bd4c3..bb7fcdc6 100644 --- a/src/llama_stack_client/resources/responses.py +++ b/src/llama_stack_client/resources/responses.py @@ -52,9 +52,11 @@ def create( *, input: Union[str, Iterable[response_create_params.InputUnionMember1]], model: str, + instructions: str | NotGiven = NOT_GIVEN, previous_response_id: str | NotGiven = NOT_GIVEN, store: bool | NotGiven = NOT_GIVEN, stream: Literal[False] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -92,8 +94,10 @@ def create( input: Union[str, Iterable[response_create_params.InputUnionMember1]], model: str, stream: Literal[True], + instructions: str | NotGiven = NOT_GIVEN, previous_response_id: str | NotGiven = NOT_GIVEN, store: bool | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -131,8 +135,10 @@ def create( input: Union[str, Iterable[response_create_params.InputUnionMember1]], model: str, stream: bool, + instructions: str | NotGiven = NOT_GIVEN, previous_response_id: str | NotGiven = NOT_GIVEN, store: bool | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -169,9 +175,11 @@ def create( *, input: Union[str, Iterable[response_create_params.InputUnionMember1]], model: str, + instructions: str | NotGiven = NOT_GIVEN, previous_response_id: str | NotGiven = NOT_GIVEN, store: bool | NotGiven = NOT_GIVEN, stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -186,9 +194,11 @@ def create( { "input": input, "model": model, + "instructions": instructions, "previous_response_id": previous_response_id, "store": store, "stream": stream, + "temperature": temperature, "tools": tools, }, response_create_params.ResponseCreateParamsStreaming @@ -205,7 +215,7 @@ def create( def retrieve( self, - id: str, + response_id: str, *, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
# The extra values given here take precedence over values defined on the client or passed to this method. @@ -226,10 +236,10 @@ def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ - if not id: - raise ValueError(f"Expected a non-empty value for `id` but received {id!r}") + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") return self._get( - f"/v1/openai/v1/responses/{id}", + f"/v1/openai/v1/responses/{response_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -263,9 +273,11 @@ async def create( *, input: Union[str, Iterable[response_create_params.InputUnionMember1]], model: str, + instructions: str | NotGiven = NOT_GIVEN, previous_response_id: str | NotGiven = NOT_GIVEN, store: bool | NotGiven = NOT_GIVEN, stream: Literal[False] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -303,8 +315,10 @@ async def create( input: Union[str, Iterable[response_create_params.InputUnionMember1]], model: str, stream: Literal[True], + instructions: str | NotGiven = NOT_GIVEN, previous_response_id: str | NotGiven = NOT_GIVEN, store: bool | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -342,8 +356,10 @@ async def create( input: Union[str, Iterable[response_create_params.InputUnionMember1]], model: str, stream: bool, + instructions: str | NotGiven = NOT_GIVEN, previous_response_id: str | NotGiven = NOT_GIVEN, store: bool | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -380,9 +396,11 @@ async def create( *, input: Union[str, Iterable[response_create_params.InputUnionMember1]], model: str, + instructions: str | NotGiven = NOT_GIVEN, previous_response_id: str | NotGiven = NOT_GIVEN, store: bool | NotGiven = NOT_GIVEN, stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
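To illustrate the `instructions` and `temperature` parameters added to the responses API here, and the retrieve-by-`response_id` rename: a sketch reusing the earlier `client`, where the model name is a placeholder and the returned object is assumed to carry an `id` field.

response = client.responses.create(
    model="meta-llama/Llama-3.2-3B-Instruct",            # placeholder
    input="Summarize the Llama Stack project in one line.",
    instructions="Answer in a single short sentence.",   # new parameter in this patch
    temperature=0.2,                                      # new parameter in this patch
    store=True,
)

# retrieve() now takes `response_id` instead of `id`.
fetched = client.responses.retrieve(response.id)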
@@ -397,9 +415,11 @@ async def create( { "input": input, "model": model, + "instructions": instructions, "previous_response_id": previous_response_id, "store": store, "stream": stream, + "temperature": temperature, "tools": tools, }, response_create_params.ResponseCreateParamsStreaming @@ -416,7 +436,7 @@ async def create( async def retrieve( self, - id: str, + response_id: str, *, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -437,10 +457,10 @@ async def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ - if not id: - raise ValueError(f"Expected a non-empty value for `id` but received {id!r}") + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") return await self._get( - f"/v1/openai/v1/responses/{id}", + f"/v1/openai/v1/responses/{response_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), diff --git a/src/llama_stack_client/resources/routes.py b/src/llama_stack_client/resources/routes.py index a0aefdfa..d7c7cfff 100644 --- a/src/llama_stack_client/resources/routes.py +++ b/src/llama_stack_client/resources/routes.py @@ -52,6 +52,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> RouteListResponse: + """List all routes.""" return self._get( "/v1/inspect/routes", options=make_request_options( @@ -95,6 +96,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> RouteListResponse: + """List all routes.""" return await self._get( "/v1/inspect/routes", options=make_request_options( diff --git a/src/llama_stack_client/resources/safety.py b/src/llama_stack_client/resources/safety.py index 66646102..abd2e775 100644 --- a/src/llama_stack_client/resources/safety.py +++ b/src/llama_stack_client/resources/safety.py @@ -58,7 +58,15 @@ def run_shield( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> RunShieldResponse: """ + Run a shield. + Args: + messages: The messages to run the shield on. + + params: The parameters of the shield. + + shield_id: The identifier of the shield to run. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -118,7 +126,15 @@ async def run_shield( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> RunShieldResponse: """ + Run a shield. + Args: + messages: The messages to run the shield on. + + params: The parameters of the shield. + + shield_id: The identifier of the shield to run. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/scoring.py b/src/llama_stack_client/resources/scoring.py index 33ee8969..dd650a82 100644 --- a/src/llama_stack_client/resources/scoring.py +++ b/src/llama_stack_client/resources/scoring.py @@ -102,7 +102,15 @@ def score_batch( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ScoringScoreBatchResponse: """ + Score a batch of rows. + Args: + dataset_id: The ID of the dataset to score. + + save_results_dataset: Whether to save the results to a dataset. + + scoring_functions: The scoring functions to use for the scoring. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -205,7 +213,15 @@ async def score_batch( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ScoringScoreBatchResponse: """ + Score a batch of rows. + Args: + dataset_id: The ID of the dataset to score. + + save_results_dataset: Whether to save the results to a dataset. + + scoring_functions: The scoring functions to use for the scoring. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/scoring_functions.py b/src/llama_stack_client/resources/scoring_functions.py index f01ff17b..07d9586f 100644 --- a/src/llama_stack_client/resources/scoring_functions.py +++ b/src/llama_stack_client/resources/scoring_functions.py @@ -59,6 +59,8 @@ def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ScoringFn: """ + Get a scoring function by its ID. + Args: extra_headers: Send extra headers @@ -88,6 +90,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ScoringFunctionListResponse: + """List all scoring functions.""" return self._get( "/v1/scoring-functions", options=make_request_options( @@ -117,7 +120,20 @@ def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Register a scoring function. + Args: + description: The description of the scoring function. + + scoring_fn_id: The ID of the scoring function to register. + + params: The parameters for the scoring function for benchmark eval, these can be + overridden for app eval. + + provider_id: The ID of the provider to use for the scoring function. + + provider_scoring_fn_id: The ID of the provider scoring function to use for the scoring function. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -179,6 +195,8 @@ async def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ScoringFn: """ + Get a scoring function by its ID. + Args: extra_headers: Send extra headers @@ -208,6 +226,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ScoringFunctionListResponse: + """List all scoring functions.""" return await self._get( "/v1/scoring-functions", options=make_request_options( @@ -237,7 +256,20 @@ async def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Register a scoring function. + Args: + description: The description of the scoring function. + + scoring_fn_id: The ID of the scoring function to register. + + params: The parameters for the scoring function for benchmark eval, these can be + overridden for app eval. + + provider_id: The ID of the provider to use for the scoring function. + + provider_scoring_fn_id: The ID of the provider scoring function to use for the scoring function. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/shields.py b/src/llama_stack_client/resources/shields.py index 4ef88ac7..5982298a 100644 --- a/src/llama_stack_client/resources/shields.py +++ b/src/llama_stack_client/resources/shields.py @@ -57,6 +57,8 @@ def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Shield: """ + Get a shield by its identifier. 
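Sketching the shield and batch-scoring calls whose arguments are documented above, again reusing the earlier `client`; the shield id, dataset id, and the scoring-function mapping are placeholders, and the exact shape of `params` is an assumption (left empty here).

verdict = client.safety.run_shield(
    shield_id="llama-guard",   # placeholder shield identifier
    messages=[{"role": "user", "content": "How do I bake a cake?"}],
    params={},                 # shield-specific parameters; none in this sketch
)

scores = client.scoring.score_batch(
    dataset_id="my-eval-dataset",                  # placeholder dataset
    scoring_functions={"basic::equality": None},   # placeholder scoring-function spec
    save_results_dataset=False,
)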
+ Args: extra_headers: Send extra headers @@ -86,6 +88,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ShieldListResponse: + """List all shields.""" return self._get( "/v1/shields", options=make_request_options( @@ -113,7 +116,17 @@ def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Shield: """ + Register a shield. + Args: + shield_id: The identifier of the shield to register. + + params: The parameters of the shield. + + provider_id: The identifier of the provider. + + provider_shield_id: The identifier of the shield in the provider. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -172,6 +185,8 @@ async def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Shield: """ + Get a shield by its identifier. + Args: extra_headers: Send extra headers @@ -201,6 +216,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ShieldListResponse: + """List all shields.""" return await self._get( "/v1/shields", options=make_request_options( @@ -228,7 +244,17 @@ async def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Shield: """ + Register a shield. + Args: + shield_id: The identifier of the shield to register. + + params: The parameters of the shield. + + provider_id: The identifier of the provider. + + provider_shield_id: The identifier of the shield in the provider. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/telemetry.py b/src/llama_stack_client/resources/telemetry.py index 12261eee..96af4528 100644 --- a/src/llama_stack_client/resources/telemetry.py +++ b/src/llama_stack_client/resources/telemetry.py @@ -69,6 +69,8 @@ def get_span( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> TelemetryGetSpanResponse: """ + Get a span by its ID. + Args: extra_headers: Send extra headers @@ -104,7 +106,13 @@ def get_span_tree( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> TelemetryGetSpanTreeResponse: """ + Get a span tree by its ID. + Args: + attributes_to_return: The attributes to return in the tree. + + max_depth: The maximum depth of the tree. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -146,6 +154,8 @@ def get_trace( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Trace: """ + Get a trace by its ID. + Args: extra_headers: Send extra headers @@ -178,7 +188,13 @@ def log_event( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Log an event. + Args: + event: The event to log. + + ttl_seconds: The time to live of the event. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -217,7 +233,15 @@ def query_spans( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> TelemetryQuerySpansResponse: """ + Query spans. + Args: + attribute_filters: The attribute filters to apply to the spans. + + attributes_to_return: The attributes to return in the spans. + + max_depth: The maximum depth of the tree. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -261,7 +285,17 @@ def query_traces( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> TelemetryQueryTracesResponse: """ + Query traces. 
+ Args: + attribute_filters: The attribute filters to apply to the traces. + + limit: The limit of traces to return. + + offset: The offset of the traces to return. + + order_by: The order by of the traces to return. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -306,7 +340,17 @@ def save_spans_to_dataset( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Save spans to a dataset. + Args: + attribute_filters: The attribute filters to apply to the spans. + + attributes_to_save: The attributes to save to the dataset. + + dataset_id: The ID of the dataset to save the spans to. + + max_depth: The maximum depth of the tree. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -367,6 +411,8 @@ async def get_span( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> TelemetryGetSpanResponse: """ + Get a span by its ID. + Args: extra_headers: Send extra headers @@ -402,7 +448,13 @@ async def get_span_tree( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> TelemetryGetSpanTreeResponse: """ + Get a span tree by its ID. + Args: + attributes_to_return: The attributes to return in the tree. + + max_depth: The maximum depth of the tree. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -444,6 +496,8 @@ async def get_trace( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Trace: """ + Get a trace by its ID. + Args: extra_headers: Send extra headers @@ -476,7 +530,13 @@ async def log_event( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Log an event. + Args: + event: The event to log. + + ttl_seconds: The time to live of the event. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -515,7 +575,15 @@ async def query_spans( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> TelemetryQuerySpansResponse: """ + Query spans. + Args: + attribute_filters: The attribute filters to apply to the spans. + + attributes_to_return: The attributes to return in the spans. + + max_depth: The maximum depth of the tree. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -559,7 +627,17 @@ async def query_traces( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> TelemetryQueryTracesResponse: """ + Query traces. + Args: + attribute_filters: The attribute filters to apply to the traces. + + limit: The limit of traces to return. + + offset: The offset of the traces to return. + + order_by: The order by of the traces to return. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -604,7 +682,17 @@ async def save_spans_to_dataset( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Save spans to a dataset. + Args: + attribute_filters: The attribute filters to apply to the spans. + + attributes_to_save: The attributes to save to the dataset. + + dataset_id: The ID of the dataset to save the spans to. + + max_depth: The maximum depth of the tree. 
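A short sketch of the telemetry query calls documented above, reusing the earlier `client`; attribute names are placeholders, and the element shape of `attribute_filters` is not shown in this hunk, so the filter list is left empty.

traces = client.telemetry.query_traces(limit=10, offset=0)
print(traces)

spans = client.telemetry.query_spans(
    attribute_filters=[],               # no filtering in this sketch
    attributes_to_return=["model_id"],  # placeholder attribute name
)
print(spans)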
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/tool_runtime/rag_tool.py b/src/llama_stack_client/resources/tool_runtime/rag_tool.py index 048ea980..0ed84418 100644 --- a/src/llama_stack_client/resources/tool_runtime/rag_tool.py +++ b/src/llama_stack_client/resources/tool_runtime/rag_tool.py @@ -107,6 +107,8 @@ def query( Args: content: A image content item + query_config: Configuration for the RAG query generation. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -213,6 +215,8 @@ async def query( Args: content: A image content item + query_config: Configuration for the RAG query generation. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/tool_runtime/tool_runtime.py b/src/llama_stack_client/resources/tool_runtime/tool_runtime.py index dda3f661..1df58598 100644 --- a/src/llama_stack_client/resources/tool_runtime/tool_runtime.py +++ b/src/llama_stack_client/resources/tool_runtime/tool_runtime.py @@ -70,9 +70,13 @@ def invoke_tool( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ToolInvocationResult: """ - Run a tool with the given arguments + Run a tool with the given arguments. Args: + kwargs: A dictionary of arguments to pass to the tool. + + tool_name: The name of the tool to invoke. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -109,7 +113,13 @@ def list_tools( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ToolRuntimeListToolsResponse: """ + List all tools in the runtime. + Args: + mcp_endpoint: The MCP endpoint to use for the tool group. + + tool_group_id: The ID of the tool group to list tools for. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -175,9 +185,13 @@ async def invoke_tool( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ToolInvocationResult: """ - Run a tool with the given arguments + Run a tool with the given arguments. Args: + kwargs: A dictionary of arguments to pass to the tool. + + tool_name: The name of the tool to invoke. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -214,7 +228,13 @@ async def list_tools( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ToolRuntimeListToolsResponse: """ + List all tools in the runtime. + Args: + mcp_endpoint: The MCP endpoint to use for the tool group. + + tool_group_id: The ID of the tool group to list tools for. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request diff --git a/src/llama_stack_client/resources/toolgroups.py b/src/llama_stack_client/resources/toolgroups.py index d882a6eb..a618657d 100644 --- a/src/llama_stack_client/resources/toolgroups.py +++ b/src/llama_stack_client/resources/toolgroups.py @@ -55,7 +55,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ToolgroupListResponse: - """List tool groups with optional provider""" + """List tool groups with optional provider.""" return self._get( "/v1/toolgroups", options=make_request_options( @@ -80,6 +80,8 @@ def get( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ToolGroup: """ + Get a tool group by its ID. 
+ Args: extra_headers: Send extra headers @@ -114,9 +116,17 @@ def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ - Register a tool group + Register a tool group. Args: + provider_id: The ID of the provider to use for the tool group. + + toolgroup_id: The ID of the tool group to register. + + args: A dictionary of arguments to pass to the tool group. + + mcp_endpoint: The MCP endpoint to use for the tool group. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -155,7 +165,7 @@ def unregister( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ - Unregister a tool group + Unregister a tool group. Args: extra_headers: Send extra headers @@ -208,7 +218,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ToolgroupListResponse: - """List tool groups with optional provider""" + """List tool groups with optional provider.""" return await self._get( "/v1/toolgroups", options=make_request_options( @@ -233,6 +243,8 @@ async def get( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ToolGroup: """ + Get a tool group by its ID. + Args: extra_headers: Send extra headers @@ -267,9 +279,17 @@ async def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ - Register a tool group + Register a tool group. Args: + provider_id: The ID of the provider to use for the tool group. + + toolgroup_id: The ID of the tool group to register. + + args: A dictionary of arguments to pass to the tool group. + + mcp_endpoint: The MCP endpoint to use for the tool group. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -308,7 +328,7 @@ async def unregister( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ - Unregister a tool group + Unregister a tool group. Args: extra_headers: Send extra headers diff --git a/src/llama_stack_client/resources/tools.py b/src/llama_stack_client/resources/tools.py index 8a9b91e8..c9fd3808 100644 --- a/src/llama_stack_client/resources/tools.py +++ b/src/llama_stack_client/resources/tools.py @@ -57,9 +57,11 @@ def list( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ToolListResponse: """ - List tools with optional tool group + List tools with optional tool group. Args: + toolgroup_id: The ID of the tool group to list tools for. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -93,6 +95,8 @@ def get( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Tool: """ + Get a tool by its name. + Args: extra_headers: Send extra headers @@ -145,9 +149,11 @@ async def list( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ToolListResponse: """ - List tools with optional tool group + List tools with optional tool group. Args: + toolgroup_id: The ID of the tool group to list tools for. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -181,6 +187,8 @@ async def get( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Tool: """ + Get a tool by its name. 
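A sketch of registering a tool group and invoking one of its tools, per the arguments documented above and reusing the earlier `client`; the group, provider, and tool names are placeholders taken from common Llama Stack examples, not from this patch.

client.toolgroups.register(
    toolgroup_id="builtin::websearch",  # placeholder tool group id
    provider_id="tavily-search",        # placeholder provider id
)

print(client.tools.list(toolgroup_id="builtin::websearch"))

result = client.tool_runtime.invoke_tool(
    tool_name="web_search",             # placeholder tool name
    kwargs={"query": "latest Llama Stack release"},
)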
+ Args: extra_headers: Send extra headers diff --git a/src/llama_stack_client/resources/vector_dbs.py b/src/llama_stack_client/resources/vector_dbs.py index c75d261d..c1ad232d 100644 --- a/src/llama_stack_client/resources/vector_dbs.py +++ b/src/llama_stack_client/resources/vector_dbs.py @@ -58,6 +58,8 @@ def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> VectorDBRetrieveResponse: """ + Get a vector database by its identifier. + Args: extra_headers: Send extra headers @@ -87,6 +89,7 @@ def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> VectorDBListResponse: + """List all vector databases.""" return self._get( "/v1/vector-dbs", options=make_request_options( @@ -115,7 +118,19 @@ def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> VectorDBRegisterResponse: """ + Register a vector database. + Args: + embedding_model: The embedding model to use. + + vector_db_id: The identifier of the vector database to register. + + embedding_dimension: The dimension of the embedding model. + + provider_id: The identifier of the provider. + + provider_vector_db_id: The identifier of the vector database in the provider. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -154,6 +169,8 @@ def unregister( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Unregister a vector database. + Args: extra_headers: Send extra headers @@ -207,6 +224,8 @@ async def retrieve( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> VectorDBRetrieveResponse: """ + Get a vector database by its identifier. + Args: extra_headers: Send extra headers @@ -236,6 +255,7 @@ async def list( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> VectorDBListResponse: + """List all vector databases.""" return await self._get( "/v1/vector-dbs", options=make_request_options( @@ -264,7 +284,19 @@ async def register( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> VectorDBRegisterResponse: """ + Register a vector database. + Args: + embedding_model: The embedding model to use. + + vector_db_id: The identifier of the vector database to register. + + embedding_dimension: The dimension of the embedding model. + + provider_id: The identifier of the provider. + + provider_vector_db_id: The identifier of the vector database in the provider. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -303,6 +335,8 @@ async def unregister( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Unregister a vector database. + Args: extra_headers: Send extra headers diff --git a/src/llama_stack_client/resources/vector_io.py b/src/llama_stack_client/resources/vector_io.py index 9b1e8822..7ec4691e 100644 --- a/src/llama_stack_client/resources/vector_io.py +++ b/src/llama_stack_client/resources/vector_io.py @@ -58,7 +58,15 @@ def insert( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Insert chunks into a vector database. + Args: + chunks: The chunks to insert. + + vector_db_id: The identifier of the vector database to insert the chunks into. + + ttl_seconds: The time to live of the chunks. 
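Finally, a sketch of registering a vector database and inserting/querying chunks, based on the parameters documented above and reusing the earlier `client`; the embedding model, dimension, and especially the chunk dict shape are assumptions, since the chunk schema is not part of this hunk.

client.vector_dbs.register(
    vector_db_id="docs",                 # placeholder identifier
    embedding_model="all-MiniLM-L6-v2",  # placeholder embedding model
    embedding_dimension=384,             # placeholder dimension
)

client.vector_io.insert(
    vector_db_id="docs",
    chunks=[{"content": "Llama Stack is a set of composable APIs.", "metadata": {"source": "readme"}}],  # assumed chunk shape
)

hits = client.vector_io.query(vector_db_id="docs", query="What is Llama Stack?")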
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -98,8 +106,14 @@ def query( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> QueryChunksResponse: """ + Query chunks from a vector database. + Args: - query: A image content item + query: The query to search for. + + vector_db_id: The identifier of the vector database to query. + + params: The parameters of the query. extra_headers: Send extra headers @@ -160,7 +174,15 @@ async def insert( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ + Insert chunks into a vector database. + Args: + chunks: The chunks to insert. + + vector_db_id: The identifier of the vector database to insert the chunks into. + + ttl_seconds: The time to live of the chunks. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -200,8 +222,14 @@ async def query( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> QueryChunksResponse: """ + Query chunks from a vector database. + Args: - query: A image content item + query: The query to search for. + + vector_db_id: The identifier of the vector database to query. + + params: The parameters of the query. extra_headers: Send extra headers diff --git a/src/llama_stack_client/types/benchmark.py b/src/llama_stack_client/types/benchmark.py index 3af66f6a..e0b1ce9e 100644 --- a/src/llama_stack_client/types/benchmark.py +++ b/src/llama_stack_client/types/benchmark.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import Dict, List, Union +from typing import Dict, List, Union, Optional from typing_extensions import Literal from .._models import BaseModel @@ -17,8 +17,8 @@ class Benchmark(BaseModel): provider_id: str - provider_resource_id: str - scoring_functions: List[str] type: Literal["benchmark"] + + provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/benchmark_register_params.py b/src/llama_stack_client/types/benchmark_register_params.py index def970a1..0fa9d508 100644 --- a/src/llama_stack_client/types/benchmark_register_params.py +++ b/src/llama_stack_client/types/benchmark_register_params.py @@ -10,13 +10,19 @@ class BenchmarkRegisterParams(TypedDict, total=False): benchmark_id: Required[str] + """The ID of the benchmark to register.""" dataset_id: Required[str] + """The ID of the dataset to use for the benchmark.""" scoring_functions: Required[List[str]] + """The scoring functions to use for the benchmark.""" metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + """The metadata to use for the benchmark.""" provider_benchmark_id: str + """The ID of the provider benchmark to use for the benchmark.""" provider_id: str + """The ID of the provider to use for the benchmark.""" diff --git a/src/llama_stack_client/types/chat/__init__.py b/src/llama_stack_client/types/chat/__init__.py index 9384ac14..27720e7f 100644 --- a/src/llama_stack_client/types/chat/__init__.py +++ b/src/llama_stack_client/types/chat/__init__.py @@ -2,5 +2,8 @@ from __future__ import annotations +from .completion_list_params import CompletionListParams as CompletionListParams from .completion_create_params import CompletionCreateParams as CompletionCreateParams +from .completion_list_response import CompletionListResponse as CompletionListResponse from .completion_create_response import CompletionCreateResponse as CompletionCreateResponse +from .completion_retrieve_response import 
CompletionRetrieveResponse as CompletionRetrieveResponse diff --git a/src/llama_stack_client/types/chat/completion_create_params.py b/src/llama_stack_client/types/chat/completion_create_params.py index 0281420b..2c9d26f7 100644 --- a/src/llama_stack_client/types/chat/completion_create_params.py +++ b/src/llama_stack_client/types/chat/completion_create_params.py @@ -47,7 +47,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): messages: Required[Iterable[Message]] - """List of messages in the conversation""" + """List of messages in the conversation.""" model: Required[str] """The identifier of the model to use. @@ -57,64 +57,64 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ frequency_penalty: float - """(Optional) The penalty for repeated tokens""" + """(Optional) The penalty for repeated tokens.""" function_call: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - """(Optional) The function call to use""" + """(Optional) The function call to use.""" functions: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - """(Optional) List of functions to use""" + """(Optional) List of functions to use.""" logit_bias: Dict[str, float] - """(Optional) The logit bias to use""" + """(Optional) The logit bias to use.""" logprobs: bool - """(Optional) The log probabilities to use""" + """(Optional) The log probabilities to use.""" max_completion_tokens: int - """(Optional) The maximum number of tokens to generate""" + """(Optional) The maximum number of tokens to generate.""" max_tokens: int - """(Optional) The maximum number of tokens to generate""" + """(Optional) The maximum number of tokens to generate.""" n: int - """(Optional) The number of completions to generate""" + """(Optional) The number of completions to generate.""" parallel_tool_calls: bool - """(Optional) Whether to parallelize tool calls""" + """(Optional) Whether to parallelize tool calls.""" presence_penalty: float - """(Optional) The penalty for repeated tokens""" + """(Optional) The penalty for repeated tokens.""" response_format: ResponseFormat - """(Optional) The response format to use""" + """(Optional) The response format to use.""" seed: int - """(Optional) The seed to use""" + """(Optional) The seed to use.""" stop: Union[str, List[str]] - """(Optional) The stop tokens to use""" + """(Optional) The stop tokens to use.""" stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - """(Optional) The stream options to use""" + """(Optional) The stream options to use.""" temperature: float - """(Optional) The temperature to use""" + """(Optional) The temperature to use.""" tool_choice: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - """(Optional) The tool choice to use""" + """(Optional) The tool choice to use.""" tools: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - """(Optional) The tools to use""" + """(Optional) The tools to use.""" top_logprobs: int - """(Optional) The top log probabilities to use""" + """(Optional) The top log probabilities to use.""" top_p: float - """(Optional) The top p to use""" + """(Optional) The top p to use.""" user: str - """(Optional) The user to use""" + """(Optional) The user to use.""" class MessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(TypedDict, total=False): @@ -390,12 +390,12 @@ class ResponseFormatOpenAIResponseFormatJsonObject(TypedDict, total=False): class 
CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False): stream: Literal[False] - """(Optional) Whether to stream the response""" + """(Optional) Whether to stream the response.""" class CompletionCreateParamsStreaming(CompletionCreateParamsBase): stream: Required[Literal[True]] - """(Optional) Whether to stream the response""" + """(Optional) Whether to stream the response.""" CompletionCreateParams = Union[CompletionCreateParamsNonStreaming, CompletionCreateParamsStreaming] diff --git a/src/llama_stack_client/types/chat/completion_list_params.py b/src/llama_stack_client/types/chat/completion_list_params.py new file mode 100644 index 00000000..5fb77c2c --- /dev/null +++ b/src/llama_stack_client/types/chat/completion_list_params.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["CompletionListParams"] + + +class CompletionListParams(TypedDict, total=False): + after: str + """The ID of the last chat completion to return.""" + + limit: int + """The maximum number of chat completions to return.""" + + model: str + """The model to filter by.""" + + order: Literal["asc", "desc"] + """The order to sort the chat completions by: "asc" or "desc". Defaults to "desc".""" diff --git a/src/llama_stack_client/types/chat/completion_list_response.py b/src/llama_stack_client/types/chat/completion_list_response.py new file mode 100644 index 00000000..d3b580a1 --- /dev/null +++ b/src/llama_stack_client/types/chat/completion_list_response.py @@ -0,0 +1,667 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = [ + "CompletionListResponse", + "Data", + "DataChoice", + "DataChoiceMessage", + "DataChoiceMessageOpenAIUserMessageParam", + "DataChoiceMessageOpenAIUserMessageParamContentUnionMember1", + "DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "DataChoiceMessageOpenAISystemMessageParam", + "DataChoiceMessageOpenAISystemMessageParamContentUnionMember1", + "DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "DataChoiceMessageOpenAIAssistantMessageParam", + "DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1", + "DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "DataChoiceMessageOpenAIAssistantMessageParamToolCall", + "DataChoiceMessageOpenAIAssistantMessageParamToolCallFunction", + "DataChoiceMessageOpenAIToolMessageParam", + 
"DataChoiceMessageOpenAIToolMessageParamContentUnionMember1", + "DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "DataChoiceMessageOpenAIDeveloperMessageParam", + "DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1", + "DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "DataChoiceLogprobs", + "DataChoiceLogprobsContent", + "DataChoiceLogprobsContentTopLogprob", + "DataChoiceLogprobsRefusal", + "DataChoiceLogprobsRefusalTopLogprob", + "DataInputMessage", + "DataInputMessageOpenAIUserMessageParam", + "DataInputMessageOpenAIUserMessageParamContentUnionMember1", + "DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "DataInputMessageOpenAISystemMessageParam", + "DataInputMessageOpenAISystemMessageParamContentUnionMember1", + "DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "DataInputMessageOpenAIAssistantMessageParam", + "DataInputMessageOpenAIAssistantMessageParamContentUnionMember1", + "DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "DataInputMessageOpenAIAssistantMessageParamToolCall", + "DataInputMessageOpenAIAssistantMessageParamToolCallFunction", + "DataInputMessageOpenAIToolMessageParam", + "DataInputMessageOpenAIToolMessageParamContentUnionMember1", + "DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "DataInputMessageOpenAIDeveloperMessageParam", + "DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1", + "DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", +] + + +class DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class 
DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ( + DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +DataChoiceMessageOpenAIUserMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class DataChoiceMessageOpenAIUserMessageParam(BaseModel): + content: Union[str, List[DataChoiceMessageOpenAIUserMessageParamContentUnionMember1]] + """The content of the message, which can include text and other media""" + + role: Literal["user"] + """Must be "user" to identify this as a user message""" + + name: Optional[str] = None + """(Optional) The name of the user message participant.""" + + +class DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ( + DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +DataChoiceMessageOpenAISystemMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class DataChoiceMessageOpenAISystemMessageParam(BaseModel): + content: Union[str, List[DataChoiceMessageOpenAISystemMessageParamContentUnionMember1]] + """The content of the "system prompt". + + If multiple system messages are provided, they are concatenated. The underlying + Llama Stack code may also add other system messages (for example, for formatting + tool definitions). 
+ """ + + role: Literal["system"] + """Must be "system" to identify this as a system message""" + + name: Optional[str] = None + """(Optional) The name of the system message participant.""" + + +class DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam( + BaseModel +): + text: str + + type: Literal["text"] + + +class DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( + BaseModel +): + image_url: ( + DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class DataChoiceMessageOpenAIAssistantMessageParamToolCallFunction(BaseModel): + arguments: Optional[str] = None + + name: Optional[str] = None + + +class DataChoiceMessageOpenAIAssistantMessageParamToolCall(BaseModel): + type: Literal["function"] + + id: Optional[str] = None + + function: Optional[DataChoiceMessageOpenAIAssistantMessageParamToolCallFunction] = None + + index: Optional[int] = None + + +class DataChoiceMessageOpenAIAssistantMessageParam(BaseModel): + role: Literal["assistant"] + """Must be "assistant" to identify this as the model's response""" + + content: Union[str, List[DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1], None] = None + """The content of the model's response""" + + name: Optional[str] = None + """(Optional) The name of the assistant message participant.""" + + tool_calls: Optional[List[DataChoiceMessageOpenAIAssistantMessageParamToolCall]] = None + """List of tool calls. 
Each tool call is an OpenAIChatCompletionToolCall object.""" + + +class DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ( + DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +DataChoiceMessageOpenAIToolMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class DataChoiceMessageOpenAIToolMessageParam(BaseModel): + content: Union[str, List[DataChoiceMessageOpenAIToolMessageParamContentUnionMember1]] + """The response content from the tool""" + + role: Literal["tool"] + """Must be "tool" to identify this as a tool response""" + + tool_call_id: str + """Unique identifier for the tool call this response is for""" + + +class DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam( + BaseModel +): + text: str + + type: Literal["text"] + + +class DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( + BaseModel +): + image_url: ( + DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class DataChoiceMessageOpenAIDeveloperMessageParam(BaseModel): + content: Union[str, List[DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1]] + """The content of the developer message""" + + role: Literal["developer"] + """Must be "developer" to identify this as a developer message""" + + name: Optional[str] = None + """(Optional) The name of the developer message participant.""" + + +DataChoiceMessage: TypeAlias = Annotated[ + Union[ + DataChoiceMessageOpenAIUserMessageParam, + DataChoiceMessageOpenAISystemMessageParam, + DataChoiceMessageOpenAIAssistantMessageParam, + DataChoiceMessageOpenAIToolMessageParam, + DataChoiceMessageOpenAIDeveloperMessageParam, + ], + PropertyInfo(discriminator="role"), +] + + +class DataChoiceLogprobsContentTopLogprob(BaseModel): + token: str + + logprob: float + + bytes: Optional[List[int]] = None + + +class DataChoiceLogprobsContent(BaseModel): + token: str + + logprob: float + + top_logprobs: List[DataChoiceLogprobsContentTopLogprob] + + bytes: Optional[List[int]] = None + + +class DataChoiceLogprobsRefusalTopLogprob(BaseModel): + token: str + + logprob: float + + bytes: 
Optional[List[int]] = None + + +class DataChoiceLogprobsRefusal(BaseModel): + token: str + + logprob: float + + top_logprobs: List[DataChoiceLogprobsRefusalTopLogprob] + + bytes: Optional[List[int]] = None + + +class DataChoiceLogprobs(BaseModel): + content: Optional[List[DataChoiceLogprobsContent]] = None + """(Optional) The log probabilities for the tokens in the message""" + + refusal: Optional[List[DataChoiceLogprobsRefusal]] = None + """(Optional) The log probabilities for the tokens in the message""" + + +class DataChoice(BaseModel): + finish_reason: str + """The reason the model stopped generating""" + + index: int + """The index of the choice""" + + message: DataChoiceMessage + """The message from the model""" + + logprobs: Optional[DataChoiceLogprobs] = None + """(Optional) The log probabilities for the tokens in the message""" + + +class DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ( + DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +DataInputMessageOpenAIUserMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class DataInputMessageOpenAIUserMessageParam(BaseModel): + content: Union[str, List[DataInputMessageOpenAIUserMessageParamContentUnionMember1]] + """The content of the message, which can include text and other media""" + + role: Literal["user"] + """Must be "user" to identify this as a user message""" + + name: Optional[str] = None + """(Optional) The name of the user message participant.""" + + +class DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ( + DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +DataInputMessageOpenAISystemMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class DataInputMessageOpenAISystemMessageParam(BaseModel): + content: Union[str, List[DataInputMessageOpenAISystemMessageParamContentUnionMember1]] + """The content of the "system prompt". + + If multiple system messages are provided, they are concatenated. 
The underlying + Llama Stack code may also add other system messages (for example, for formatting + tool definitions). + """ + + role: Literal["system"] + """Must be "system" to identify this as a system message""" + + name: Optional[str] = None + """(Optional) The name of the system message participant.""" + + +class DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( + BaseModel +): + image_url: ( + DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +DataInputMessageOpenAIAssistantMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class DataInputMessageOpenAIAssistantMessageParamToolCallFunction(BaseModel): + arguments: Optional[str] = None + + name: Optional[str] = None + + +class DataInputMessageOpenAIAssistantMessageParamToolCall(BaseModel): + type: Literal["function"] + + id: Optional[str] = None + + function: Optional[DataInputMessageOpenAIAssistantMessageParamToolCallFunction] = None + + index: Optional[int] = None + + +class DataInputMessageOpenAIAssistantMessageParam(BaseModel): + role: Literal["assistant"] + """Must be "assistant" to identify this as the model's response""" + + content: Union[str, List[DataInputMessageOpenAIAssistantMessageParamContentUnionMember1], None] = None + """The content of the model's response""" + + name: Optional[str] = None + """(Optional) The name of the assistant message participant.""" + + tool_calls: Optional[List[DataInputMessageOpenAIAssistantMessageParamToolCall]] = None + """List of tool calls. 
Each tool call is an OpenAIChatCompletionToolCall object.""" + + +class DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ( + DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +DataInputMessageOpenAIToolMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class DataInputMessageOpenAIToolMessageParam(BaseModel): + content: Union[str, List[DataInputMessageOpenAIToolMessageParamContentUnionMember1]] + """The response content from the tool""" + + role: Literal["tool"] + """Must be "tool" to identify this as a tool response""" + + tool_call_id: str + """Unique identifier for the tool call this response is for""" + + +class DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( + BaseModel +): + image_url: ( + DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class DataInputMessageOpenAIDeveloperMessageParam(BaseModel): + content: Union[str, List[DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1]] + """The content of the developer message""" + + role: Literal["developer"] + """Must be "developer" to identify this as a developer message""" + + name: Optional[str] = None + """(Optional) The name of the developer message participant.""" + + +DataInputMessage: TypeAlias = Annotated[ + Union[ + DataInputMessageOpenAIUserMessageParam, + DataInputMessageOpenAISystemMessageParam, + DataInputMessageOpenAIAssistantMessageParam, + DataInputMessageOpenAIToolMessageParam, + DataInputMessageOpenAIDeveloperMessageParam, + ], + PropertyInfo(discriminator="role"), +] + + +class Data(BaseModel): + id: str + """The ID of the chat completion""" + + choices: List[DataChoice] + """List of choices""" + + created: int + """The Unix timestamp in seconds when the chat completion was created""" + + input_messages: List[DataInputMessage] + + model: str + """The model that was used to generate the chat completion""" + + object: Literal["chat.completion"] + """The object type, which will be "chat.completion" 
""" + + +class CompletionListResponse(BaseModel): + data: List[Data] + + first_id: str + + has_more: bool + + last_id: str + + object: Literal["list"] diff --git a/src/llama_stack_client/types/chat/completion_retrieve_response.py b/src/llama_stack_client/types/chat/completion_retrieve_response.py new file mode 100644 index 00000000..330c752d --- /dev/null +++ b/src/llama_stack_client/types/chat/completion_retrieve_response.py @@ -0,0 +1,626 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = [ + "CompletionRetrieveResponse", + "Choice", + "ChoiceMessage", + "ChoiceMessageOpenAIUserMessageParam", + "ChoiceMessageOpenAIUserMessageParamContentUnionMember1", + "ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "ChoiceMessageOpenAISystemMessageParam", + "ChoiceMessageOpenAISystemMessageParamContentUnionMember1", + "ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "ChoiceMessageOpenAIAssistantMessageParam", + "ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1", + "ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "ChoiceMessageOpenAIAssistantMessageParamToolCall", + "ChoiceMessageOpenAIAssistantMessageParamToolCallFunction", + "ChoiceMessageOpenAIToolMessageParam", + "ChoiceMessageOpenAIToolMessageParamContentUnionMember1", + "ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "ChoiceMessageOpenAIDeveloperMessageParam", + "ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1", + "ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "ChoiceLogprobs", + "ChoiceLogprobsContent", + "ChoiceLogprobsContentTopLogprob", + "ChoiceLogprobsRefusal", + "ChoiceLogprobsRefusalTopLogprob", + "InputMessage", + "InputMessageOpenAIUserMessageParam", + "InputMessageOpenAIUserMessageParamContentUnionMember1", + "InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + 
"InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "InputMessageOpenAISystemMessageParam", + "InputMessageOpenAISystemMessageParamContentUnionMember1", + "InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "InputMessageOpenAIAssistantMessageParam", + "InputMessageOpenAIAssistantMessageParamContentUnionMember1", + "InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "InputMessageOpenAIAssistantMessageParamToolCall", + "InputMessageOpenAIAssistantMessageParamToolCallFunction", + "InputMessageOpenAIToolMessageParam", + "InputMessageOpenAIToolMessageParamContentUnionMember1", + "InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", + "InputMessageOpenAIDeveloperMessageParam", + "InputMessageOpenAIDeveloperMessageParamContentUnionMember1", + "InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", + "InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", + "InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", +] + + +class ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + + type: Literal["image_url"] + + +ChoiceMessageOpenAIUserMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class ChoiceMessageOpenAIUserMessageParam(BaseModel): + content: Union[str, List[ChoiceMessageOpenAIUserMessageParamContentUnionMember1]] + """The content of the message, which can include text and other media""" + + role: Literal["user"] + """Must be "user" to identify this as a user message""" + + name: Optional[str] = None + """(Optional) The name of the user message participant.""" + + +class ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: 
Optional[str] = None + + +class ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + + type: Literal["image_url"] + + +ChoiceMessageOpenAISystemMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class ChoiceMessageOpenAISystemMessageParam(BaseModel): + content: Union[str, List[ChoiceMessageOpenAISystemMessageParamContentUnionMember1]] + """The content of the "system prompt". + + If multiple system messages are provided, they are concatenated. The underlying + Llama Stack code may also add other system messages (for example, for formatting + tool definitions). + """ + + role: Literal["system"] + """Must be "system" to identify this as a system message""" + + name: Optional[str] = None + """(Optional) The name of the system message participant.""" + + +class ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ( + ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class ChoiceMessageOpenAIAssistantMessageParamToolCallFunction(BaseModel): + arguments: Optional[str] = None + + name: Optional[str] = None + + +class ChoiceMessageOpenAIAssistantMessageParamToolCall(BaseModel): + type: Literal["function"] + + id: Optional[str] = None + + function: Optional[ChoiceMessageOpenAIAssistantMessageParamToolCallFunction] = None + + index: Optional[int] = None + + +class ChoiceMessageOpenAIAssistantMessageParam(BaseModel): + role: Literal["assistant"] + """Must be "assistant" to identify this as the model's response""" + + content: Union[str, List[ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1], None] = None + """The content of the model's response""" + + name: Optional[str] = None + """(Optional) The name of the assistant message participant.""" + + tool_calls: Optional[List[ChoiceMessageOpenAIAssistantMessageParamToolCall]] = None + """List of tool calls. 
Each tool call is an OpenAIChatCompletionToolCall object.""" + + +class ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + + type: Literal["image_url"] + + +ChoiceMessageOpenAIToolMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class ChoiceMessageOpenAIToolMessageParam(BaseModel): + content: Union[str, List[ChoiceMessageOpenAIToolMessageParamContentUnionMember1]] + """The response content from the tool""" + + role: Literal["tool"] + """Must be "tool" to identify this as a tool response""" + + tool_call_id: str + """Unique identifier for the tool call this response is for""" + + +class ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ( + ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class ChoiceMessageOpenAIDeveloperMessageParam(BaseModel): + content: Union[str, List[ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1]] + """The content of the developer message""" + + role: Literal["developer"] + """Must be "developer" to identify this as a developer message""" + + name: Optional[str] = None + """(Optional) The name of the developer message participant.""" + + +ChoiceMessage: TypeAlias = Annotated[ + Union[ + ChoiceMessageOpenAIUserMessageParam, + ChoiceMessageOpenAISystemMessageParam, + ChoiceMessageOpenAIAssistantMessageParam, + ChoiceMessageOpenAIToolMessageParam, + ChoiceMessageOpenAIDeveloperMessageParam, + ], + PropertyInfo(discriminator="role"), +] + + +class ChoiceLogprobsContentTopLogprob(BaseModel): + token: str + + logprob: float + + bytes: Optional[List[int]] = None + + +class ChoiceLogprobsContent(BaseModel): + token: str + + logprob: float + + top_logprobs: List[ChoiceLogprobsContentTopLogprob] + + bytes: Optional[List[int]] = None + + +class ChoiceLogprobsRefusalTopLogprob(BaseModel): + token: str + + logprob: float + + bytes: Optional[List[int]] = None + + +class ChoiceLogprobsRefusal(BaseModel): + token: str + + logprob: float + + top_logprobs: 
List[ChoiceLogprobsRefusalTopLogprob] + + bytes: Optional[List[int]] = None + + +class ChoiceLogprobs(BaseModel): + content: Optional[List[ChoiceLogprobsContent]] = None + """(Optional) The log probabilities for the tokens in the message""" + + refusal: Optional[List[ChoiceLogprobsRefusal]] = None + """(Optional) The log probabilities for the tokens in the message""" + + +class Choice(BaseModel): + finish_reason: str + """The reason the model stopped generating""" + + index: int + """The index of the choice""" + + message: ChoiceMessage + """The message from the model""" + + logprobs: Optional[ChoiceLogprobs] = None + """(Optional) The log probabilities for the tokens in the message""" + + +class InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL(BaseModel): + url: str + + detail: Optional[str] = None + + +class InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + + type: Literal["image_url"] + + +InputMessageOpenAIUserMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class InputMessageOpenAIUserMessageParam(BaseModel): + content: Union[str, List[InputMessageOpenAIUserMessageParamContentUnionMember1]] + """The content of the message, which can include text and other media""" + + role: Literal["user"] + """Must be "user" to identify this as a user message""" + + name: Optional[str] = None + """(Optional) The name of the user message participant.""" + + +class InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + + type: Literal["image_url"] + + +InputMessageOpenAISystemMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class InputMessageOpenAISystemMessageParam(BaseModel): + content: Union[str, List[InputMessageOpenAISystemMessageParamContentUnionMember1]] + """The content of the "system prompt". + + If multiple system messages are provided, they are concatenated. The underlying + Llama Stack code may also add other system messages (for example, for formatting + tool definitions). 
+ """ + + role: Literal["system"] + """Must be "system" to identify this as a system message""" + + name: Optional[str] = None + """(Optional) The name of the system message participant.""" + + +class InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ( + InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +InputMessageOpenAIAssistantMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class InputMessageOpenAIAssistantMessageParamToolCallFunction(BaseModel): + arguments: Optional[str] = None + + name: Optional[str] = None + + +class InputMessageOpenAIAssistantMessageParamToolCall(BaseModel): + type: Literal["function"] + + id: Optional[str] = None + + function: Optional[InputMessageOpenAIAssistantMessageParamToolCallFunction] = None + + index: Optional[int] = None + + +class InputMessageOpenAIAssistantMessageParam(BaseModel): + role: Literal["assistant"] + """Must be "assistant" to identify this as the model's response""" + + content: Union[str, List[InputMessageOpenAIAssistantMessageParamContentUnionMember1], None] = None + """The content of the model's response""" + + name: Optional[str] = None + """(Optional) The name of the assistant message participant.""" + + tool_calls: Optional[List[InputMessageOpenAIAssistantMessageParamToolCall]] = None + """List of tool calls. 
Each tool call is an OpenAIChatCompletionToolCall object.""" + + +class InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL(BaseModel): + url: str + + detail: Optional[str] = None + + +class InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + + type: Literal["image_url"] + + +InputMessageOpenAIToolMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class InputMessageOpenAIToolMessageParam(BaseModel): + content: Union[str, List[InputMessageOpenAIToolMessageParamContentUnionMember1]] + """The response content from the tool""" + + role: Literal["tool"] + """Must be "tool" to identify this as a tool response""" + + tool_call_id: str + """Unique identifier for the tool call this response is for""" + + +class InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): + text: str + + type: Literal["text"] + + +class InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( + BaseModel +): + url: str + + detail: Optional[str] = None + + +class InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): + image_url: ( + InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL + ) + + type: Literal["image_url"] + + +InputMessageOpenAIDeveloperMessageParamContentUnionMember1: TypeAlias = Annotated[ + Union[ + InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, + InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, + ], + PropertyInfo(discriminator="type"), +] + + +class InputMessageOpenAIDeveloperMessageParam(BaseModel): + content: Union[str, List[InputMessageOpenAIDeveloperMessageParamContentUnionMember1]] + """The content of the developer message""" + + role: Literal["developer"] + """Must be "developer" to identify this as a developer message""" + + name: Optional[str] = None + """(Optional) The name of the developer message participant.""" + + +InputMessage: TypeAlias = Annotated[ + Union[ + InputMessageOpenAIUserMessageParam, + InputMessageOpenAISystemMessageParam, + InputMessageOpenAIAssistantMessageParam, + InputMessageOpenAIToolMessageParam, + InputMessageOpenAIDeveloperMessageParam, + ], + PropertyInfo(discriminator="role"), +] + + +class CompletionRetrieveResponse(BaseModel): + id: str + """The ID of the chat completion""" + + choices: List[Choice] + """List of choices""" + + created: int + """The Unix timestamp in seconds when the chat completion was created""" + + input_messages: List[InputMessage] + + model: str + """The model that was used to generate the chat completion""" + + object: Literal["chat.completion"] + """The object type, which will be "chat.completion" """ diff --git a/src/llama_stack_client/types/completion_create_params.py 
b/src/llama_stack_client/types/completion_create_params.py index a92b733e..e43f05e4 100644 --- a/src/llama_stack_client/types/completion_create_params.py +++ b/src/llama_stack_client/types/completion_create_params.py @@ -17,63 +17,63 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ prompt: Required[Union[str, List[str], Iterable[int], Iterable[Iterable[int]]]] - """The prompt to generate a completion for""" + """The prompt to generate a completion for.""" best_of: int - """(Optional) The number of completions to generate""" + """(Optional) The number of completions to generate.""" echo: bool - """(Optional) Whether to echo the prompt""" + """(Optional) Whether to echo the prompt.""" frequency_penalty: float - """(Optional) The penalty for repeated tokens""" + """(Optional) The penalty for repeated tokens.""" guided_choice: List[str] logit_bias: Dict[str, float] - """(Optional) The logit bias to use""" + """(Optional) The logit bias to use.""" logprobs: bool - """(Optional) The log probabilities to use""" + """(Optional) The log probabilities to use.""" max_tokens: int - """(Optional) The maximum number of tokens to generate""" + """(Optional) The maximum number of tokens to generate.""" n: int - """(Optional) The number of completions to generate""" + """(Optional) The number of completions to generate.""" presence_penalty: float - """(Optional) The penalty for repeated tokens""" + """(Optional) The penalty for repeated tokens.""" prompt_logprobs: int seed: int - """(Optional) The seed to use""" + """(Optional) The seed to use.""" stop: Union[str, List[str]] - """(Optional) The stop tokens to use""" + """(Optional) The stop tokens to use.""" stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - """(Optional) The stream options to use""" + """(Optional) The stream options to use.""" temperature: float - """(Optional) The temperature to use""" + """(Optional) The temperature to use.""" top_p: float - """(Optional) The top p to use""" + """(Optional) The top p to use.""" user: str - """(Optional) The user to use""" + """(Optional) The user to use.""" class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False): stream: Literal[False] - """(Optional) Whether to stream the response""" + """(Optional) Whether to stream the response.""" class CompletionCreateParamsStreaming(CompletionCreateParamsBase): stream: Required[Literal[True]] - """(Optional) Whether to stream the response""" + """(Optional) Whether to stream the response.""" CompletionCreateParams = Union[CompletionCreateParamsNonStreaming, CompletionCreateParamsStreaming] diff --git a/src/llama_stack_client/types/dataset_list_response.py b/src/llama_stack_client/types/dataset_list_response.py index 902c6274..42b27ab4 100644 --- a/src/llama_stack_client/types/dataset_list_response.py +++ b/src/llama_stack_client/types/dataset_list_response.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import Dict, List, Union +from typing import Dict, List, Union, Optional from typing_extensions import Literal, Annotated, TypeAlias from .._utils import PropertyInfo @@ -50,8 +50,6 @@ class DatasetListResponseItem(BaseModel): provider_id: str - provider_resource_id: str - purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"] """Purpose of the dataset. 
Each purpose has a required input data schema.""" @@ -60,5 +58,7 @@ class DatasetListResponseItem(BaseModel): type: Literal["dataset"] + provider_resource_id: Optional[str] = None + DatasetListResponse: TypeAlias = List[DatasetListResponseItem] diff --git a/src/llama_stack_client/types/dataset_register_params.py b/src/llama_stack_client/types/dataset_register_params.py index 824dd0a9..6fd5db3f 100644 --- a/src/llama_stack_client/types/dataset_register_params.py +++ b/src/llama_stack_client/types/dataset_register_params.py @@ -12,7 +12,7 @@ class DatasetRegisterParams(TypedDict, total=False): purpose: Required[Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"]] """The purpose of the dataset. - One of - "post-training/messages": The dataset contains a messages column with + One of: - "post-training/messages": The dataset contains a messages column with list of messages for post-training. { "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset contains a question column and an answer @@ -41,7 +41,7 @@ class DatasetRegisterParams(TypedDict, total=False): """The ID of the dataset. If not provided, an ID will be generated.""" metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - """The metadata for the dataset. - E.g. {"description": "My dataset"}""" + """The metadata for the dataset. - E.g. {"description": "My dataset"}.""" class SourceUriDataSource(TypedDict, total=False): diff --git a/src/llama_stack_client/types/dataset_register_response.py b/src/llama_stack_client/types/dataset_register_response.py index 8038b192..a79367bb 100644 --- a/src/llama_stack_client/types/dataset_register_response.py +++ b/src/llama_stack_client/types/dataset_register_response.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import Dict, List, Union +from typing import Dict, List, Union, Optional from typing_extensions import Literal, Annotated, TypeAlias from .._utils import PropertyInfo @@ -41,8 +41,6 @@ class DatasetRegisterResponse(BaseModel): provider_id: str - provider_resource_id: str - purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"] """Purpose of the dataset. Each purpose has a required input data schema.""" @@ -50,3 +48,5 @@ class DatasetRegisterResponse(BaseModel): """A dataset that can be obtained from a URI.""" type: Literal["dataset"] + + provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/dataset_retrieve_response.py b/src/llama_stack_client/types/dataset_retrieve_response.py index debce418..ab96c387 100644 --- a/src/llama_stack_client/types/dataset_retrieve_response.py +++ b/src/llama_stack_client/types/dataset_retrieve_response.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import Dict, List, Union +from typing import Dict, List, Union, Optional from typing_extensions import Literal, Annotated, TypeAlias from .._utils import PropertyInfo @@ -41,8 +41,6 @@ class DatasetRetrieveResponse(BaseModel): provider_id: str - provider_resource_id: str - purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"] """Purpose of the dataset. 
Each purpose has a required input data schema.""" @@ -50,3 +48,5 @@ class DatasetRetrieveResponse(BaseModel): """A dataset that can be obtained from a URI.""" type: Literal["dataset"] + + provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/inference_batch_chat_completion_params.py b/src/llama_stack_client/types/inference_batch_chat_completion_params.py index ca53fdbf..b5da0f0e 100644 --- a/src/llama_stack_client/types/inference_batch_chat_completion_params.py +++ b/src/llama_stack_client/types/inference_batch_chat_completion_params.py @@ -15,21 +15,32 @@ class InferenceBatchChatCompletionParams(TypedDict, total=False): messages_batch: Required[Iterable[Iterable[Message]]] + """The messages to generate completions for.""" model_id: Required[str] + """The identifier of the model to use. + + The model must be registered with Llama Stack and available via the /models + endpoint. + """ logprobs: Logprobs + """ + (Optional) If specified, log probabilities for each token position will be + returned. + """ response_format: ResponseFormat - """Configuration for JSON schema-guided response generation.""" + """(Optional) Grammar specification for guided (structured) decoding.""" sampling_params: SamplingParams - """Sampling parameters.""" + """(Optional) Parameters to control the sampling strategy.""" tool_config: ToolConfig - """Configuration for tool use.""" + """(Optional) Configuration for tool use.""" tools: Iterable[Tool] + """(Optional) List of tool definitions available to the model.""" class Logprobs(TypedDict, total=False): diff --git a/src/llama_stack_client/types/inference_batch_completion_params.py b/src/llama_stack_client/types/inference_batch_completion_params.py index cbeb9309..d3db8e13 100644 --- a/src/llama_stack_client/types/inference_batch_completion_params.py +++ b/src/llama_stack_client/types/inference_batch_completion_params.py @@ -14,16 +14,26 @@ class InferenceBatchCompletionParams(TypedDict, total=False): content_batch: Required[List[InterleavedContent]] + """The content to generate completions for.""" model_id: Required[str] + """The identifier of the model to use. + + The model must be registered with Llama Stack and available via the /models + endpoint. + """ logprobs: Logprobs + """ + (Optional) If specified, log probabilities for each token position will be + returned. + """ response_format: ResponseFormat - """Configuration for JSON schema-guided response generation.""" + """(Optional) Grammar specification for guided (structured) decoding.""" sampling_params: SamplingParams - """Sampling parameters.""" + """(Optional) Parameters to control the sampling strategy.""" class Logprobs(TypedDict, total=False): diff --git a/src/llama_stack_client/types/inference_chat_completion_params.py b/src/llama_stack_client/types/inference_chat_completion_params.py index 18eb1bf7..746d3dee 100644 --- a/src/llama_stack_client/types/inference_chat_completion_params.py +++ b/src/llama_stack_client/types/inference_chat_completion_params.py @@ -22,7 +22,7 @@ class InferenceChatCompletionParamsBase(TypedDict, total=False): messages: Required[Iterable[Message]] - """List of messages in the conversation""" + """List of messages in the conversation.""" model_id: Required[str] """The identifier of the model to use. 
@@ -47,7 +47,7 @@ class InferenceChatCompletionParamsBase(TypedDict, total=False): """ sampling_params: SamplingParams - """Parameters to control the sampling strategy""" + """Parameters to control the sampling strategy.""" tool_choice: Literal["auto", "required", "none"] """(Optional) Whether tool use is required or automatic. @@ -70,7 +70,7 @@ class InferenceChatCompletionParamsBase(TypedDict, total=False): """ tools: Iterable[Tool] - """(Optional) List of tool definitions available to the model""" + """(Optional) List of tool definitions available to the model.""" class Logprobs(TypedDict, total=False): diff --git a/src/llama_stack_client/types/inference_completion_params.py b/src/llama_stack_client/types/inference_completion_params.py index 60ccefce..c122f017 100644 --- a/src/llama_stack_client/types/inference_completion_params.py +++ b/src/llama_stack_client/types/inference_completion_params.py @@ -19,7 +19,7 @@ class InferenceCompletionParamsBase(TypedDict, total=False): content: Required[InterleavedContent] - """The content to generate a completion for""" + """The content to generate a completion for.""" model_id: Required[str] """The identifier of the model to use. @@ -35,10 +35,10 @@ class InferenceCompletionParamsBase(TypedDict, total=False): """ response_format: ResponseFormat - """(Optional) Grammar specification for guided (structured) decoding""" + """(Optional) Grammar specification for guided (structured) decoding.""" sampling_params: SamplingParams - """(Optional) Parameters to control the sampling strategy""" + """(Optional) Parameters to control the sampling strategy.""" class Logprobs(TypedDict, total=False): diff --git a/src/llama_stack_client/types/inference_step.py b/src/llama_stack_client/types/inference_step.py index bfd92166..2aecb193 100644 --- a/src/llama_stack_client/types/inference_step.py +++ b/src/llama_stack_client/types/inference_step.py @@ -20,6 +20,7 @@ class InferenceStep(BaseModel): """The ID of the step.""" step_type: Literal["inference"] + """Type of the step in an agent turn.""" turn_id: str """The ID of the turn.""" diff --git a/src/llama_stack_client/types/memory_retrieval_step.py b/src/llama_stack_client/types/memory_retrieval_step.py index bd8ab735..887e9986 100644 --- a/src/llama_stack_client/types/memory_retrieval_step.py +++ b/src/llama_stack_client/types/memory_retrieval_step.py @@ -18,6 +18,7 @@ class MemoryRetrievalStep(BaseModel): """The ID of the step.""" step_type: Literal["memory_retrieval"] + """Type of the step in an agent turn.""" turn_id: str """The ID of the turn.""" diff --git a/src/llama_stack_client/types/model.py b/src/llama_stack_client/types/model.py index afa12810..dea24d53 100644 --- a/src/llama_stack_client/types/model.py +++ b/src/llama_stack_client/types/model.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-from typing import Dict, List, Union +from typing import Dict, List, Union, Optional from typing_extensions import Literal from pydantic import Field as FieldInfo @@ -19,6 +19,6 @@ class Model(BaseModel): provider_id: str - provider_resource_id: str - type: Literal["model"] + + provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/model_register_params.py b/src/llama_stack_client/types/model_register_params.py index be5d72cc..fb1d9fb6 100644 --- a/src/llama_stack_client/types/model_register_params.py +++ b/src/llama_stack_client/types/model_register_params.py @@ -10,11 +10,16 @@ class ModelRegisterParams(TypedDict, total=False): model_id: Required[str] + """The identifier of the model to register.""" metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + """Any additional metadata for this model.""" model_type: Literal["llm", "embedding"] + """The type of model to register.""" provider_id: str + """The identifier of the provider.""" provider_model_id: str + """The identifier of the model in the provider.""" diff --git a/src/llama_stack_client/types/post_training/job_artifacts_params.py b/src/llama_stack_client/types/post_training/job_artifacts_params.py index 4f75a133..851ebf5f 100644 --- a/src/llama_stack_client/types/post_training/job_artifacts_params.py +++ b/src/llama_stack_client/types/post_training/job_artifacts_params.py @@ -9,3 +9,4 @@ class JobArtifactsParams(TypedDict, total=False): job_uuid: Required[str] + """The UUID of the job to get the artifacts of.""" diff --git a/src/llama_stack_client/types/post_training/job_cancel_params.py b/src/llama_stack_client/types/post_training/job_cancel_params.py index c9c30d84..3a976e87 100644 --- a/src/llama_stack_client/types/post_training/job_cancel_params.py +++ b/src/llama_stack_client/types/post_training/job_cancel_params.py @@ -9,3 +9,4 @@ class JobCancelParams(TypedDict, total=False): job_uuid: Required[str] + """The UUID of the job to cancel.""" diff --git a/src/llama_stack_client/types/post_training/job_status_params.py b/src/llama_stack_client/types/post_training/job_status_params.py index 8cf17b03..d5e040e0 100644 --- a/src/llama_stack_client/types/post_training/job_status_params.py +++ b/src/llama_stack_client/types/post_training/job_status_params.py @@ -9,3 +9,4 @@ class JobStatusParams(TypedDict, total=False): job_uuid: Required[str] + """The UUID of the job to get the status of.""" diff --git a/src/llama_stack_client/types/post_training_preference_optimize_params.py b/src/llama_stack_client/types/post_training_preference_optimize_params.py index 11392907..f7d998eb 100644 --- a/src/llama_stack_client/types/post_training_preference_optimize_params.py +++ b/src/llama_stack_client/types/post_training_preference_optimize_params.py @@ -17,16 +17,22 @@ class PostTrainingPreferenceOptimizeParams(TypedDict, total=False): algorithm_config: Required[AlgorithmConfig] + """The algorithm configuration.""" finetuned_model: Required[str] + """The model to fine-tune.""" hyperparam_search_config: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + """The hyperparam search configuration.""" job_uuid: Required[str] + """The UUID of the job to create.""" logger_config: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + """The logger configuration.""" training_config: Required[TrainingConfig] + """The training configuration.""" class AlgorithmConfig(TypedDict, total=False): diff --git 
a/src/llama_stack_client/types/post_training_supervised_fine_tune_params.py b/src/llama_stack_client/types/post_training_supervised_fine_tune_params.py index ad298817..596ec18b 100644 --- a/src/llama_stack_client/types/post_training_supervised_fine_tune_params.py +++ b/src/llama_stack_client/types/post_training_supervised_fine_tune_params.py @@ -18,18 +18,25 @@ class PostTrainingSupervisedFineTuneParams(TypedDict, total=False): hyperparam_search_config: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + """The hyperparam search configuration.""" job_uuid: Required[str] + """The UUID of the job to create.""" logger_config: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + """The logger configuration.""" training_config: Required[TrainingConfig] + """The training configuration.""" algorithm_config: AlgorithmConfigParam + """The algorithm configuration.""" checkpoint_dir: str + """The directory to save checkpoint(s) to.""" model: str + """The model to fine-tune.""" class TrainingConfigDataConfig(TypedDict, total=False): diff --git a/src/llama_stack_client/types/response_create_params.py b/src/llama_stack_client/types/response_create_params.py index 5e5072f5..53cb6b62 100644 --- a/src/llama_stack_client/types/response_create_params.py +++ b/src/llama_stack_client/types/response_create_params.py @@ -2,16 +2,30 @@ from __future__ import annotations -from typing import Union, Iterable +from typing import Dict, List, Union, Iterable from typing_extensions import Literal, Required, TypeAlias, TypedDict __all__ = [ "ResponseCreateParamsBase", "InputUnionMember1", - "InputUnionMember1ContentUnionMember1", - "InputUnionMember1ContentUnionMember1OpenAIResponseInputMessageContentText", - "InputUnionMember1ContentUnionMember1OpenAIResponseInputMessageContentImage", + "InputUnionMember1OpenAIResponseOutputMessageWebSearchToolCall", + "InputUnionMember1OpenAIResponseOutputMessageFunctionToolCall", + "InputUnionMember1OpenAIResponseInputFunctionToolCallOutput", + "InputUnionMember1OpenAIResponseMessage", + "InputUnionMember1OpenAIResponseMessageContentUnionMember1", + "InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", + "InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", + "InputUnionMember1OpenAIResponseMessageContentUnionMember2", "Tool", + "ToolOpenAIResponseInputToolWebSearch", + "ToolOpenAIResponseInputToolFileSearch", + "ToolOpenAIResponseInputToolFileSearchRankingOptions", + "ToolOpenAIResponseInputToolFunction", + "ToolOpenAIResponseInputToolMcp", + "ToolOpenAIResponseInputToolMcpRequireApproval", + "ToolOpenAIResponseInputToolMcpRequireApprovalApprovalFilter", + "ToolOpenAIResponseInputToolMcpAllowedTools", + "ToolOpenAIResponseInputToolMcpAllowedToolsAllowedToolsFilter", "ResponseCreateParamsNonStreaming", "ResponseCreateParamsStreaming", ] @@ -24,6 +38,8 @@ class ResponseCreateParamsBase(TypedDict, total=False): model: Required[str] """The underlying LLM used for completions.""" + instructions: str + previous_response_id: str """ (Optional) if specified, the new response will be a continuation of the previous @@ -33,16 +49,56 @@ class ResponseCreateParamsBase(TypedDict, total=False): store: bool + temperature: float + tools: Iterable[Tool] -class InputUnionMember1ContentUnionMember1OpenAIResponseInputMessageContentText(TypedDict, total=False): +class InputUnionMember1OpenAIResponseOutputMessageWebSearchToolCall(TypedDict, total=False): + id: 
Required[str] + + status: Required[str] + + type: Required[Literal["web_search_call"]] + + +class InputUnionMember1OpenAIResponseOutputMessageFunctionToolCall(TypedDict, total=False): + id: Required[str] + + arguments: Required[str] + + call_id: Required[str] + + name: Required[str] + + status: Required[str] + + type: Required[Literal["function_call"]] + + +class InputUnionMember1OpenAIResponseInputFunctionToolCallOutput(TypedDict, total=False): + call_id: Required[str] + + output: Required[str] + + type: Required[Literal["function_call_output"]] + + id: str + + status: str + + +class InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText( + TypedDict, total=False +): text: Required[str] type: Required[Literal["input_text"]] -class InputUnionMember1ContentUnionMember1OpenAIResponseInputMessageContentImage(TypedDict, total=False): +class InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage( + TypedDict, total=False +): detail: Required[Literal["low", "high", "auto"]] type: Required[Literal["input_image"]] @@ -50,26 +106,118 @@ class InputUnionMember1ContentUnionMember1OpenAIResponseInputMessageContentImage image_url: str -InputUnionMember1ContentUnionMember1: TypeAlias = Union[ - InputUnionMember1ContentUnionMember1OpenAIResponseInputMessageContentText, - InputUnionMember1ContentUnionMember1OpenAIResponseInputMessageContentImage, +InputUnionMember1OpenAIResponseMessageContentUnionMember1: TypeAlias = Union[ + InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText, + InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage, ] -class InputUnionMember1(TypedDict, total=False): - content: Required[Union[str, Iterable[InputUnionMember1ContentUnionMember1]]] +class InputUnionMember1OpenAIResponseMessageContentUnionMember2(TypedDict, total=False): + text: Required[str] + + type: Required[Literal["output_text"]] + + +class InputUnionMember1OpenAIResponseMessage(TypedDict, total=False): + content: Required[ + Union[ + str, + Iterable[InputUnionMember1OpenAIResponseMessageContentUnionMember1], + Iterable[InputUnionMember1OpenAIResponseMessageContentUnionMember2], + ] + ] role: Required[Literal["system", "developer", "user", "assistant"]] - type: Literal["message"] + type: Required[Literal["message"]] + + id: str + status: str -class Tool(TypedDict, total=False): + +InputUnionMember1: TypeAlias = Union[ + InputUnionMember1OpenAIResponseOutputMessageWebSearchToolCall, + InputUnionMember1OpenAIResponseOutputMessageFunctionToolCall, + InputUnionMember1OpenAIResponseInputFunctionToolCallOutput, + InputUnionMember1OpenAIResponseMessage, +] + + +class ToolOpenAIResponseInputToolWebSearch(TypedDict, total=False): type: Required[Literal["web_search", "web_search_preview_2025_03_11"]] search_context_size: str +class ToolOpenAIResponseInputToolFileSearchRankingOptions(TypedDict, total=False): + ranker: str + + score_threshold: float + + +class ToolOpenAIResponseInputToolFileSearch(TypedDict, total=False): + type: Required[Literal["file_search"]] + + vector_store_id: Required[List[str]] + + ranking_options: ToolOpenAIResponseInputToolFileSearchRankingOptions + + +class ToolOpenAIResponseInputToolFunction(TypedDict, total=False): + name: Required[str] + + type: Required[Literal["function"]] + + description: str + + parameters: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + + strict: bool + + +class 
ToolOpenAIResponseInputToolMcpRequireApprovalApprovalFilter(TypedDict, total=False): + always: List[str] + + never: List[str] + + +ToolOpenAIResponseInputToolMcpRequireApproval: TypeAlias = Union[ + Literal["always", "never"], ToolOpenAIResponseInputToolMcpRequireApprovalApprovalFilter +] + + +class ToolOpenAIResponseInputToolMcpAllowedToolsAllowedToolsFilter(TypedDict, total=False): + tool_names: List[str] + + +ToolOpenAIResponseInputToolMcpAllowedTools: TypeAlias = Union[ + List[str], ToolOpenAIResponseInputToolMcpAllowedToolsAllowedToolsFilter +] + + +class ToolOpenAIResponseInputToolMcp(TypedDict, total=False): + require_approval: Required[ToolOpenAIResponseInputToolMcpRequireApproval] + + server_label: Required[str] + + server_url: Required[str] + + type: Required[Literal["mcp"]] + + allowed_tools: ToolOpenAIResponseInputToolMcpAllowedTools + + headers: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + + +Tool: TypeAlias = Union[ + ToolOpenAIResponseInputToolWebSearch, + ToolOpenAIResponseInputToolFileSearch, + ToolOpenAIResponseInputToolFunction, + ToolOpenAIResponseInputToolMcp, +] + + class ResponseCreateParamsNonStreaming(ResponseCreateParamsBase, total=False): stream: Literal[False] diff --git a/src/llama_stack_client/types/response_object.py b/src/llama_stack_client/types/response_object.py index 6e1161e2..7a56af88 100644 --- a/src/llama_stack_client/types/response_object.py +++ b/src/llama_stack_client/types/response_object.py @@ -9,30 +9,59 @@ __all__ = [ "ResponseObject", "Output", - "OutputOpenAIResponseOutputMessage", - "OutputOpenAIResponseOutputMessageContent", + "OutputOpenAIResponseMessage", + "OutputOpenAIResponseMessageContentUnionMember1", + "OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", + "OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", + "OutputOpenAIResponseMessageContentUnionMember2", "OutputOpenAIResponseOutputMessageWebSearchToolCall", + "OutputOpenAIResponseOutputMessageFunctionToolCall", "Error", ] -class OutputOpenAIResponseOutputMessageContent(BaseModel): +class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText(BaseModel): text: str - type: Literal["output_text"] + type: Literal["input_text"] -class OutputOpenAIResponseOutputMessage(BaseModel): - id: str +class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage(BaseModel): + detail: Literal["low", "high", "auto"] - content: List[OutputOpenAIResponseOutputMessageContent] + type: Literal["input_image"] - role: Literal["assistant"] + image_url: Optional[str] = None - status: str + +OutputOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[ + Union[ + OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText, + OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage, + ], + PropertyInfo(discriminator="type"), +] + + +class OutputOpenAIResponseMessageContentUnionMember2(BaseModel): + text: str + + type: Literal["output_text"] + + +class OutputOpenAIResponseMessage(BaseModel): + content: Union[ + str, List[OutputOpenAIResponseMessageContentUnionMember1], List[OutputOpenAIResponseMessageContentUnionMember2] + ] + + role: Literal["system", "developer", "user", "assistant"] type: Literal["message"] + id: Optional[str] = None + + status: Optional[str] = None + class OutputOpenAIResponseOutputMessageWebSearchToolCall(BaseModel): id: str @@ -42,8 +71,26 @@ class 
OutputOpenAIResponseOutputMessageWebSearchToolCall(BaseModel): type: Literal["web_search_call"] +class OutputOpenAIResponseOutputMessageFunctionToolCall(BaseModel): + id: str + + arguments: str + + call_id: str + + name: str + + status: str + + type: Literal["function_call"] + + Output: TypeAlias = Annotated[ - Union[OutputOpenAIResponseOutputMessage, OutputOpenAIResponseOutputMessageWebSearchToolCall], + Union[ + OutputOpenAIResponseMessage, + OutputOpenAIResponseOutputMessageWebSearchToolCall, + OutputOpenAIResponseOutputMessageFunctionToolCall, + ], PropertyInfo(discriminator="type"), ] @@ -55,6 +102,16 @@ class Error(BaseModel): class ResponseObject(BaseModel): + + def output_text(self) -> str: + texts: List[str] = [] + for output in self.output: + if output.type == "message": + for content in output.content: + if content.type == "output_text": + texts.append(content.text) + return "".join(texts) + id: str created_at: int diff --git a/src/llama_stack_client/types/safety_run_shield_params.py b/src/llama_stack_client/types/safety_run_shield_params.py index d2eab4c0..764be674 100644 --- a/src/llama_stack_client/types/safety_run_shield_params.py +++ b/src/llama_stack_client/types/safety_run_shield_params.py @@ -12,7 +12,10 @@ class SafetyRunShieldParams(TypedDict, total=False): messages: Required[Iterable[Message]] + """The messages to run the shield on.""" params: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + """The parameters of the shield.""" shield_id: Required[str] + """The identifier of the shield to run.""" diff --git a/src/llama_stack_client/types/scoring_fn.py b/src/llama_stack_client/types/scoring_fn.py index 459f090b..3569cb44 100644 --- a/src/llama_stack_client/types/scoring_fn.py +++ b/src/llama_stack_client/types/scoring_fn.py @@ -17,8 +17,6 @@ class ScoringFn(BaseModel): provider_id: str - provider_resource_id: str - return_type: ReturnType type: Literal["scoring_function"] @@ -26,3 +24,5 @@ class ScoringFn(BaseModel): description: Optional[str] = None params: Optional[ScoringFnParams] = None + + provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/scoring_fn_params.py b/src/llama_stack_client/types/scoring_fn_params.py index 6f4a62b0..a46b46f5 100644 --- a/src/llama_stack_client/types/scoring_fn_params.py +++ b/src/llama_stack_client/types/scoring_fn_params.py @@ -10,35 +10,29 @@ class LlmAsJudgeScoringFnParams(BaseModel): - judge_model: str + aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - type: Literal["llm_as_judge"] + judge_model: str - aggregation_functions: Optional[ - List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - ] = None + judge_score_regexes: List[str] - judge_score_regexes: Optional[List[str]] = None + type: Literal["llm_as_judge"] prompt_template: Optional[str] = None class RegexParserScoringFnParams(BaseModel): - type: Literal["regex_parser"] + aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - aggregation_functions: Optional[ - List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - ] = None + parsing_regexes: List[str] - parsing_regexes: Optional[List[str]] = None + type: Literal["regex_parser"] class BasicScoringFnParams(BaseModel): - type: Literal["basic"] + aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - aggregation_functions: 
Optional[ - List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - ] = None + type: Literal["basic"] ScoringFnParams: TypeAlias = Annotated[ diff --git a/src/llama_stack_client/types/scoring_fn_params_param.py b/src/llama_stack_client/types/scoring_fn_params_param.py index 4c255b52..b404bc89 100644 --- a/src/llama_stack_client/types/scoring_fn_params_param.py +++ b/src/llama_stack_client/types/scoring_fn_params_param.py @@ -9,29 +9,35 @@ class LlmAsJudgeScoringFnParams(TypedDict, total=False): - judge_model: Required[str] + aggregation_functions: Required[ + List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] + ] - type: Required[Literal["llm_as_judge"]] + judge_model: Required[str] - aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] + judge_score_regexes: Required[List[str]] - judge_score_regexes: List[str] + type: Required[Literal["llm_as_judge"]] prompt_template: str class RegexParserScoringFnParams(TypedDict, total=False): - type: Required[Literal["regex_parser"]] + aggregation_functions: Required[ + List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] + ] - aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] + parsing_regexes: Required[List[str]] - parsing_regexes: List[str] + type: Required[Literal["regex_parser"]] class BasicScoringFnParams(TypedDict, total=False): - type: Required[Literal["basic"]] + aggregation_functions: Required[ + List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] + ] - aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] + type: Required[Literal["basic"]] ScoringFnParamsParam: TypeAlias = Union[LlmAsJudgeScoringFnParams, RegexParserScoringFnParams, BasicScoringFnParams] diff --git a/src/llama_stack_client/types/scoring_function_register_params.py b/src/llama_stack_client/types/scoring_function_register_params.py index 6c9cb1da..c9932710 100644 --- a/src/llama_stack_client/types/scoring_function_register_params.py +++ b/src/llama_stack_client/types/scoring_function_register_params.py @@ -12,13 +12,21 @@ class ScoringFunctionRegisterParams(TypedDict, total=False): description: Required[str] + """The description of the scoring function.""" return_type: Required[ReturnType] scoring_fn_id: Required[str] + """The ID of the scoring function to register.""" params: ScoringFnParamsParam + """ + The parameters for the scoring function for benchmark eval, these can be + overridden for app eval. 
+ """ provider_id: str + """The ID of the provider to use for the scoring function.""" provider_scoring_fn_id: str + """The ID of the provider scoring function to use for the scoring function.""" diff --git a/src/llama_stack_client/types/scoring_score_batch_params.py b/src/llama_stack_client/types/scoring_score_batch_params.py index aa12ac8c..28dfa86e 100644 --- a/src/llama_stack_client/types/scoring_score_batch_params.py +++ b/src/llama_stack_client/types/scoring_score_batch_params.py @@ -12,7 +12,10 @@ class ScoringScoreBatchParams(TypedDict, total=False): dataset_id: Required[str] + """The ID of the dataset to score.""" save_results_dataset: Required[bool] + """Whether to save the results to a dataset.""" scoring_functions: Required[Dict[str, Optional[ScoringFnParamsParam]]] + """The scoring functions to use for the scoring.""" diff --git a/src/llama_stack_client/types/shared/query_config.py b/src/llama_stack_client/types/shared/query_config.py index 679f7dcb..5695bc20 100644 --- a/src/llama_stack_client/types/shared/query_config.py +++ b/src/llama_stack_client/types/shared/query_config.py @@ -1,5 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from typing import Optional + from ..._models import BaseModel from .query_generator_config import QueryGeneratorConfig @@ -7,8 +9,22 @@ class QueryConfig(BaseModel): + chunk_template: str + """Template for formatting each retrieved chunk in the context. + + Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk + content string), {metadata} (chunk metadata dict). Default: "Result + {index}\nContent: {chunk.content}\nMetadata: {metadata}\n" + """ + max_chunks: int + """Maximum number of chunks to retrieve.""" max_tokens_in_context: int + """Maximum number of tokens in the context.""" query_generator_config: QueryGeneratorConfig + """Configuration for the query generator.""" + + mode: Optional[str] = None + """Search mode for retrieval—either "vector" or "keyword". Default "vector".""" diff --git a/src/llama_stack_client/types/shared_params/query_config.py b/src/llama_stack_client/types/shared_params/query_config.py index 94cb1b8c..ded8ff9e 100644 --- a/src/llama_stack_client/types/shared_params/query_config.py +++ b/src/llama_stack_client/types/shared_params/query_config.py @@ -10,8 +10,22 @@ class QueryConfig(TypedDict, total=False): + chunk_template: Required[str] + """Template for formatting each retrieved chunk in the context. + + Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk + content string), {metadata} (chunk metadata dict). Default: "Result + {index}\nContent: {chunk.content}\nMetadata: {metadata}\n" + """ + max_chunks: Required[int] + """Maximum number of chunks to retrieve.""" max_tokens_in_context: Required[int] + """Maximum number of tokens in the context.""" query_generator_config: Required[QueryGeneratorConfig] + """Configuration for the query generator.""" + + mode: str + """Search mode for retrieval—either "vector" or "keyword". 
Default "vector".""" diff --git a/src/llama_stack_client/types/shield.py b/src/llama_stack_client/types/shield.py index 978bd255..ff5f01f1 100644 --- a/src/llama_stack_client/types/shield.py +++ b/src/llama_stack_client/types/shield.py @@ -13,8 +13,8 @@ class Shield(BaseModel): provider_id: str - provider_resource_id: str - type: Literal["shield"] params: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None + + provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/shield_call_step.py b/src/llama_stack_client/types/shield_call_step.py index 38ca0b5c..e19734c6 100644 --- a/src/llama_stack_client/types/shield_call_step.py +++ b/src/llama_stack_client/types/shield_call_step.py @@ -15,6 +15,7 @@ class ShieldCallStep(BaseModel): """The ID of the step.""" step_type: Literal["shield_call"] + """Type of the step in an agent turn.""" turn_id: str """The ID of the turn.""" diff --git a/src/llama_stack_client/types/shield_register_params.py b/src/llama_stack_client/types/shield_register_params.py index 2a51fd04..7ae0b2c1 100644 --- a/src/llama_stack_client/types/shield_register_params.py +++ b/src/llama_stack_client/types/shield_register_params.py @@ -10,9 +10,13 @@ class ShieldRegisterParams(TypedDict, total=False): shield_id: Required[str] + """The identifier of the shield to register.""" params: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + """The parameters of the shield.""" provider_id: str + """The identifier of the provider.""" provider_shield_id: str + """The identifier of the shield in the provider.""" diff --git a/src/llama_stack_client/types/telemetry_get_span_tree_params.py b/src/llama_stack_client/types/telemetry_get_span_tree_params.py index d9d647dd..7d309d3e 100644 --- a/src/llama_stack_client/types/telemetry_get_span_tree_params.py +++ b/src/llama_stack_client/types/telemetry_get_span_tree_params.py @@ -10,5 +10,7 @@ class TelemetryGetSpanTreeParams(TypedDict, total=False): attributes_to_return: List[str] + """The attributes to return in the tree.""" max_depth: int + """The maximum depth of the tree.""" diff --git a/src/llama_stack_client/types/telemetry_log_event_params.py b/src/llama_stack_client/types/telemetry_log_event_params.py index ef536792..246b6526 100644 --- a/src/llama_stack_client/types/telemetry_log_event_params.py +++ b/src/llama_stack_client/types/telemetry_log_event_params.py @@ -11,5 +11,7 @@ class TelemetryLogEventParams(TypedDict, total=False): event: Required[EventParam] + """The event to log.""" ttl_seconds: Required[int] + """The time to live of the event.""" diff --git a/src/llama_stack_client/types/telemetry_query_spans_params.py b/src/llama_stack_client/types/telemetry_query_spans_params.py index 897a024a..6429c08f 100644 --- a/src/llama_stack_client/types/telemetry_query_spans_params.py +++ b/src/llama_stack_client/types/telemetry_query_spans_params.py @@ -12,7 +12,10 @@ class TelemetryQuerySpansParams(TypedDict, total=False): attribute_filters: Required[Iterable[QueryConditionParam]] + """The attribute filters to apply to the spans.""" attributes_to_return: Required[List[str]] + """The attributes to return in the spans.""" max_depth: int + """The maximum depth of the tree.""" diff --git a/src/llama_stack_client/types/telemetry_query_traces_params.py b/src/llama_stack_client/types/telemetry_query_traces_params.py index 425b1a00..7c82ef14 100644 --- a/src/llama_stack_client/types/telemetry_query_traces_params.py +++ b/src/llama_stack_client/types/telemetry_query_traces_params.py 
@@ -12,9 +12,13 @@ class TelemetryQueryTracesParams(TypedDict, total=False): attribute_filters: Iterable[QueryConditionParam] + """The attribute filters to apply to the traces.""" limit: int + """The limit of traces to return.""" offset: int + """The offset of the traces to return.""" order_by: List[str] + """The order by of the traces to return.""" diff --git a/src/llama_stack_client/types/telemetry_save_spans_to_dataset_params.py b/src/llama_stack_client/types/telemetry_save_spans_to_dataset_params.py index 5dc8419c..bb96f8e3 100644 --- a/src/llama_stack_client/types/telemetry_save_spans_to_dataset_params.py +++ b/src/llama_stack_client/types/telemetry_save_spans_to_dataset_params.py @@ -12,9 +12,13 @@ class TelemetrySaveSpansToDatasetParams(TypedDict, total=False): attribute_filters: Required[Iterable[QueryConditionParam]] + """The attribute filters to apply to the spans.""" attributes_to_save: Required[List[str]] + """The attributes to save to the dataset.""" dataset_id: Required[str] + """The ID of the dataset to save the spans to.""" max_depth: int + """The maximum depth of the tree.""" diff --git a/src/llama_stack_client/types/tool.py b/src/llama_stack_client/types/tool.py index c1c0cce7..59e9300f 100644 --- a/src/llama_stack_client/types/tool.py +++ b/src/llama_stack_client/types/tool.py @@ -29,8 +29,6 @@ class Tool(BaseModel): provider_id: str - provider_resource_id: str - tool_host: Literal["distribution", "client", "model_context_protocol"] toolgroup_id: str @@ -38,3 +36,5 @@ class Tool(BaseModel): type: Literal["tool"] metadata: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None + + provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/tool_execution_step.py b/src/llama_stack_client/types/tool_execution_step.py index 5eb2c23b..f68115fc 100644 --- a/src/llama_stack_client/types/tool_execution_step.py +++ b/src/llama_stack_client/types/tool_execution_step.py @@ -16,6 +16,7 @@ class ToolExecutionStep(BaseModel): """The ID of the step.""" step_type: Literal["tool_execution"] + """Type of the step in an agent turn.""" tool_calls: List[ToolCall] """The tool calls to execute.""" diff --git a/src/llama_stack_client/types/tool_group.py b/src/llama_stack_client/types/tool_group.py index 480d1942..3389395a 100644 --- a/src/llama_stack_client/types/tool_group.py +++ b/src/llama_stack_client/types/tool_group.py @@ -17,10 +17,10 @@ class ToolGroup(BaseModel): provider_id: str - provider_resource_id: str - type: Literal["tool_group"] args: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None mcp_endpoint: Optional[McpEndpoint] = None + + provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/tool_list_params.py b/src/llama_stack_client/types/tool_list_params.py index c0953896..38f4bf73 100644 --- a/src/llama_stack_client/types/tool_list_params.py +++ b/src/llama_stack_client/types/tool_list_params.py @@ -9,3 +9,4 @@ class ToolListParams(TypedDict, total=False): toolgroup_id: str + """The ID of the tool group to list tools for.""" diff --git a/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py b/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py index 08208b77..4599c693 100644 --- a/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py +++ b/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py @@ -18,3 +18,4 @@ class RagToolQueryParams(TypedDict, total=False): vector_db_ids: Required[List[str]] query_config: QueryConfig 
+ """Configuration for the RAG query generation.""" diff --git a/src/llama_stack_client/types/tool_runtime_invoke_tool_params.py b/src/llama_stack_client/types/tool_runtime_invoke_tool_params.py index 68b40462..03df2d40 100644 --- a/src/llama_stack_client/types/tool_runtime_invoke_tool_params.py +++ b/src/llama_stack_client/types/tool_runtime_invoke_tool_params.py @@ -10,5 +10,7 @@ class ToolRuntimeInvokeToolParams(TypedDict, total=False): kwargs: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + """A dictionary of arguments to pass to the tool.""" tool_name: Required[str] + """The name of the tool to invoke.""" diff --git a/src/llama_stack_client/types/tool_runtime_list_tools_params.py b/src/llama_stack_client/types/tool_runtime_list_tools_params.py index 99da7533..539e176d 100644 --- a/src/llama_stack_client/types/tool_runtime_list_tools_params.py +++ b/src/llama_stack_client/types/tool_runtime_list_tools_params.py @@ -9,8 +9,10 @@ class ToolRuntimeListToolsParams(TypedDict, total=False): mcp_endpoint: McpEndpoint + """The MCP endpoint to use for the tool group.""" tool_group_id: str + """The ID of the tool group to list tools for.""" class McpEndpoint(TypedDict, total=False): diff --git a/src/llama_stack_client/types/toolgroup_register_params.py b/src/llama_stack_client/types/toolgroup_register_params.py index 8cb7af7f..a50c14c4 100644 --- a/src/llama_stack_client/types/toolgroup_register_params.py +++ b/src/llama_stack_client/types/toolgroup_register_params.py @@ -10,12 +10,16 @@ class ToolgroupRegisterParams(TypedDict, total=False): provider_id: Required[str] + """The ID of the provider to use for the tool group.""" toolgroup_id: Required[str] + """The ID of the tool group to register.""" args: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + """A dictionary of arguments to pass to the tool group.""" mcp_endpoint: McpEndpoint + """The MCP endpoint to use for the tool group.""" class McpEndpoint(TypedDict, total=False): diff --git a/src/llama_stack_client/types/vector_db_list_response.py b/src/llama_stack_client/types/vector_db_list_response.py index 0a110e2b..39161431 100644 --- a/src/llama_stack_client/types/vector_db_list_response.py +++ b/src/llama_stack_client/types/vector_db_list_response.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-from typing import List +from typing import List, Optional from typing_extensions import Literal, TypeAlias from .._models import BaseModel @@ -17,9 +17,9 @@ class VectorDBListResponseItem(BaseModel): provider_id: str - provider_resource_id: str - type: Literal["vector_db"] + provider_resource_id: Optional[str] = None + VectorDBListResponse: TypeAlias = List[VectorDBListResponseItem] diff --git a/src/llama_stack_client/types/vector_db_register_params.py b/src/llama_stack_client/types/vector_db_register_params.py index 6083bbce..734659a6 100644 --- a/src/llama_stack_client/types/vector_db_register_params.py +++ b/src/llama_stack_client/types/vector_db_register_params.py @@ -9,11 +9,16 @@ class VectorDBRegisterParams(TypedDict, total=False): embedding_model: Required[str] + """The embedding model to use.""" vector_db_id: Required[str] + """The identifier of the vector database to register.""" embedding_dimension: int + """The dimension of the embedding model.""" provider_id: str + """The identifier of the provider.""" provider_vector_db_id: str + """The identifier of the vector database in the provider.""" diff --git a/src/llama_stack_client/types/vector_db_register_response.py b/src/llama_stack_client/types/vector_db_register_response.py index cc4c201a..9c7a3166 100644 --- a/src/llama_stack_client/types/vector_db_register_response.py +++ b/src/llama_stack_client/types/vector_db_register_response.py @@ -1,5 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from typing import Optional from typing_extensions import Literal from .._models import BaseModel @@ -16,6 +17,6 @@ class VectorDBRegisterResponse(BaseModel): provider_id: str - provider_resource_id: str - type: Literal["vector_db"] + + provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/vector_db_retrieve_response.py b/src/llama_stack_client/types/vector_db_retrieve_response.py index 3bea2236..fb3597a5 100644 --- a/src/llama_stack_client/types/vector_db_retrieve_response.py +++ b/src/llama_stack_client/types/vector_db_retrieve_response.py @@ -1,5 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+from typing import Optional from typing_extensions import Literal from .._models import BaseModel @@ -16,6 +17,6 @@ class VectorDBRetrieveResponse(BaseModel): provider_id: str - provider_resource_id: str - type: Literal["vector_db"] + + provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/vector_io_insert_params.py b/src/llama_stack_client/types/vector_io_insert_params.py index 5ac67c10..adc1642e 100644 --- a/src/llama_stack_client/types/vector_io_insert_params.py +++ b/src/llama_stack_client/types/vector_io_insert_params.py @@ -12,10 +12,13 @@ class VectorIoInsertParams(TypedDict, total=False): chunks: Required[Iterable[Chunk]] + """The chunks to insert.""" vector_db_id: Required[str] + """The identifier of the vector database to insert the chunks into.""" ttl_seconds: int + """The time to live of the chunks.""" class Chunk(TypedDict, total=False): diff --git a/src/llama_stack_client/types/vector_io_query_params.py b/src/llama_stack_client/types/vector_io_query_params.py index 2fe675d1..f0569a58 100644 --- a/src/llama_stack_client/types/vector_io_query_params.py +++ b/src/llama_stack_client/types/vector_io_query_params.py @@ -12,8 +12,10 @@ class VectorIoQueryParams(TypedDict, total=False): query: Required[InterleavedContent] - """A image content item""" + """The query to search for.""" vector_db_id: Required[str] + """The identifier of the vector database to query.""" params: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + """The parameters of the query.""" diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py index 5c3d96c3..ff450202 100644 --- a/tests/api_resources/chat/test_completions.py +++ b/tests/api_resources/chat/test_completions.py @@ -9,7 +9,11 @@ from tests.utils import assert_matches_type from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types.chat import CompletionCreateResponse +from llama_stack_client.types.chat import ( + CompletionListResponse, + CompletionCreateResponse, + CompletionRetrieveResponse, +) base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -187,6 +191,79 @@ def test_streaming_response_create_overload_2(self, client: LlamaStackClient) -> assert cast(Any, response.is_closed) is True + @parametrize + def test_method_retrieve(self, client: LlamaStackClient) -> None: + completion = client.chat.completions.retrieve( + "completion_id", + ) + assert_matches_type(CompletionRetrieveResponse, completion, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: + response = client.chat.completions.with_raw_response.retrieve( + "completion_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(CompletionRetrieveResponse, completion, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: + with client.chat.completions.with_streaming_response.retrieve( + "completion_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = response.parse() + assert_matches_type(CompletionRetrieveResponse, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: LlamaStackClient) -> None: + with 
pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + client.chat.completions.with_raw_response.retrieve( + "", + ) + + @parametrize + def test_method_list(self, client: LlamaStackClient) -> None: + completion = client.chat.completions.list() + assert_matches_type(CompletionListResponse, completion, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: LlamaStackClient) -> None: + completion = client.chat.completions.list( + after="after", + limit=0, + model="model", + order="asc", + ) + assert_matches_type(CompletionListResponse, completion, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: LlamaStackClient) -> None: + response = client.chat.completions.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(CompletionListResponse, completion, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: LlamaStackClient) -> None: + with client.chat.completions.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = response.parse() + assert_matches_type(CompletionListResponse, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + class TestAsyncCompletions: parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @@ -360,3 +437,76 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncLla await stream.close() assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: + completion = await async_client.chat.completions.retrieve( + "completion_id", + ) + assert_matches_type(CompletionRetrieveResponse, completion, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: + response = await async_client.chat.completions.with_raw_response.retrieve( + "completion_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = await response.parse() + assert_matches_type(CompletionRetrieveResponse, completion, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: + async with async_client.chat.completions.with_streaming_response.retrieve( + "completion_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = await response.parse() + assert_matches_type(CompletionRetrieveResponse, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + await async_client.chat.completions.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: + completion = await async_client.chat.completions.list() + assert_matches_type(CompletionListResponse, completion, path=["response"]) + + @parametrize + async def 
test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: + completion = await async_client.chat.completions.list( + after="after", + limit=0, + model="model", + order="asc", + ) + assert_matches_type(CompletionListResponse, completion, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None: + response = await async_client.chat.completions.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = await response.parse() + assert_matches_type(CompletionListResponse, completion, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None: + async with async_client.chat.completions.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = await response.parse() + assert_matches_type(CompletionListResponse, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_eval.py b/tests/api_resources/test_eval.py index c519056b..8d04c104 100644 --- a/tests/api_resources/test_eval.py +++ b/tests/api_resources/test_eval.py @@ -32,7 +32,9 @@ def test_method_evaluate_rows(self, client: LlamaStackClient) -> None: }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -63,10 +65,10 @@ def test_method_evaluate_rows_with_all_params(self, client: LlamaStackClient) -> }, "scoring_params": { "foo": { - "judge_model": "judge_model", - "type": "llm_as_judge", "aggregation_functions": ["average"], + "judge_model": "judge_model", "judge_score_regexes": ["string"], + "type": "llm_as_judge", "prompt_template": "prompt_template", } }, @@ -89,7 +91,9 @@ def test_raw_response_evaluate_rows(self, client: LlamaStackClient) -> None: }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -115,7 +119,9 @@ def test_streaming_response_evaluate_rows(self, client: LlamaStackClient) -> Non }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -144,7 +150,9 @@ def test_path_params_evaluate_rows(self, client: LlamaStackClient) -> None: }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -165,7 +173,9 @@ def test_method_evaluate_rows_alpha(self, client: LlamaStackClient) -> None: }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -196,10 +206,10 @@ def test_method_evaluate_rows_alpha_with_all_params(self, client: LlamaStackClie }, "scoring_params": { "foo": { - "judge_model": "judge_model", - "type": "llm_as_judge", "aggregation_functions": ["average"], + "judge_model": "judge_model", "judge_score_regexes": ["string"], + "type": "llm_as_judge", "prompt_template": "prompt_template", } }, @@ -222,7 +232,9 @@ def test_raw_response_evaluate_rows_alpha(self, client: LlamaStackClient) -> Non }, "scoring_params": { "foo": { + 
"aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -248,7 +260,9 @@ def test_streaming_response_evaluate_rows_alpha(self, client: LlamaStackClient) }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -277,7 +291,9 @@ def test_path_params_evaluate_rows_alpha(self, client: LlamaStackClient) -> None }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -298,7 +314,9 @@ def test_method_run_eval(self, client: LlamaStackClient) -> None: }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -327,10 +345,10 @@ def test_method_run_eval_with_all_params(self, client: LlamaStackClient) -> None }, "scoring_params": { "foo": { - "judge_model": "judge_model", - "type": "llm_as_judge", "aggregation_functions": ["average"], + "judge_model": "judge_model", "judge_score_regexes": ["string"], + "type": "llm_as_judge", "prompt_template": "prompt_template", } }, @@ -351,7 +369,9 @@ def test_raw_response_run_eval(self, client: LlamaStackClient) -> None: }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -375,7 +395,9 @@ def test_streaming_response_run_eval(self, client: LlamaStackClient) -> None: }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -402,7 +424,9 @@ def test_path_params_run_eval(self, client: LlamaStackClient) -> None: }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -421,7 +445,9 @@ def test_method_run_eval_alpha(self, client: LlamaStackClient) -> None: }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -450,10 +476,10 @@ def test_method_run_eval_alpha_with_all_params(self, client: LlamaStackClient) - }, "scoring_params": { "foo": { - "judge_model": "judge_model", - "type": "llm_as_judge", "aggregation_functions": ["average"], + "judge_model": "judge_model", "judge_score_regexes": ["string"], + "type": "llm_as_judge", "prompt_template": "prompt_template", } }, @@ -474,7 +500,9 @@ def test_raw_response_run_eval_alpha(self, client: LlamaStackClient) -> None: }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -498,7 +526,9 @@ def test_streaming_response_run_eval_alpha(self, client: LlamaStackClient) -> No }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -525,7 +555,9 @@ def test_path_params_run_eval_alpha(self, client: LlamaStackClient) -> None: }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -548,7 +580,9 @@ async def 
test_method_evaluate_rows(self, async_client: AsyncLlamaStackClient) - }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -579,10 +613,10 @@ async def test_method_evaluate_rows_with_all_params(self, async_client: AsyncLla }, "scoring_params": { "foo": { - "judge_model": "judge_model", - "type": "llm_as_judge", "aggregation_functions": ["average"], + "judge_model": "judge_model", "judge_score_regexes": ["string"], + "type": "llm_as_judge", "prompt_template": "prompt_template", } }, @@ -605,7 +639,9 @@ async def test_raw_response_evaluate_rows(self, async_client: AsyncLlamaStackCli }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -631,7 +667,9 @@ async def test_streaming_response_evaluate_rows(self, async_client: AsyncLlamaSt }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -660,7 +698,9 @@ async def test_path_params_evaluate_rows(self, async_client: AsyncLlamaStackClie }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -681,7 +721,9 @@ async def test_method_evaluate_rows_alpha(self, async_client: AsyncLlamaStackCli }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -712,10 +754,10 @@ async def test_method_evaluate_rows_alpha_with_all_params(self, async_client: As }, "scoring_params": { "foo": { - "judge_model": "judge_model", - "type": "llm_as_judge", "aggregation_functions": ["average"], + "judge_model": "judge_model", "judge_score_regexes": ["string"], + "type": "llm_as_judge", "prompt_template": "prompt_template", } }, @@ -738,7 +780,9 @@ async def test_raw_response_evaluate_rows_alpha(self, async_client: AsyncLlamaSt }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -764,7 +808,9 @@ async def test_streaming_response_evaluate_rows_alpha(self, async_client: AsyncL }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -793,7 +839,9 @@ async def test_path_params_evaluate_rows_alpha(self, async_client: AsyncLlamaSta }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -814,7 +862,9 @@ async def test_method_run_eval(self, async_client: AsyncLlamaStackClient) -> Non }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -843,10 +893,10 @@ async def test_method_run_eval_with_all_params(self, async_client: AsyncLlamaSta }, "scoring_params": { "foo": { - "judge_model": "judge_model", - "type": "llm_as_judge", "aggregation_functions": ["average"], + "judge_model": "judge_model", "judge_score_regexes": ["string"], + "type": "llm_as_judge", "prompt_template": "prompt_template", } }, @@ -867,7 +917,9 @@ async def test_raw_response_run_eval(self, 
async_client: AsyncLlamaStackClient) }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -891,7 +943,9 @@ async def test_streaming_response_run_eval(self, async_client: AsyncLlamaStackCl }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -918,7 +972,9 @@ async def test_path_params_run_eval(self, async_client: AsyncLlamaStackClient) - }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -937,7 +993,9 @@ async def test_method_run_eval_alpha(self, async_client: AsyncLlamaStackClient) }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -966,10 +1024,10 @@ async def test_method_run_eval_alpha_with_all_params(self, async_client: AsyncLl }, "scoring_params": { "foo": { - "judge_model": "judge_model", - "type": "llm_as_judge", "aggregation_functions": ["average"], + "judge_model": "judge_model", "judge_score_regexes": ["string"], + "type": "llm_as_judge", "prompt_template": "prompt_template", } }, @@ -990,7 +1048,9 @@ async def test_raw_response_run_eval_alpha(self, async_client: AsyncLlamaStackCl }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -1014,7 +1074,9 @@ async def test_streaming_response_run_eval_alpha(self, async_client: AsyncLlamaS }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -1041,7 +1103,9 @@ async def test_path_params_run_eval_alpha(self, async_client: AsyncLlamaStackCli }, "scoring_params": { "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, diff --git a/tests/api_resources/test_responses.py b/tests/api_resources/test_responses.py index 0a2a0929..74e13e8d 100644 --- a/tests/api_resources/test_responses.py +++ b/tests/api_resources/test_responses.py @@ -30,9 +30,11 @@ def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient response = client.responses.create( input="string", model="model", + instructions="instructions", previous_response_id="previous_response_id", store=True, stream=False, + temperature=0, tools=[ { "type": "web_search", @@ -83,8 +85,10 @@ def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient input="string", model="model", stream=True, + instructions="instructions", previous_response_id="previous_response_id", store=True, + temperature=0, tools=[ { "type": "web_search", @@ -124,14 +128,14 @@ def test_streaming_response_create_overload_2(self, client: LlamaStackClient) -> @parametrize def test_method_retrieve(self, client: LlamaStackClient) -> None: response = client.responses.retrieve( - "id", + "response_id", ) assert_matches_type(ResponseObject, response, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: http_response = client.responses.with_raw_response.retrieve( - "id", + "response_id", ) assert http_response.is_closed is True @@ -142,7 +146,7 @@ def 
test_raw_response_retrieve(self, client: LlamaStackClient) -> None: @parametrize def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: with client.responses.with_streaming_response.retrieve( - "id", + "response_id", ) as http_response: assert not http_response.is_closed assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -154,7 +158,7 @@ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: @parametrize def test_path_params_retrieve(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `id` but received ''"): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): client.responses.with_raw_response.retrieve( "", ) @@ -176,9 +180,11 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn response = await async_client.responses.create( input="string", model="model", + instructions="instructions", previous_response_id="previous_response_id", store=True, stream=False, + temperature=0, tools=[ { "type": "web_search", @@ -229,8 +235,10 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn input="string", model="model", stream=True, + instructions="instructions", previous_response_id="previous_response_id", store=True, + temperature=0, tools=[ { "type": "web_search", @@ -270,14 +278,14 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncLla @parametrize async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.responses.retrieve( - "id", + "response_id", ) assert_matches_type(ResponseObject, response, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: http_response = await async_client.responses.with_raw_response.retrieve( - "id", + "response_id", ) assert http_response.is_closed is True @@ -288,7 +296,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.responses.with_streaming_response.retrieve( - "id", + "response_id", ) as http_response: assert not http_response.is_closed assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -300,7 +308,7 @@ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCl @parametrize async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `id` but received ''"): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): await async_client.responses.with_raw_response.retrieve( "", ) diff --git a/tests/api_resources/test_scoring.py b/tests/api_resources/test_scoring.py index da61ebcb..ca818363 100644 --- a/tests/api_resources/test_scoring.py +++ b/tests/api_resources/test_scoring.py @@ -26,7 +26,9 @@ def test_method_score(self, client: LlamaStackClient) -> None: input_rows=[{"foo": True}], scoring_functions={ "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -39,7 +41,9 @@ def test_raw_response_score(self, client: LlamaStackClient) -> None: input_rows=[{"foo": True}], scoring_functions={ "foo": { + "aggregation_functions": 
["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -56,7 +60,9 @@ def test_streaming_response_score(self, client: LlamaStackClient) -> None: input_rows=[{"foo": True}], scoring_functions={ "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -76,7 +82,9 @@ def test_method_score_batch(self, client: LlamaStackClient) -> None: save_results_dataset=True, scoring_functions={ "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -90,7 +98,9 @@ def test_raw_response_score_batch(self, client: LlamaStackClient) -> None: save_results_dataset=True, scoring_functions={ "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -108,7 +118,9 @@ def test_streaming_response_score_batch(self, client: LlamaStackClient) -> None: save_results_dataset=True, scoring_functions={ "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -131,7 +143,9 @@ async def test_method_score(self, async_client: AsyncLlamaStackClient) -> None: input_rows=[{"foo": True}], scoring_functions={ "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -144,7 +158,9 @@ async def test_raw_response_score(self, async_client: AsyncLlamaStackClient) -> input_rows=[{"foo": True}], scoring_functions={ "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -161,7 +177,9 @@ async def test_streaming_response_score(self, async_client: AsyncLlamaStackClien input_rows=[{"foo": True}], scoring_functions={ "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -181,7 +199,9 @@ async def test_method_score_batch(self, async_client: AsyncLlamaStackClient) -> save_results_dataset=True, scoring_functions={ "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -195,7 +215,9 @@ async def test_raw_response_score_batch(self, async_client: AsyncLlamaStackClien save_results_dataset=True, scoring_functions={ "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, @@ -213,7 +235,9 @@ async def test_streaming_response_score_batch(self, async_client: AsyncLlamaStac save_results_dataset=True, scoring_functions={ "foo": { + "aggregation_functions": ["average"], "judge_model": "judge_model", + "judge_score_regexes": ["string"], "type": "llm_as_judge", } }, diff --git a/tests/api_resources/test_scoring_functions.py b/tests/api_resources/test_scoring_functions.py index 5806bf59..d58d5c60 100644 --- a/tests/api_resources/test_scoring_functions.py +++ b/tests/api_resources/test_scoring_functions.py @@ -99,10 +99,10 @@ def test_method_register_with_all_params(self, client: LlamaStackClient) -> None return_type={"type": "string"}, scoring_fn_id="scoring_fn_id", params={ - "judge_model": "judge_model", - "type": "llm_as_judge", "aggregation_functions": ["average"], + "judge_model": "judge_model", 
"judge_score_regexes": ["string"], + "type": "llm_as_judge", "prompt_template": "prompt_template", }, provider_id="provider_id", @@ -221,10 +221,10 @@ async def test_method_register_with_all_params(self, async_client: AsyncLlamaSta return_type={"type": "string"}, scoring_fn_id="scoring_fn_id", params={ - "judge_model": "judge_model", - "type": "llm_as_judge", "aggregation_functions": ["average"], + "judge_model": "judge_model", "judge_score_regexes": ["string"], + "type": "llm_as_judge", "prompt_template": "prompt_template", }, provider_id="provider_id", diff --git a/tests/api_resources/tool_runtime/test_rag_tool.py b/tests/api_resources/tool_runtime/test_rag_tool.py index e687df20..4169e085 100644 --- a/tests/api_resources/tool_runtime/test_rag_tool.py +++ b/tests/api_resources/tool_runtime/test_rag_tool.py @@ -86,12 +86,14 @@ def test_method_query_with_all_params(self, client: LlamaStackClient) -> None: content="string", vector_db_ids=["string"], query_config={ + "chunk_template": "chunk_template", "max_chunks": 0, "max_tokens_in_context": 0, "query_generator_config": { "separator": "separator", "type": "default", }, + "mode": "mode", }, ) assert_matches_type(QueryResult, rag_tool, path=["response"]) @@ -195,12 +197,14 @@ async def test_method_query_with_all_params(self, async_client: AsyncLlamaStackC content="string", vector_db_ids=["string"], query_config={ + "chunk_template": "chunk_template", "max_chunks": 0, "max_tokens_in_context": 0, "query_generator_config": { "separator": "separator", "type": "default", }, + "mode": "mode", }, ) assert_matches_type(QueryResult, rag_tool, path=["response"]) diff --git a/tests/test_utils/test_proxy.py b/tests/test_utils/test_proxy.py index 9cefe4ea..76a29efd 100644 --- a/tests/test_utils/test_proxy.py +++ b/tests/test_utils/test_proxy.py @@ -21,3 +21,14 @@ def test_recursive_proxy() -> None: assert dir(proxy) == [] assert type(proxy).__name__ == "RecursiveLazyProxy" assert type(operator.attrgetter("name.foo.bar.baz")(proxy)).__name__ == "RecursiveLazyProxy" + + +def test_isinstance_does_not_error() -> None: + class AlwaysErrorProxy(LazyProxy[Any]): + @override + def __load__(self) -> Any: + raise RuntimeError("Mocking missing dependency") + + proxy = AlwaysErrorProxy() + assert not isinstance(proxy, dict) + assert isinstance(proxy, LazyProxy)