From b36e2ab8661e4913838c2cb4501156b290876da0 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Thu, 30 Oct 2025 18:59:34 +0000
Subject: [PATCH 1/9] chore(api)!: /v1/inspect only lists v1 apis by default
https://github.com/llamastack/llama-stack/pull/3948
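
This is a breaking change for the generated Python SDK: the `client.alpha.*`
and `client.beta.*` resource namespaces (agents, eval, post_training,
benchmarks, inference rerank, datasets) are removed, and `client.routes.list()`
now accepts parameters (see the new `route_list_params.py`) because the
/v1/inspect routes listing returns only stable v1 APIs by default.

A minimal usage sketch of the surviving surface follows. The base URL/port is
an assumption for illustration, and the name of the opt-in filter parameter is
not shown here; the actual field is defined in `route_list_params.py`, which
this patch adds.

    from llama_stack_client import LlamaStackClient

    # Assumed local server address; adjust to your deployment.
    client = LlamaStackClient(base_url="http://localhost:8321")

    # After this change, this lists only stable v1 routes by default.
    routes = client.routes.list()
    print(routes)

    # Code that relied on the removed namespaces, e.g.
    # client.alpha.agents.create(...) or client.beta.datasets.list(),
    # no longer exists on this SDK version.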
---
.stats.yml | 6 +-
api.md | 190 +--
src/llama_stack_client/_client.py | 76 --
src/llama_stack_client/resources/__init__.py | 28 -
.../resources/alpha/__init__.py | 95 --
.../resources/alpha/agents/__init__.py | 67 -
.../resources/alpha/agents/agents.py | 534 --------
.../resources/alpha/agents/session.py | 477 -------
.../resources/alpha/agents/steps.py | 187 ---
.../resources/alpha/agents/turn.py | 881 -------------
.../resources/alpha/alpha.py | 236 ----
.../resources/alpha/benchmarks.py | 365 ------
.../resources/alpha/eval/__init__.py | 39 -
.../resources/alpha/eval/eval.py | 536 --------
.../resources/alpha/eval/jobs.py | 346 -----
.../resources/alpha/inference.py | 224 ----
.../resources/alpha/post_training/__init__.py | 39 -
.../resources/alpha/post_training/job.py | 410 ------
.../alpha/post_training/post_training.py | 399 ------
.../resources/beta/__init__.py | 33 -
src/llama_stack_client/resources/beta/beta.py | 102 --
.../resources/beta/datasets.py | 682 ----------
.../resources/responses/responses.py | 22 +
src/llama_stack_client/resources/routes.py | 37 +-
src/llama_stack_client/types/__init__.py | 5 +-
.../types/alpha/__init__.py | 34 -
.../types/alpha/agent_create_params.py | 20 -
.../types/alpha/agent_create_response.py | 16 -
.../types/alpha/agent_list_params.py | 21 -
.../types/alpha/agent_list_response.py | 24 -
.../types/alpha/agent_retrieve_response.py | 25 -
.../types/alpha/agents/__init__.py | 13 -
.../agent_turn_response_stream_chunk.py | 17 -
.../types/alpha/agents/session.py | 29 -
.../alpha/agents/session_create_params.py | 18 -
.../alpha/agents/session_create_response.py | 16 -
.../types/alpha/agents/session_list_params.py | 21 -
.../alpha/agents/session_list_response.py | 24 -
.../alpha/agents/session_retrieve_params.py | 22 -
.../alpha/agents/step_retrieve_response.py | 29 -
.../types/alpha/agents/turn.py | 122 --
.../types/alpha/agents/turn_create_params.py | 170 ---
.../types/alpha/agents/turn_response_event.py | 166 ---
.../types/alpha/agents/turn_resume_params.py | 38 -
.../types/alpha/algorithm_config_param.py | 56 -
.../types/alpha/benchmark.py | 34 -
.../types/alpha/benchmark_config_param.py | 59 -
.../types/alpha/benchmark_list_response.py | 16 -
.../types/alpha/benchmark_register_params.py | 36 -
.../alpha/eval_evaluate_rows_alpha_params.py | 28 -
.../types/alpha/eval_evaluate_rows_params.py | 28 -
.../types/alpha/eval_run_eval_alpha_params.py | 20 -
.../types/alpha/eval_run_eval_params.py | 20 -
.../types/alpha/evaluate_response.py | 22 -
.../types/alpha/inference_rerank_params.py | 112 --
.../types/alpha/inference_rerank_response.py | 29 -
.../types/alpha/inference_step.py | 38 -
src/llama_stack_client/types/alpha/job.py | 21 -
.../types/alpha/list_benchmarks_response.py | 16 -
.../alpha/list_post_training_jobs_response.py | 16 -
.../types/alpha/memory_retrieval_step.py | 39 -
.../types/alpha/post_training/__init__.py | 7 -
.../post_training/job_artifacts_params.py | 18 -
.../post_training/job_artifacts_response.py | 56 -
.../alpha/post_training/job_cancel_params.py | 18 -
.../alpha/post_training/job_list_response.py | 21 -
.../alpha/post_training/job_status_params.py | 18 -
.../post_training/job_status_response.py | 72 --
.../types/alpha/post_training_job.py | 15 -
...ost_training_preference_optimize_params.py | 129 --
...st_training_supervised_fine_tune_params.py | 125 --
.../types/alpha/shield_call_step.py | 36 -
.../types/alpha/tool_execution_step.py | 40 -
.../types/alpha/tool_response.py | 29 -
.../types/alpha/tool_response_param.py | 30 -
src/llama_stack_client/types/beta/__init__.py | 9 -
.../types/beta/dataset_appendrows_params.py | 19 -
.../types/beta/dataset_iterrows_params.py | 21 -
.../types/beta/dataset_iterrows_response.py | 24 -
.../types/beta/dataset_list_response.py | 72 --
.../types/beta/dataset_register_params.py | 75 --
.../types/beta/dataset_register_response.py | 60 -
.../types/beta/dataset_retrieve_response.py | 60 -
.../types/beta/list_datasets_response.py | 17 -
.../types/conversation_create_params.py | 22 +
.../types/conversations/item_create_params.py | 22 +
.../conversations/item_create_response.py | 22 +
.../types/conversations/item_get_response.py | 22 +
.../types/conversations/item_list_response.py | 22 +
.../types/response_create_params.py | 93 ++
.../types/response_list_response.py | 116 ++
.../types/response_object.py | 94 ++
.../types/response_object_stream.py | 48 +
.../responses/input_item_list_response.py | 22 +
.../types/route_list_params.py | 17 +
.../types/shared/__init__.py | 3 -
.../types/shared/agent_config.py | 98 --
.../types/shared/response_format.py | 39 -
.../types/shared/sampling_params.py | 76 --
.../types/shared_params/__init__.py | 3 -
.../types/shared_params/agent_config.py | 100 --
.../types/shared_params/response_format.py | 36 -
.../types/shared_params/sampling_params.py | 74 --
.../types/tool_def_param.py | 34 -
tests/api_resources/alpha/__init__.py | 7 -
tests/api_resources/alpha/agents/__init__.py | 7 -
.../alpha/agents/test_session.py | 422 -------
.../api_resources/alpha/agents/test_steps.py | 178 ---
tests/api_resources/alpha/agents/test_turn.py | 1036 ---------------
tests/api_resources/alpha/eval/__init__.py | 7 -
tests/api_resources/alpha/eval/test_jobs.py | 318 -----
.../alpha/post_training/__init__.py | 7 -
.../alpha/post_training/test_job.py | 270 ----
tests/api_resources/alpha/test_agents.py | 418 ------
tests/api_resources/alpha/test_benchmarks.py | 254 ----
tests/api_resources/alpha/test_eval.py | 1121 -----------------
tests/api_resources/alpha/test_inference.py | 124 --
.../api_resources/alpha/test_post_training.py | 452 -------
tests/api_resources/beta/__init__.py | 1 -
tests/api_resources/beta/test_datasets.py | 527 --------
tests/api_resources/test_responses.py | 40 +
tests/api_resources/test_routes.py | 14 +
122 files changed, 617 insertions(+), 13951 deletions(-)
delete mode 100644 src/llama_stack_client/resources/alpha/__init__.py
delete mode 100644 src/llama_stack_client/resources/alpha/agents/__init__.py
delete mode 100644 src/llama_stack_client/resources/alpha/agents/agents.py
delete mode 100644 src/llama_stack_client/resources/alpha/agents/session.py
delete mode 100644 src/llama_stack_client/resources/alpha/agents/steps.py
delete mode 100644 src/llama_stack_client/resources/alpha/agents/turn.py
delete mode 100644 src/llama_stack_client/resources/alpha/alpha.py
delete mode 100644 src/llama_stack_client/resources/alpha/benchmarks.py
delete mode 100644 src/llama_stack_client/resources/alpha/eval/__init__.py
delete mode 100644 src/llama_stack_client/resources/alpha/eval/eval.py
delete mode 100644 src/llama_stack_client/resources/alpha/eval/jobs.py
delete mode 100644 src/llama_stack_client/resources/alpha/inference.py
delete mode 100644 src/llama_stack_client/resources/alpha/post_training/__init__.py
delete mode 100644 src/llama_stack_client/resources/alpha/post_training/job.py
delete mode 100644 src/llama_stack_client/resources/alpha/post_training/post_training.py
delete mode 100644 src/llama_stack_client/resources/beta/__init__.py
delete mode 100644 src/llama_stack_client/resources/beta/beta.py
delete mode 100644 src/llama_stack_client/resources/beta/datasets.py
delete mode 100644 src/llama_stack_client/types/alpha/agent_create_params.py
delete mode 100644 src/llama_stack_client/types/alpha/agent_create_response.py
delete mode 100644 src/llama_stack_client/types/alpha/agent_list_params.py
delete mode 100644 src/llama_stack_client/types/alpha/agent_list_response.py
delete mode 100644 src/llama_stack_client/types/alpha/agent_retrieve_response.py
delete mode 100644 src/llama_stack_client/types/alpha/agents/agent_turn_response_stream_chunk.py
delete mode 100644 src/llama_stack_client/types/alpha/agents/session.py
delete mode 100644 src/llama_stack_client/types/alpha/agents/session_create_params.py
delete mode 100644 src/llama_stack_client/types/alpha/agents/session_create_response.py
delete mode 100644 src/llama_stack_client/types/alpha/agents/session_list_params.py
delete mode 100644 src/llama_stack_client/types/alpha/agents/session_list_response.py
delete mode 100644 src/llama_stack_client/types/alpha/agents/session_retrieve_params.py
delete mode 100644 src/llama_stack_client/types/alpha/agents/step_retrieve_response.py
delete mode 100644 src/llama_stack_client/types/alpha/agents/turn.py
delete mode 100644 src/llama_stack_client/types/alpha/agents/turn_create_params.py
delete mode 100644 src/llama_stack_client/types/alpha/agents/turn_response_event.py
delete mode 100644 src/llama_stack_client/types/alpha/agents/turn_resume_params.py
delete mode 100644 src/llama_stack_client/types/alpha/algorithm_config_param.py
delete mode 100644 src/llama_stack_client/types/alpha/benchmark.py
delete mode 100644 src/llama_stack_client/types/alpha/benchmark_config_param.py
delete mode 100644 src/llama_stack_client/types/alpha/benchmark_list_response.py
delete mode 100644 src/llama_stack_client/types/alpha/benchmark_register_params.py
delete mode 100644 src/llama_stack_client/types/alpha/eval_evaluate_rows_alpha_params.py
delete mode 100644 src/llama_stack_client/types/alpha/eval_evaluate_rows_params.py
delete mode 100644 src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py
delete mode 100644 src/llama_stack_client/types/alpha/eval_run_eval_params.py
delete mode 100644 src/llama_stack_client/types/alpha/evaluate_response.py
delete mode 100644 src/llama_stack_client/types/alpha/inference_rerank_params.py
delete mode 100644 src/llama_stack_client/types/alpha/inference_rerank_response.py
delete mode 100644 src/llama_stack_client/types/alpha/inference_step.py
delete mode 100644 src/llama_stack_client/types/alpha/job.py
delete mode 100644 src/llama_stack_client/types/alpha/list_benchmarks_response.py
delete mode 100644 src/llama_stack_client/types/alpha/list_post_training_jobs_response.py
delete mode 100644 src/llama_stack_client/types/alpha/memory_retrieval_step.py
delete mode 100644 src/llama_stack_client/types/alpha/post_training/job_artifacts_params.py
delete mode 100644 src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py
delete mode 100644 src/llama_stack_client/types/alpha/post_training/job_cancel_params.py
delete mode 100644 src/llama_stack_client/types/alpha/post_training/job_list_response.py
delete mode 100644 src/llama_stack_client/types/alpha/post_training/job_status_params.py
delete mode 100644 src/llama_stack_client/types/alpha/post_training/job_status_response.py
delete mode 100644 src/llama_stack_client/types/alpha/post_training_job.py
delete mode 100644 src/llama_stack_client/types/alpha/post_training_preference_optimize_params.py
delete mode 100644 src/llama_stack_client/types/alpha/post_training_supervised_fine_tune_params.py
delete mode 100644 src/llama_stack_client/types/alpha/shield_call_step.py
delete mode 100644 src/llama_stack_client/types/alpha/tool_execution_step.py
delete mode 100644 src/llama_stack_client/types/alpha/tool_response.py
delete mode 100644 src/llama_stack_client/types/alpha/tool_response_param.py
delete mode 100644 src/llama_stack_client/types/beta/dataset_appendrows_params.py
delete mode 100644 src/llama_stack_client/types/beta/dataset_iterrows_params.py
delete mode 100644 src/llama_stack_client/types/beta/dataset_iterrows_response.py
delete mode 100644 src/llama_stack_client/types/beta/dataset_list_response.py
delete mode 100644 src/llama_stack_client/types/beta/dataset_register_params.py
delete mode 100644 src/llama_stack_client/types/beta/dataset_register_response.py
delete mode 100644 src/llama_stack_client/types/beta/dataset_retrieve_response.py
delete mode 100644 src/llama_stack_client/types/beta/list_datasets_response.py
create mode 100644 src/llama_stack_client/types/route_list_params.py
delete mode 100644 src/llama_stack_client/types/shared/agent_config.py
delete mode 100644 src/llama_stack_client/types/shared/response_format.py
delete mode 100644 src/llama_stack_client/types/shared/sampling_params.py
delete mode 100644 src/llama_stack_client/types/shared_params/agent_config.py
delete mode 100644 src/llama_stack_client/types/shared_params/response_format.py
delete mode 100644 src/llama_stack_client/types/shared_params/sampling_params.py
delete mode 100644 src/llama_stack_client/types/tool_def_param.py
delete mode 100644 tests/api_resources/alpha/__init__.py
delete mode 100644 tests/api_resources/alpha/agents/__init__.py
delete mode 100644 tests/api_resources/alpha/agents/test_session.py
delete mode 100644 tests/api_resources/alpha/agents/test_steps.py
delete mode 100644 tests/api_resources/alpha/agents/test_turn.py
delete mode 100644 tests/api_resources/alpha/eval/__init__.py
delete mode 100644 tests/api_resources/alpha/eval/test_jobs.py
delete mode 100644 tests/api_resources/alpha/post_training/__init__.py
delete mode 100644 tests/api_resources/alpha/post_training/test_job.py
delete mode 100644 tests/api_resources/alpha/test_agents.py
delete mode 100644 tests/api_resources/alpha/test_benchmarks.py
delete mode 100644 tests/api_resources/alpha/test_eval.py
delete mode 100644 tests/api_resources/alpha/test_inference.py
delete mode 100644 tests/api_resources/alpha/test_post_training.py
delete mode 100644 tests/api_resources/beta/__init__.py
delete mode 100644 tests/api_resources/beta/test_datasets.py
diff --git a/.stats.yml b/.stats.yml
index 49885bb5..7196faba 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 104
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-35c6569e5e9fcc85084c9728eb7fc7c5908297fcc77043d621d25de3c850a990.yml
-openapi_spec_hash: 0f95bbeee16f3205d36ec34cfa62c711
+configured_endpoints: 71
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-96255baaaf07826c5292cbb73073ab40aa7073c53996c3be49441a8ecf95c8ee.yml
+openapi_spec_hash: fae0303cbf75bd79be4ae084db015401
config_hash: a3829dbdaa491194d01f399784d532cd
diff --git a/api.md b/api.md
index 5f7a90a2..50e43a41 100644
--- a/api.md
+++ b/api.md
@@ -2,7 +2,6 @@
```python
from llama_stack_client.types import (
- AgentConfig,
CompletionMessage,
Document,
InterleavedContent,
@@ -11,9 +10,7 @@ from llama_stack_client.types import (
ParamType,
QueryConfig,
QueryResult,
- ResponseFormat,
SafetyViolation,
- SamplingParams,
ScoringResult,
SystemMessage,
ToolCall,
@@ -316,7 +313,7 @@ from llama_stack_client.types import ListRoutesResponse, RouteListResponse
Methods:
-- client.routes.list() -> RouteListResponse
+- client.routes.list(\*\*params) -> RouteListResponse
# Moderations
@@ -416,188 +413,3 @@ Methods:
- client.files.list(\*\*params) -> SyncOpenAICursorPage[File]
- client.files.delete(file_id) -> DeleteFileResponse
- client.files.content(file_id) -> object
-
-# Alpha
-
-## Inference
-
-Types:
-
-```python
-from llama_stack_client.types.alpha import InferenceRerankResponse
-```
-
-Methods:
-
-- client.alpha.inference.rerank(\*\*params) -> InferenceRerankResponse
-
-## PostTraining
-
-Types:
-
-```python
-from llama_stack_client.types.alpha import (
- AlgorithmConfig,
- ListPostTrainingJobsResponse,
- PostTrainingJob,
-)
-```
-
-Methods:
-
-- client.alpha.post_training.preference_optimize(\*\*params) -> PostTrainingJob
-- client.alpha.post_training.supervised_fine_tune(\*\*params) -> PostTrainingJob
-
-### Job
-
-Types:
-
-```python
-from llama_stack_client.types.alpha.post_training import (
- JobListResponse,
- JobArtifactsResponse,
- JobStatusResponse,
-)
-```
-
-Methods:
-
-- client.alpha.post_training.job.list() -> JobListResponse
-- client.alpha.post_training.job.artifacts(\*\*params) -> JobArtifactsResponse
-- client.alpha.post_training.job.cancel(\*\*params) -> None
-- client.alpha.post_training.job.status(\*\*params) -> JobStatusResponse
-
-## Benchmarks
-
-Types:
-
-```python
-from llama_stack_client.types.alpha import Benchmark, ListBenchmarksResponse, BenchmarkListResponse
-```
-
-Methods:
-
-- client.alpha.benchmarks.retrieve(benchmark_id) -> Benchmark
-- client.alpha.benchmarks.list() -> BenchmarkListResponse
-- client.alpha.benchmarks.register(\*\*params) -> None
-
-## Eval
-
-Types:
-
-```python
-from llama_stack_client.types.alpha import BenchmarkConfig, EvaluateResponse, Job
-```
-
-Methods:
-
-- client.alpha.eval.evaluate_rows(benchmark_id, \*\*params) -> EvaluateResponse
-- client.alpha.eval.evaluate_rows_alpha(benchmark_id, \*\*params) -> EvaluateResponse
-- client.alpha.eval.run_eval(benchmark_id, \*\*params) -> Job
-- client.alpha.eval.run_eval_alpha(benchmark_id, \*\*params) -> Job
-
-### Jobs
-
-Methods:
-
-- client.alpha.eval.jobs.retrieve(job_id, \*, benchmark_id) -> EvaluateResponse
-- client.alpha.eval.jobs.cancel(job_id, \*, benchmark_id) -> None
-- client.alpha.eval.jobs.status(job_id, \*, benchmark_id) -> Job
-
-## Agents
-
-Types:
-
-```python
-from llama_stack_client.types.alpha import (
- InferenceStep,
- MemoryRetrievalStep,
- ShieldCallStep,
- ToolExecutionStep,
- ToolResponse,
- AgentCreateResponse,
- AgentRetrieveResponse,
- AgentListResponse,
-)
-```
-
-Methods:
-
-- client.alpha.agents.create(\*\*params) -> AgentCreateResponse
-- client.alpha.agents.retrieve(agent_id) -> AgentRetrieveResponse
-- client.alpha.agents.list(\*\*params) -> AgentListResponse
-- client.alpha.agents.delete(agent_id) -> None
-
-### Session
-
-Types:
-
-```python
-from llama_stack_client.types.alpha.agents import (
- Session,
- SessionCreateResponse,
- SessionListResponse,
-)
-```
-
-Methods:
-
-- client.alpha.agents.session.create(agent_id, \*\*params) -> SessionCreateResponse
-- client.alpha.agents.session.retrieve(session_id, \*, agent_id, \*\*params) -> Session
-- client.alpha.agents.session.list(agent_id, \*\*params) -> SessionListResponse
-- client.alpha.agents.session.delete(session_id, \*, agent_id) -> None
-
-### Steps
-
-Types:
-
-```python
-from llama_stack_client.types.alpha.agents import StepRetrieveResponse
-```
-
-Methods:
-
-- client.alpha.agents.steps.retrieve(step_id, \*, agent_id, session_id, turn_id) -> StepRetrieveResponse
-
-### Turn
-
-Types:
-
-```python
-from llama_stack_client.types.alpha.agents import (
- AgentTurnResponseStreamChunk,
- Turn,
- TurnResponseEvent,
-)
-```
-
-Methods:
-
-- client.alpha.agents.turn.create(session_id, \*, agent_id, \*\*params) -> Turn
-- client.alpha.agents.turn.retrieve(turn_id, \*, agent_id, session_id) -> Turn
-- client.alpha.agents.turn.resume(turn_id, \*, agent_id, session_id, \*\*params) -> Turn
-
-# Beta
-
-## Datasets
-
-Types:
-
-```python
-from llama_stack_client.types.beta import (
- ListDatasetsResponse,
- DatasetRetrieveResponse,
- DatasetListResponse,
- DatasetIterrowsResponse,
- DatasetRegisterResponse,
-)
-```
-
-Methods:
-
-- client.beta.datasets.retrieve(dataset_id) -> DatasetRetrieveResponse
-- client.beta.datasets.list() -> DatasetListResponse
-- client.beta.datasets.appendrows(dataset_id, \*\*params) -> None
-- client.beta.datasets.iterrows(dataset_id, \*\*params) -> DatasetIterrowsResponse
-- client.beta.datasets.register(\*\*params) -> DatasetRegisterResponse
-- client.beta.datasets.unregister(dataset_id) -> None
diff --git a/src/llama_stack_client/_client.py b/src/llama_stack_client/_client.py
index 34de181a..96289edd 100644
--- a/src/llama_stack_client/_client.py
+++ b/src/llama_stack_client/_client.py
@@ -39,9 +39,7 @@
if TYPE_CHECKING:
from .resources import (
- beta,
chat,
- alpha,
files,
tools,
models,
@@ -70,13 +68,11 @@
from .resources.inspect import InspectResource, AsyncInspectResource
from .resources.scoring import ScoringResource, AsyncScoringResource
from .resources.shields import ShieldsResource, AsyncShieldsResource
- from .resources.beta.beta import BetaResource, AsyncBetaResource
from .resources.chat.chat import ChatResource, AsyncChatResource
from .resources.providers import ProvidersResource, AsyncProvidersResource
from .resources.vector_io import VectorIoResource, AsyncVectorIoResource
from .resources.embeddings import EmbeddingsResource, AsyncEmbeddingsResource
from .resources.toolgroups import ToolgroupsResource, AsyncToolgroupsResource
- from .resources.alpha.alpha import AlphaResource, AsyncAlphaResource
from .resources.completions import CompletionsResource, AsyncCompletionsResource
from .resources.moderations import ModerationsResource, AsyncModerationsResource
from .resources.models.models import ModelsResource, AsyncModelsResource
@@ -285,18 +281,6 @@ def files(self) -> FilesResource:
return FilesResource(self)
- @cached_property
- def alpha(self) -> AlphaResource:
- from .resources.alpha import AlphaResource
-
- return AlphaResource(self)
-
- @cached_property
- def beta(self) -> BetaResource:
- from .resources.beta import BetaResource
-
- return BetaResource(self)
-
@cached_property
def with_raw_response(self) -> LlamaStackClientWithRawResponse:
return LlamaStackClientWithRawResponse(self)
@@ -595,18 +579,6 @@ def files(self) -> AsyncFilesResource:
return AsyncFilesResource(self)
- @cached_property
- def alpha(self) -> AsyncAlphaResource:
- from .resources.alpha import AsyncAlphaResource
-
- return AsyncAlphaResource(self)
-
- @cached_property
- def beta(self) -> AsyncBetaResource:
- from .resources.beta import AsyncBetaResource
-
- return AsyncBetaResource(self)
-
@cached_property
def with_raw_response(self) -> AsyncLlamaStackClientWithRawResponse:
return AsyncLlamaStackClientWithRawResponse(self)
@@ -854,18 +826,6 @@ def files(self) -> files.FilesResourceWithRawResponse:
return FilesResourceWithRawResponse(self._client.files)
- @cached_property
- def alpha(self) -> alpha.AlphaResourceWithRawResponse:
- from .resources.alpha import AlphaResourceWithRawResponse
-
- return AlphaResourceWithRawResponse(self._client.alpha)
-
- @cached_property
- def beta(self) -> beta.BetaResourceWithRawResponse:
- from .resources.beta import BetaResourceWithRawResponse
-
- return BetaResourceWithRawResponse(self._client.beta)
-
class AsyncLlamaStackClientWithRawResponse:
_client: AsyncLlamaStackClient
@@ -1001,18 +961,6 @@ def files(self) -> files.AsyncFilesResourceWithRawResponse:
return AsyncFilesResourceWithRawResponse(self._client.files)
- @cached_property
- def alpha(self) -> alpha.AsyncAlphaResourceWithRawResponse:
- from .resources.alpha import AsyncAlphaResourceWithRawResponse
-
- return AsyncAlphaResourceWithRawResponse(self._client.alpha)
-
- @cached_property
- def beta(self) -> beta.AsyncBetaResourceWithRawResponse:
- from .resources.beta import AsyncBetaResourceWithRawResponse
-
- return AsyncBetaResourceWithRawResponse(self._client.beta)
-
class LlamaStackClientWithStreamedResponse:
_client: LlamaStackClient
@@ -1148,18 +1096,6 @@ def files(self) -> files.FilesResourceWithStreamingResponse:
return FilesResourceWithStreamingResponse(self._client.files)
- @cached_property
- def alpha(self) -> alpha.AlphaResourceWithStreamingResponse:
- from .resources.alpha import AlphaResourceWithStreamingResponse
-
- return AlphaResourceWithStreamingResponse(self._client.alpha)
-
- @cached_property
- def beta(self) -> beta.BetaResourceWithStreamingResponse:
- from .resources.beta import BetaResourceWithStreamingResponse
-
- return BetaResourceWithStreamingResponse(self._client.beta)
-
class AsyncLlamaStackClientWithStreamedResponse:
_client: AsyncLlamaStackClient
@@ -1295,18 +1231,6 @@ def files(self) -> files.AsyncFilesResourceWithStreamingResponse:
return AsyncFilesResourceWithStreamingResponse(self._client.files)
- @cached_property
- def alpha(self) -> alpha.AsyncAlphaResourceWithStreamingResponse:
- from .resources.alpha import AsyncAlphaResourceWithStreamingResponse
-
- return AsyncAlphaResourceWithStreamingResponse(self._client.alpha)
-
- @cached_property
- def beta(self) -> beta.AsyncBetaResourceWithStreamingResponse:
- from .resources.beta import AsyncBetaResourceWithStreamingResponse
-
- return AsyncBetaResourceWithStreamingResponse(self._client.beta)
-
Client = LlamaStackClient
diff --git a/src/llama_stack_client/resources/__init__.py b/src/llama_stack_client/resources/__init__.py
index 60b18979..3ca8c1c8 100644
--- a/src/llama_stack_client/resources/__init__.py
+++ b/src/llama_stack_client/resources/__init__.py
@@ -6,14 +6,6 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from .beta import (
- BetaResource,
- AsyncBetaResource,
- BetaResourceWithRawResponse,
- AsyncBetaResourceWithRawResponse,
- BetaResourceWithStreamingResponse,
- AsyncBetaResourceWithStreamingResponse,
-)
from .chat import (
ChatResource,
AsyncChatResource,
@@ -22,14 +14,6 @@
ChatResourceWithStreamingResponse,
AsyncChatResourceWithStreamingResponse,
)
-from .alpha import (
- AlphaResource,
- AsyncAlphaResource,
- AlphaResourceWithRawResponse,
- AsyncAlphaResourceWithRawResponse,
- AlphaResourceWithStreamingResponse,
- AsyncAlphaResourceWithStreamingResponse,
-)
from .files import (
FilesResource,
AsyncFilesResource,
@@ -318,16 +302,4 @@
"AsyncFilesResourceWithRawResponse",
"FilesResourceWithStreamingResponse",
"AsyncFilesResourceWithStreamingResponse",
- "AlphaResource",
- "AsyncAlphaResource",
- "AlphaResourceWithRawResponse",
- "AsyncAlphaResourceWithRawResponse",
- "AlphaResourceWithStreamingResponse",
- "AsyncAlphaResourceWithStreamingResponse",
- "BetaResource",
- "AsyncBetaResource",
- "BetaResourceWithRawResponse",
- "AsyncBetaResourceWithRawResponse",
- "BetaResourceWithStreamingResponse",
- "AsyncBetaResourceWithStreamingResponse",
]
diff --git a/src/llama_stack_client/resources/alpha/__init__.py b/src/llama_stack_client/resources/alpha/__init__.py
deleted file mode 100644
index ae13bed1..00000000
--- a/src/llama_stack_client/resources/alpha/__init__.py
+++ /dev/null
@@ -1,95 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from .eval import (
- EvalResource,
- AsyncEvalResource,
- EvalResourceWithRawResponse,
- AsyncEvalResourceWithRawResponse,
- EvalResourceWithStreamingResponse,
- AsyncEvalResourceWithStreamingResponse,
-)
-from .alpha import (
- AlphaResource,
- AsyncAlphaResource,
- AlphaResourceWithRawResponse,
- AsyncAlphaResourceWithRawResponse,
- AlphaResourceWithStreamingResponse,
- AsyncAlphaResourceWithStreamingResponse,
-)
-from .agents import (
- AgentsResource,
- AsyncAgentsResource,
- AgentsResourceWithRawResponse,
- AsyncAgentsResourceWithRawResponse,
- AgentsResourceWithStreamingResponse,
- AsyncAgentsResourceWithStreamingResponse,
-)
-from .inference import (
- InferenceResource,
- AsyncInferenceResource,
- InferenceResourceWithRawResponse,
- AsyncInferenceResourceWithRawResponse,
- InferenceResourceWithStreamingResponse,
- AsyncInferenceResourceWithStreamingResponse,
-)
-from .benchmarks import (
- BenchmarksResource,
- AsyncBenchmarksResource,
- BenchmarksResourceWithRawResponse,
- AsyncBenchmarksResourceWithRawResponse,
- BenchmarksResourceWithStreamingResponse,
- AsyncBenchmarksResourceWithStreamingResponse,
-)
-from .post_training import (
- PostTrainingResource,
- AsyncPostTrainingResource,
- PostTrainingResourceWithRawResponse,
- AsyncPostTrainingResourceWithRawResponse,
- PostTrainingResourceWithStreamingResponse,
- AsyncPostTrainingResourceWithStreamingResponse,
-)
-
-__all__ = [
- "InferenceResource",
- "AsyncInferenceResource",
- "InferenceResourceWithRawResponse",
- "AsyncInferenceResourceWithRawResponse",
- "InferenceResourceWithStreamingResponse",
- "AsyncInferenceResourceWithStreamingResponse",
- "PostTrainingResource",
- "AsyncPostTrainingResource",
- "PostTrainingResourceWithRawResponse",
- "AsyncPostTrainingResourceWithRawResponse",
- "PostTrainingResourceWithStreamingResponse",
- "AsyncPostTrainingResourceWithStreamingResponse",
- "BenchmarksResource",
- "AsyncBenchmarksResource",
- "BenchmarksResourceWithRawResponse",
- "AsyncBenchmarksResourceWithRawResponse",
- "BenchmarksResourceWithStreamingResponse",
- "AsyncBenchmarksResourceWithStreamingResponse",
- "EvalResource",
- "AsyncEvalResource",
- "EvalResourceWithRawResponse",
- "AsyncEvalResourceWithRawResponse",
- "EvalResourceWithStreamingResponse",
- "AsyncEvalResourceWithStreamingResponse",
- "AgentsResource",
- "AsyncAgentsResource",
- "AgentsResourceWithRawResponse",
- "AsyncAgentsResourceWithRawResponse",
- "AgentsResourceWithStreamingResponse",
- "AsyncAgentsResourceWithStreamingResponse",
- "AlphaResource",
- "AsyncAlphaResource",
- "AlphaResourceWithRawResponse",
- "AsyncAlphaResourceWithRawResponse",
- "AlphaResourceWithStreamingResponse",
- "AsyncAlphaResourceWithStreamingResponse",
-]
diff --git a/src/llama_stack_client/resources/alpha/agents/__init__.py b/src/llama_stack_client/resources/alpha/agents/__init__.py
deleted file mode 100644
index 6502dfa1..00000000
--- a/src/llama_stack_client/resources/alpha/agents/__init__.py
+++ /dev/null
@@ -1,67 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from .turn import (
- TurnResource,
- AsyncTurnResource,
- TurnResourceWithRawResponse,
- AsyncTurnResourceWithRawResponse,
- TurnResourceWithStreamingResponse,
- AsyncTurnResourceWithStreamingResponse,
-)
-from .steps import (
- StepsResource,
- AsyncStepsResource,
- StepsResourceWithRawResponse,
- AsyncStepsResourceWithRawResponse,
- StepsResourceWithStreamingResponse,
- AsyncStepsResourceWithStreamingResponse,
-)
-from .agents import (
- AgentsResource,
- AsyncAgentsResource,
- AgentsResourceWithRawResponse,
- AsyncAgentsResourceWithRawResponse,
- AgentsResourceWithStreamingResponse,
- AsyncAgentsResourceWithStreamingResponse,
-)
-from .session import (
- SessionResource,
- AsyncSessionResource,
- SessionResourceWithRawResponse,
- AsyncSessionResourceWithRawResponse,
- SessionResourceWithStreamingResponse,
- AsyncSessionResourceWithStreamingResponse,
-)
-
-__all__ = [
- "SessionResource",
- "AsyncSessionResource",
- "SessionResourceWithRawResponse",
- "AsyncSessionResourceWithRawResponse",
- "SessionResourceWithStreamingResponse",
- "AsyncSessionResourceWithStreamingResponse",
- "StepsResource",
- "AsyncStepsResource",
- "StepsResourceWithRawResponse",
- "AsyncStepsResourceWithRawResponse",
- "StepsResourceWithStreamingResponse",
- "AsyncStepsResourceWithStreamingResponse",
- "TurnResource",
- "AsyncTurnResource",
- "TurnResourceWithRawResponse",
- "AsyncTurnResourceWithRawResponse",
- "TurnResourceWithStreamingResponse",
- "AsyncTurnResourceWithStreamingResponse",
- "AgentsResource",
- "AsyncAgentsResource",
- "AgentsResourceWithRawResponse",
- "AsyncAgentsResourceWithRawResponse",
- "AgentsResourceWithStreamingResponse",
- "AsyncAgentsResourceWithStreamingResponse",
-]
diff --git a/src/llama_stack_client/resources/alpha/agents/agents.py b/src/llama_stack_client/resources/alpha/agents/agents.py
deleted file mode 100644
index ac5f58e4..00000000
--- a/src/llama_stack_client/resources/alpha/agents/agents.py
+++ /dev/null
@@ -1,534 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import httpx
-
-from .turn import (
- TurnResource,
- AsyncTurnResource,
- TurnResourceWithRawResponse,
- AsyncTurnResourceWithRawResponse,
- TurnResourceWithStreamingResponse,
- AsyncTurnResourceWithStreamingResponse,
-)
-from .steps import (
- StepsResource,
- AsyncStepsResource,
- StepsResourceWithRawResponse,
- AsyncStepsResourceWithRawResponse,
- StepsResourceWithStreamingResponse,
- AsyncStepsResourceWithStreamingResponse,
-)
-from .session import (
- SessionResource,
- AsyncSessionResource,
- SessionResourceWithRawResponse,
- AsyncSessionResourceWithRawResponse,
- SessionResourceWithStreamingResponse,
- AsyncSessionResourceWithStreamingResponse,
-)
-from ...._types import Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given
-from ...._utils import maybe_transform, async_maybe_transform
-from ...._compat import cached_property
-from ...._resource import SyncAPIResource, AsyncAPIResource
-from ...._response import (
- to_raw_response_wrapper,
- to_streamed_response_wrapper,
- async_to_raw_response_wrapper,
- async_to_streamed_response_wrapper,
-)
-from ....types.alpha import agent_list_params, agent_create_params
-from ...._base_client import make_request_options
-from ....types.alpha.agent_list_response import AgentListResponse
-from ....types.shared_params.agent_config import AgentConfig
-from ....types.alpha.agent_create_response import AgentCreateResponse
-from ....types.alpha.agent_retrieve_response import AgentRetrieveResponse
-
-__all__ = ["AgentsResource", "AsyncAgentsResource"]
-
-
-class AgentsResource(SyncAPIResource):
- @cached_property
- def session(self) -> SessionResource:
- return SessionResource(self._client)
-
- @cached_property
- def steps(self) -> StepsResource:
- return StepsResource(self._client)
-
- @cached_property
- def turn(self) -> TurnResource:
- return TurnResource(self._client)
-
- @cached_property
- def with_raw_response(self) -> AgentsResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return AgentsResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> AgentsResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return AgentsResourceWithStreamingResponse(self)
-
- def create(
- self,
- *,
- agent_config: AgentConfig,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AgentCreateResponse:
- """
- Create an agent with the given configuration.
-
- Args:
- agent_config: The configuration for the agent.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- return self._post(
- "/v1alpha/agents",
- body=maybe_transform({"agent_config": agent_config}, agent_create_params.AgentCreateParams),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=AgentCreateResponse,
- )
-
- def retrieve(
- self,
- agent_id: str,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AgentRetrieveResponse:
- """
- Describe an agent by its ID.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not agent_id:
- raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
- return self._get(
- f"/v1alpha/agents/{agent_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=AgentRetrieveResponse,
- )
-
- def list(
- self,
- *,
- limit: int | Omit = omit,
- start_index: int | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AgentListResponse:
- """
- List all agents.
-
- Args:
- limit: The number of agents to return.
-
- start_index: The index to start the pagination from.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- return self._get(
- "/v1alpha/agents",
- options=make_request_options(
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- query=maybe_transform(
- {
- "limit": limit,
- "start_index": start_index,
- },
- agent_list_params.AgentListParams,
- ),
- ),
- cast_to=AgentListResponse,
- )
-
- def delete(
- self,
- agent_id: str,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> None:
- """
- Delete an agent by its ID and its associated sessions and turns.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not agent_id:
- raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
- extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- return self._delete(
- f"/v1alpha/agents/{agent_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=NoneType,
- )
-
-
-class AsyncAgentsResource(AsyncAPIResource):
- @cached_property
- def session(self) -> AsyncSessionResource:
- return AsyncSessionResource(self._client)
-
- @cached_property
- def steps(self) -> AsyncStepsResource:
- return AsyncStepsResource(self._client)
-
- @cached_property
- def turn(self) -> AsyncTurnResource:
- return AsyncTurnResource(self._client)
-
- @cached_property
- def with_raw_response(self) -> AsyncAgentsResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return AsyncAgentsResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> AsyncAgentsResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return AsyncAgentsResourceWithStreamingResponse(self)
-
- async def create(
- self,
- *,
- agent_config: AgentConfig,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AgentCreateResponse:
- """
- Create an agent with the given configuration.
-
- Args:
- agent_config: The configuration for the agent.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- return await self._post(
- "/v1alpha/agents",
- body=await async_maybe_transform({"agent_config": agent_config}, agent_create_params.AgentCreateParams),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=AgentCreateResponse,
- )
-
- async def retrieve(
- self,
- agent_id: str,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AgentRetrieveResponse:
- """
- Describe an agent by its ID.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not agent_id:
- raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
- return await self._get(
- f"/v1alpha/agents/{agent_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=AgentRetrieveResponse,
- )
-
- async def list(
- self,
- *,
- limit: int | Omit = omit,
- start_index: int | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AgentListResponse:
- """
- List all agents.
-
- Args:
- limit: The number of agents to return.
-
- start_index: The index to start the pagination from.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- return await self._get(
- "/v1alpha/agents",
- options=make_request_options(
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- query=await async_maybe_transform(
- {
- "limit": limit,
- "start_index": start_index,
- },
- agent_list_params.AgentListParams,
- ),
- ),
- cast_to=AgentListResponse,
- )
-
- async def delete(
- self,
- agent_id: str,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> None:
- """
- Delete an agent by its ID and its associated sessions and turns.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not agent_id:
- raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
- extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- return await self._delete(
- f"/v1alpha/agents/{agent_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=NoneType,
- )
-
-
-class AgentsResourceWithRawResponse:
- def __init__(self, agents: AgentsResource) -> None:
- self._agents = agents
-
- self.create = to_raw_response_wrapper(
- agents.create,
- )
- self.retrieve = to_raw_response_wrapper(
- agents.retrieve,
- )
- self.list = to_raw_response_wrapper(
- agents.list,
- )
- self.delete = to_raw_response_wrapper(
- agents.delete,
- )
-
- @cached_property
- def session(self) -> SessionResourceWithRawResponse:
- return SessionResourceWithRawResponse(self._agents.session)
-
- @cached_property
- def steps(self) -> StepsResourceWithRawResponse:
- return StepsResourceWithRawResponse(self._agents.steps)
-
- @cached_property
- def turn(self) -> TurnResourceWithRawResponse:
- return TurnResourceWithRawResponse(self._agents.turn)
-
-
-class AsyncAgentsResourceWithRawResponse:
- def __init__(self, agents: AsyncAgentsResource) -> None:
- self._agents = agents
-
- self.create = async_to_raw_response_wrapper(
- agents.create,
- )
- self.retrieve = async_to_raw_response_wrapper(
- agents.retrieve,
- )
- self.list = async_to_raw_response_wrapper(
- agents.list,
- )
- self.delete = async_to_raw_response_wrapper(
- agents.delete,
- )
-
- @cached_property
- def session(self) -> AsyncSessionResourceWithRawResponse:
- return AsyncSessionResourceWithRawResponse(self._agents.session)
-
- @cached_property
- def steps(self) -> AsyncStepsResourceWithRawResponse:
- return AsyncStepsResourceWithRawResponse(self._agents.steps)
-
- @cached_property
- def turn(self) -> AsyncTurnResourceWithRawResponse:
- return AsyncTurnResourceWithRawResponse(self._agents.turn)
-
-
-class AgentsResourceWithStreamingResponse:
- def __init__(self, agents: AgentsResource) -> None:
- self._agents = agents
-
- self.create = to_streamed_response_wrapper(
- agents.create,
- )
- self.retrieve = to_streamed_response_wrapper(
- agents.retrieve,
- )
- self.list = to_streamed_response_wrapper(
- agents.list,
- )
- self.delete = to_streamed_response_wrapper(
- agents.delete,
- )
-
- @cached_property
- def session(self) -> SessionResourceWithStreamingResponse:
- return SessionResourceWithStreamingResponse(self._agents.session)
-
- @cached_property
- def steps(self) -> StepsResourceWithStreamingResponse:
- return StepsResourceWithStreamingResponse(self._agents.steps)
-
- @cached_property
- def turn(self) -> TurnResourceWithStreamingResponse:
- return TurnResourceWithStreamingResponse(self._agents.turn)
-
-
-class AsyncAgentsResourceWithStreamingResponse:
- def __init__(self, agents: AsyncAgentsResource) -> None:
- self._agents = agents
-
- self.create = async_to_streamed_response_wrapper(
- agents.create,
- )
- self.retrieve = async_to_streamed_response_wrapper(
- agents.retrieve,
- )
- self.list = async_to_streamed_response_wrapper(
- agents.list,
- )
- self.delete = async_to_streamed_response_wrapper(
- agents.delete,
- )
-
- @cached_property
- def session(self) -> AsyncSessionResourceWithStreamingResponse:
- return AsyncSessionResourceWithStreamingResponse(self._agents.session)
-
- @cached_property
- def steps(self) -> AsyncStepsResourceWithStreamingResponse:
- return AsyncStepsResourceWithStreamingResponse(self._agents.steps)
-
- @cached_property
- def turn(self) -> AsyncTurnResourceWithStreamingResponse:
- return AsyncTurnResourceWithStreamingResponse(self._agents.turn)
diff --git a/src/llama_stack_client/resources/alpha/agents/session.py b/src/llama_stack_client/resources/alpha/agents/session.py
deleted file mode 100644
index ae2b5af6..00000000
--- a/src/llama_stack_client/resources/alpha/agents/session.py
+++ /dev/null
@@ -1,477 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import httpx
-
-from ...._types import Body, Omit, Query, Headers, NoneType, NotGiven, SequenceNotStr, omit, not_given
-from ...._utils import maybe_transform, async_maybe_transform
-from ...._compat import cached_property
-from ...._resource import SyncAPIResource, AsyncAPIResource
-from ...._response import (
- to_raw_response_wrapper,
- to_streamed_response_wrapper,
- async_to_raw_response_wrapper,
- async_to_streamed_response_wrapper,
-)
-from ...._base_client import make_request_options
-from ....types.alpha.agents import session_list_params, session_create_params, session_retrieve_params
-from ....types.alpha.agents.session import Session
-from ....types.alpha.agents.session_list_response import SessionListResponse
-from ....types.alpha.agents.session_create_response import SessionCreateResponse
-
-__all__ = ["SessionResource", "AsyncSessionResource"]
-
-
-class SessionResource(SyncAPIResource):
- @cached_property
- def with_raw_response(self) -> SessionResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return SessionResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> SessionResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return SessionResourceWithStreamingResponse(self)
-
- def create(
- self,
- agent_id: str,
- *,
- session_name: str,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> SessionCreateResponse:
- """
- Create a new session for an agent.
-
- Args:
- session_name: The name of the session to create.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not agent_id:
- raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
- return self._post(
- f"/v1alpha/agents/{agent_id}/session",
- body=maybe_transform({"session_name": session_name}, session_create_params.SessionCreateParams),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=SessionCreateResponse,
- )
-
- def retrieve(
- self,
- session_id: str,
- *,
- agent_id: str,
- turn_ids: SequenceNotStr[str] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Session:
- """
- Retrieve an agent session by its ID.
-
- Args:
- turn_ids: (Optional) List of turn IDs to filter the session by.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not agent_id:
- raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
- if not session_id:
- raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
- return self._get(
- f"/v1alpha/agents/{agent_id}/session/{session_id}",
- options=make_request_options(
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- query=maybe_transform({"turn_ids": turn_ids}, session_retrieve_params.SessionRetrieveParams),
- ),
- cast_to=Session,
- )
-
- def list(
- self,
- agent_id: str,
- *,
- limit: int | Omit = omit,
- start_index: int | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> SessionListResponse:
- """
- List all session(s) of a given agent.
-
- Args:
- limit: The number of sessions to return.
-
- start_index: The index to start the pagination from.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not agent_id:
- raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
- return self._get(
- f"/v1alpha/agents/{agent_id}/sessions",
- options=make_request_options(
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- query=maybe_transform(
- {
- "limit": limit,
- "start_index": start_index,
- },
- session_list_params.SessionListParams,
- ),
- ),
- cast_to=SessionListResponse,
- )
-
- def delete(
- self,
- session_id: str,
- *,
- agent_id: str,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> None:
- """
- Delete an agent session by its ID and its associated turns.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not agent_id:
- raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
- if not session_id:
- raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
- extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- return self._delete(
- f"/v1alpha/agents/{agent_id}/session/{session_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=NoneType,
- )
-
-
-class AsyncSessionResource(AsyncAPIResource):
- @cached_property
- def with_raw_response(self) -> AsyncSessionResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return AsyncSessionResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> AsyncSessionResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return AsyncSessionResourceWithStreamingResponse(self)
-
- async def create(
- self,
- agent_id: str,
- *,
- session_name: str,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> SessionCreateResponse:
- """
- Create a new session for an agent.
-
- Args:
- session_name: The name of the session to create.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not agent_id:
- raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
- return await self._post(
- f"/v1alpha/agents/{agent_id}/session",
- body=await async_maybe_transform({"session_name": session_name}, session_create_params.SessionCreateParams),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=SessionCreateResponse,
- )
-
- async def retrieve(
- self,
- session_id: str,
- *,
- agent_id: str,
- turn_ids: SequenceNotStr[str] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Session:
- """
- Retrieve an agent session by its ID.
-
- Args:
- turn_ids: (Optional) List of turn IDs to filter the session by.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not agent_id:
- raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
- if not session_id:
- raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
- return await self._get(
- f"/v1alpha/agents/{agent_id}/session/{session_id}",
- options=make_request_options(
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- query=await async_maybe_transform(
- {"turn_ids": turn_ids}, session_retrieve_params.SessionRetrieveParams
- ),
- ),
- cast_to=Session,
- )
-
- async def list(
- self,
- agent_id: str,
- *,
- limit: int | Omit = omit,
- start_index: int | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> SessionListResponse:
- """
-        List all sessions of a given agent.
-
- Args:
- limit: The number of sessions to return.
-
- start_index: The index to start the pagination from.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not agent_id:
- raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
- return await self._get(
- f"/v1alpha/agents/{agent_id}/sessions",
- options=make_request_options(
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- query=await async_maybe_transform(
- {
- "limit": limit,
- "start_index": start_index,
- },
- session_list_params.SessionListParams,
- ),
- ),
- cast_to=SessionListResponse,
- )
-
- async def delete(
- self,
- session_id: str,
- *,
- agent_id: str,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> None:
- """
- Delete an agent session by its ID and its associated turns.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not agent_id:
- raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
- if not session_id:
- raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
- extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- return await self._delete(
- f"/v1alpha/agents/{agent_id}/session/{session_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=NoneType,
- )
-
-
-class SessionResourceWithRawResponse:
- def __init__(self, session: SessionResource) -> None:
- self._session = session
-
- self.create = to_raw_response_wrapper(
- session.create,
- )
- self.retrieve = to_raw_response_wrapper(
- session.retrieve,
- )
- self.list = to_raw_response_wrapper(
- session.list,
- )
- self.delete = to_raw_response_wrapper(
- session.delete,
- )
-
-
-class AsyncSessionResourceWithRawResponse:
- def __init__(self, session: AsyncSessionResource) -> None:
- self._session = session
-
- self.create = async_to_raw_response_wrapper(
- session.create,
- )
- self.retrieve = async_to_raw_response_wrapper(
- session.retrieve,
- )
- self.list = async_to_raw_response_wrapper(
- session.list,
- )
- self.delete = async_to_raw_response_wrapper(
- session.delete,
- )
-
-
-class SessionResourceWithStreamingResponse:
- def __init__(self, session: SessionResource) -> None:
- self._session = session
-
- self.create = to_streamed_response_wrapper(
- session.create,
- )
- self.retrieve = to_streamed_response_wrapper(
- session.retrieve,
- )
- self.list = to_streamed_response_wrapper(
- session.list,
- )
- self.delete = to_streamed_response_wrapper(
- session.delete,
- )
-
-
-class AsyncSessionResourceWithStreamingResponse:
- def __init__(self, session: AsyncSessionResource) -> None:
- self._session = session
-
- self.create = async_to_streamed_response_wrapper(
- session.create,
- )
- self.retrieve = async_to_streamed_response_wrapper(
- session.retrieve,
- )
- self.list = async_to_streamed_response_wrapper(
- session.list,
- )
- self.delete = async_to_streamed_response_wrapper(
- session.delete,
- )
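
The hunk above drops the entire v1alpha agent-session surface. For orientation, here is a minimal sketch of how these four methods were typically called from the generated client before this change; the `client.alpha.agents.session` accessor path, the `session_id` field on the create response, and every identifier below are assumptions for illustration, not part of this patch:

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:8321")  # hypothetical local server

    # Create a session for an existing agent, inspect it, page through sessions, then clean up.
    created = client.alpha.agents.session.create("agent_123", session_name="demo-session")
    session = client.alpha.agents.session.retrieve(created.session_id, agent_id="agent_123")
    page = client.alpha.agents.session.list("agent_123", limit=10, start_index=0)
    client.alpha.agents.session.delete(created.session_id, agent_id="agent_123")
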
diff --git a/src/llama_stack_client/resources/alpha/agents/steps.py b/src/llama_stack_client/resources/alpha/agents/steps.py
deleted file mode 100644
index 83624bef..00000000
--- a/src/llama_stack_client/resources/alpha/agents/steps.py
+++ /dev/null
@@ -1,187 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import httpx
-
-from ...._types import Body, Query, Headers, NotGiven, not_given
-from ...._compat import cached_property
-from ...._resource import SyncAPIResource, AsyncAPIResource
-from ...._response import (
- to_raw_response_wrapper,
- to_streamed_response_wrapper,
- async_to_raw_response_wrapper,
- async_to_streamed_response_wrapper,
-)
-from ...._base_client import make_request_options
-from ....types.alpha.agents.step_retrieve_response import StepRetrieveResponse
-
-__all__ = ["StepsResource", "AsyncStepsResource"]
-
-
-class StepsResource(SyncAPIResource):
- @cached_property
- def with_raw_response(self) -> StepsResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return StepsResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> StepsResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return StepsResourceWithStreamingResponse(self)
-
- def retrieve(
- self,
- step_id: str,
- *,
- agent_id: str,
- session_id: str,
- turn_id: str,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> StepRetrieveResponse:
- """
- Retrieve an agent step by its ID.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not agent_id:
- raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
- if not session_id:
- raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
- if not turn_id:
- raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
- if not step_id:
- raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}")
- return self._get(
- f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=StepRetrieveResponse,
- )
-
-
-class AsyncStepsResource(AsyncAPIResource):
- @cached_property
- def with_raw_response(self) -> AsyncStepsResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return AsyncStepsResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> AsyncStepsResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return AsyncStepsResourceWithStreamingResponse(self)
-
- async def retrieve(
- self,
- step_id: str,
- *,
- agent_id: str,
- session_id: str,
- turn_id: str,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> StepRetrieveResponse:
- """
- Retrieve an agent step by its ID.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not agent_id:
- raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
- if not session_id:
- raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
- if not turn_id:
- raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
- if not step_id:
- raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}")
- return await self._get(
- f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=StepRetrieveResponse,
- )
-
-
-class StepsResourceWithRawResponse:
- def __init__(self, steps: StepsResource) -> None:
- self._steps = steps
-
- self.retrieve = to_raw_response_wrapper(
- steps.retrieve,
- )
-
-
-class AsyncStepsResourceWithRawResponse:
- def __init__(self, steps: AsyncStepsResource) -> None:
- self._steps = steps
-
- self.retrieve = async_to_raw_response_wrapper(
- steps.retrieve,
- )
-
-
-class StepsResourceWithStreamingResponse:
- def __init__(self, steps: StepsResource) -> None:
- self._steps = steps
-
- self.retrieve = to_streamed_response_wrapper(
- steps.retrieve,
- )
-
-
-class AsyncStepsResourceWithStreamingResponse:
- def __init__(self, steps: AsyncStepsResource) -> None:
- self._steps = steps
-
- self.retrieve = async_to_streamed_response_wrapper(
- steps.retrieve,
- )
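
steps.py exposed a single lookup against /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}. A hedged usage sketch, under the same accessor-path assumption as above and with placeholder identifiers:

    # Retrieve one recorded step of a turn (all IDs here are illustrative).
    step = client.alpha.agents.steps.retrieve(
        "step_001",
        agent_id="agent_123",
        session_id="session_456",
        turn_id="turn_789",
    )
    print(step)  # StepRetrieveResponse with the step details
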
diff --git a/src/llama_stack_client/resources/alpha/agents/turn.py b/src/llama_stack_client/resources/alpha/agents/turn.py
deleted file mode 100644
index 85e3d1e5..00000000
--- a/src/llama_stack_client/resources/alpha/agents/turn.py
+++ /dev/null
@@ -1,881 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Iterable
-from typing_extensions import Literal, overload
-
-import httpx
-
-from ...._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
-from ...._utils import required_args, maybe_transform, async_maybe_transform
-from ...._compat import cached_property
-from ...._resource import SyncAPIResource, AsyncAPIResource
-from ...._response import (
- to_raw_response_wrapper,
- to_streamed_response_wrapper,
- async_to_raw_response_wrapper,
- async_to_streamed_response_wrapper,
-)
-from ...._streaming import Stream, AsyncStream
-from ...._base_client import make_request_options
-from ....types.alpha.agents import turn_create_params, turn_resume_params
-from ....types.alpha.agents.turn import Turn
-from ....types.alpha.tool_response_param import ToolResponseParam
-from ....types.alpha.agents.agent_turn_response_stream_chunk import AgentTurnResponseStreamChunk
-
-__all__ = ["TurnResource", "AsyncTurnResource"]
-
-
-class TurnResource(SyncAPIResource):
- @cached_property
- def with_raw_response(self) -> TurnResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return TurnResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> TurnResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return TurnResourceWithStreamingResponse(self)
-
- @overload
- def create(
- self,
- session_id: str,
- *,
- agent_id: str,
- messages: Iterable[turn_create_params.Message],
- documents: Iterable[turn_create_params.Document] | Omit = omit,
- stream: Literal[False] | Omit = omit,
- tool_config: turn_create_params.ToolConfig | Omit = omit,
- toolgroups: SequenceNotStr[turn_create_params.Toolgroup] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn:
- """
- Create a new turn for an agent.
-
- Args:
- messages: List of messages to start the turn with.
-
- documents: (Optional) List of documents to create the turn with.
-
- stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
- False.
-
-            tool_config: (Optional) The tool configuration to create the turn with; it will be used to
-              override the agent's tool_config.
-
-            toolgroups: (Optional) List of toolgroups to create the turn with; they will be used in
-              addition to the agent's config toolgroups for the request.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @overload
- def create(
- self,
- session_id: str,
- *,
- agent_id: str,
- messages: Iterable[turn_create_params.Message],
- stream: Literal[True],
- documents: Iterable[turn_create_params.Document] | Omit = omit,
- tool_config: turn_create_params.ToolConfig | Omit = omit,
- toolgroups: SequenceNotStr[turn_create_params.Toolgroup] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Stream[AgentTurnResponseStreamChunk]:
- """
- Create a new turn for an agent.
-
- Args:
- messages: List of messages to start the turn with.
-
- stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
- False.
-
- documents: (Optional) List of documents to create the turn with.
-
-            tool_config: (Optional) The tool configuration to create the turn with; it will be used to
-              override the agent's tool_config.
-
-            toolgroups: (Optional) List of toolgroups to create the turn with; they will be used in
-              addition to the agent's config toolgroups for the request.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @overload
- def create(
- self,
- session_id: str,
- *,
- agent_id: str,
- messages: Iterable[turn_create_params.Message],
- stream: bool,
- documents: Iterable[turn_create_params.Document] | Omit = omit,
- tool_config: turn_create_params.ToolConfig | Omit = omit,
- toolgroups: SequenceNotStr[turn_create_params.Toolgroup] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | Stream[AgentTurnResponseStreamChunk]:
- """
- Create a new turn for an agent.
-
- Args:
- messages: List of messages to start the turn with.
-
- stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
- False.
-
- documents: (Optional) List of documents to create the turn with.
-
-            tool_config: (Optional) The tool configuration to create the turn with; it will be used to
-              override the agent's tool_config.
-
-            toolgroups: (Optional) List of toolgroups to create the turn with; they will be used in
-              addition to the agent's config toolgroups for the request.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @required_args(["agent_id", "messages"], ["agent_id", "messages", "stream"])
- def create(
- self,
- session_id: str,
- *,
- agent_id: str,
- messages: Iterable[turn_create_params.Message],
- documents: Iterable[turn_create_params.Document] | Omit = omit,
- stream: Literal[False] | Literal[True] | Omit = omit,
- tool_config: turn_create_params.ToolConfig | Omit = omit,
- toolgroups: SequenceNotStr[turn_create_params.Toolgroup] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | Stream[AgentTurnResponseStreamChunk]:
- if not agent_id:
- raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
- if not session_id:
- raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
- return self._post(
- f"/v1alpha/agents/{agent_id}/session/{session_id}/turn",
- body=maybe_transform(
- {
- "messages": messages,
- "documents": documents,
- "stream": stream,
- "tool_config": tool_config,
- "toolgroups": toolgroups,
- },
- turn_create_params.TurnCreateParamsStreaming
- if stream
- else turn_create_params.TurnCreateParamsNonStreaming,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=Turn,
- stream=stream or False,
- stream_cls=Stream[AgentTurnResponseStreamChunk],
- )
-
- def retrieve(
- self,
- turn_id: str,
- *,
- agent_id: str,
- session_id: str,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn:
- """
- Retrieve an agent turn by its ID.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not agent_id:
- raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
- if not session_id:
- raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
- if not turn_id:
- raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
- return self._get(
- f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=Turn,
- )
-
- @overload
- def resume(
- self,
- turn_id: str,
- *,
- agent_id: str,
- session_id: str,
- tool_responses: Iterable[ToolResponseParam],
- stream: Literal[False] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn:
- """Resume an agent turn with executed tool call responses.
-
- When a Turn has the
- status `awaiting_input` due to pending input from client side tool calls, this
- endpoint can be used to submit the outputs from the tool calls once they are
- ready.
-
- Args:
- tool_responses: The tool call responses to resume the turn with.
-
- stream: Whether to stream the response.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @overload
- def resume(
- self,
- turn_id: str,
- *,
- agent_id: str,
- session_id: str,
- stream: Literal[True],
- tool_responses: Iterable[ToolResponseParam],
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Stream[AgentTurnResponseStreamChunk]:
- """Resume an agent turn with executed tool call responses.
-
- When a Turn has the
- status `awaiting_input` due to pending input from client side tool calls, this
- endpoint can be used to submit the outputs from the tool calls once they are
- ready.
-
- Args:
- stream: Whether to stream the response.
-
- tool_responses: The tool call responses to resume the turn with.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @overload
- def resume(
- self,
- turn_id: str,
- *,
- agent_id: str,
- session_id: str,
- stream: bool,
- tool_responses: Iterable[ToolResponseParam],
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | Stream[AgentTurnResponseStreamChunk]:
- """Resume an agent turn with executed tool call responses.
-
- When a Turn has the
- status `awaiting_input` due to pending input from client side tool calls, this
- endpoint can be used to submit the outputs from the tool calls once they are
- ready.
-
- Args:
- stream: Whether to stream the response.
-
- tool_responses: The tool call responses to resume the turn with.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @required_args(["agent_id", "session_id", "tool_responses"], ["agent_id", "session_id", "stream", "tool_responses"])
- def resume(
- self,
- turn_id: str,
- *,
- agent_id: str,
- session_id: str,
- tool_responses: Iterable[ToolResponseParam],
- stream: Literal[False] | Literal[True] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | Stream[AgentTurnResponseStreamChunk]:
- if not agent_id:
- raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
- if not session_id:
- raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
- if not turn_id:
- raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
- return self._post(
- f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
- body=maybe_transform(
- {
- "tool_responses": tool_responses,
- "stream": stream,
- },
- turn_resume_params.TurnResumeParamsStreaming
- if stream
- else turn_resume_params.TurnResumeParamsNonStreaming,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=Turn,
- stream=stream or False,
- stream_cls=Stream[AgentTurnResponseStreamChunk],
- )
-
-
-class AsyncTurnResource(AsyncAPIResource):
- @cached_property
- def with_raw_response(self) -> AsyncTurnResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return AsyncTurnResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> AsyncTurnResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return AsyncTurnResourceWithStreamingResponse(self)
-
- @overload
- async def create(
- self,
- session_id: str,
- *,
- agent_id: str,
- messages: Iterable[turn_create_params.Message],
- documents: Iterable[turn_create_params.Document] | Omit = omit,
- stream: Literal[False] | Omit = omit,
- tool_config: turn_create_params.ToolConfig | Omit = omit,
- toolgroups: SequenceNotStr[turn_create_params.Toolgroup] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn:
- """
- Create a new turn for an agent.
-
- Args:
- messages: List of messages to start the turn with.
-
- documents: (Optional) List of documents to create the turn with.
-
- stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
- False.
-
-            tool_config: (Optional) The tool configuration to create the turn with; it will be used to
-              override the agent's tool_config.
-
-            toolgroups: (Optional) List of toolgroups to create the turn with; they will be used in
-              addition to the agent's config toolgroups for the request.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @overload
- async def create(
- self,
- session_id: str,
- *,
- agent_id: str,
- messages: Iterable[turn_create_params.Message],
- stream: Literal[True],
- documents: Iterable[turn_create_params.Document] | Omit = omit,
- tool_config: turn_create_params.ToolConfig | Omit = omit,
- toolgroups: SequenceNotStr[turn_create_params.Toolgroup] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AsyncStream[AgentTurnResponseStreamChunk]:
- """
- Create a new turn for an agent.
-
- Args:
- messages: List of messages to start the turn with.
-
- stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
- False.
-
- documents: (Optional) List of documents to create the turn with.
-
-            tool_config: (Optional) The tool configuration to create the turn with; it will be used to
-              override the agent's tool_config.
-
-            toolgroups: (Optional) List of toolgroups to create the turn with; they will be used in
-              addition to the agent's config toolgroups for the request.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @overload
- async def create(
- self,
- session_id: str,
- *,
- agent_id: str,
- messages: Iterable[turn_create_params.Message],
- stream: bool,
- documents: Iterable[turn_create_params.Document] | Omit = omit,
- tool_config: turn_create_params.ToolConfig | Omit = omit,
- toolgroups: SequenceNotStr[turn_create_params.Toolgroup] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]:
- """
- Create a new turn for an agent.
-
- Args:
- messages: List of messages to start the turn with.
-
- stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
- False.
-
- documents: (Optional) List of documents to create the turn with.
-
-            tool_config: (Optional) The tool configuration to create the turn with; it will be used to
-              override the agent's tool_config.
-
-            toolgroups: (Optional) List of toolgroups to create the turn with; they will be used in
-              addition to the agent's config toolgroups for the request.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @required_args(["agent_id", "messages"], ["agent_id", "messages", "stream"])
- async def create(
- self,
- session_id: str,
- *,
- agent_id: str,
- messages: Iterable[turn_create_params.Message],
- documents: Iterable[turn_create_params.Document] | Omit = omit,
- stream: Literal[False] | Literal[True] | Omit = omit,
- tool_config: turn_create_params.ToolConfig | Omit = omit,
- toolgroups: SequenceNotStr[turn_create_params.Toolgroup] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]:
- if not agent_id:
- raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
- if not session_id:
- raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
- return await self._post(
- f"/v1alpha/agents/{agent_id}/session/{session_id}/turn",
- body=await async_maybe_transform(
- {
- "messages": messages,
- "documents": documents,
- "stream": stream,
- "tool_config": tool_config,
- "toolgroups": toolgroups,
- },
- turn_create_params.TurnCreateParamsStreaming
- if stream
- else turn_create_params.TurnCreateParamsNonStreaming,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=Turn,
- stream=stream or False,
- stream_cls=AsyncStream[AgentTurnResponseStreamChunk],
- )
-
- async def retrieve(
- self,
- turn_id: str,
- *,
- agent_id: str,
- session_id: str,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn:
- """
- Retrieve an agent turn by its ID.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not agent_id:
- raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
- if not session_id:
- raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
- if not turn_id:
- raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
- return await self._get(
- f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=Turn,
- )
-
- @overload
- async def resume(
- self,
- turn_id: str,
- *,
- agent_id: str,
- session_id: str,
- tool_responses: Iterable[ToolResponseParam],
- stream: Literal[False] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn:
- """Resume an agent turn with executed tool call responses.
-
- When a Turn has the
- status `awaiting_input` due to pending input from client side tool calls, this
- endpoint can be used to submit the outputs from the tool calls once they are
- ready.
-
- Args:
- tool_responses: The tool call responses to resume the turn with.
-
- stream: Whether to stream the response.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @overload
- async def resume(
- self,
- turn_id: str,
- *,
- agent_id: str,
- session_id: str,
- stream: Literal[True],
- tool_responses: Iterable[ToolResponseParam],
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AsyncStream[AgentTurnResponseStreamChunk]:
- """Resume an agent turn with executed tool call responses.
-
- When a Turn has the
- status `awaiting_input` due to pending input from client side tool calls, this
- endpoint can be used to submit the outputs from the tool calls once they are
- ready.
-
- Args:
- stream: Whether to stream the response.
-
- tool_responses: The tool call responses to resume the turn with.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @overload
- async def resume(
- self,
- turn_id: str,
- *,
- agent_id: str,
- session_id: str,
- stream: bool,
- tool_responses: Iterable[ToolResponseParam],
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]:
- """Resume an agent turn with executed tool call responses.
-
- When a Turn has the
- status `awaiting_input` due to pending input from client side tool calls, this
- endpoint can be used to submit the outputs from the tool calls once they are
- ready.
-
- Args:
- stream: Whether to stream the response.
-
- tool_responses: The tool call responses to resume the turn with.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- ...
-
- @required_args(["agent_id", "session_id", "tool_responses"], ["agent_id", "session_id", "stream", "tool_responses"])
- async def resume(
- self,
- turn_id: str,
- *,
- agent_id: str,
- session_id: str,
- tool_responses: Iterable[ToolResponseParam],
- stream: Literal[False] | Literal[True] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]:
- if not agent_id:
- raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
- if not session_id:
- raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
- if not turn_id:
- raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
- return await self._post(
- f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
- body=await async_maybe_transform(
- {
- "tool_responses": tool_responses,
- "stream": stream,
- },
- turn_resume_params.TurnResumeParamsStreaming
- if stream
- else turn_resume_params.TurnResumeParamsNonStreaming,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=Turn,
- stream=stream or False,
- stream_cls=AsyncStream[AgentTurnResponseStreamChunk],
- )
-
-
-class TurnResourceWithRawResponse:
- def __init__(self, turn: TurnResource) -> None:
- self._turn = turn
-
- self.create = to_raw_response_wrapper(
- turn.create,
- )
- self.retrieve = to_raw_response_wrapper(
- turn.retrieve,
- )
- self.resume = to_raw_response_wrapper(
- turn.resume,
- )
-
-
-class AsyncTurnResourceWithRawResponse:
- def __init__(self, turn: AsyncTurnResource) -> None:
- self._turn = turn
-
- self.create = async_to_raw_response_wrapper(
- turn.create,
- )
- self.retrieve = async_to_raw_response_wrapper(
- turn.retrieve,
- )
- self.resume = async_to_raw_response_wrapper(
- turn.resume,
- )
-
-
-class TurnResourceWithStreamingResponse:
- def __init__(self, turn: TurnResource) -> None:
- self._turn = turn
-
- self.create = to_streamed_response_wrapper(
- turn.create,
- )
- self.retrieve = to_streamed_response_wrapper(
- turn.retrieve,
- )
- self.resume = to_streamed_response_wrapper(
- turn.resume,
- )
-
-
-class AsyncTurnResourceWithStreamingResponse:
- def __init__(self, turn: AsyncTurnResource) -> None:
- self._turn = turn
-
- self.create = async_to_streamed_response_wrapper(
- turn.create,
- )
- self.retrieve = async_to_streamed_response_wrapper(
- turn.retrieve,
- )
- self.resume = async_to_streamed_response_wrapper(
- turn.resume,
- )
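
turn.py covered the full turn lifecycle: non-streaming creation, SSE streaming, retrieval, and resuming a turn that is `awaiting_input`. A sketch of that lifecycle follows; the accessor path, the message dict shape (per turn_create_params.Message), and the tool-response dict shape (per ToolResponseParam) are illustrative assumptions:

    # Non-streaming turn: returns a Turn object.
    turn = client.alpha.agents.turn.create(
        "session_456",
        agent_id="agent_123",
        messages=[{"role": "user", "content": "What is the capital of France?"}],
    )

    # Streaming turn: stream=True yields AgentTurnResponseStreamChunk events.
    for chunk in client.alpha.agents.turn.create(
        "session_456",
        agent_id="agent_123",
        messages=[{"role": "user", "content": "Summarize the last answer."}],
        stream=True,
    ):
        print(chunk)

    # If a turn paused awaiting client-side tool output, submit it via resume.
    resumed = client.alpha.agents.turn.resume(
        "turn_789",
        agent_id="agent_123",
        session_id="session_456",
        tool_responses=[{"call_id": "call_001", "tool_name": "get_weather", "content": "72F and sunny"}],
    )
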
diff --git a/src/llama_stack_client/resources/alpha/alpha.py b/src/llama_stack_client/resources/alpha/alpha.py
deleted file mode 100644
index 9ba65570..00000000
--- a/src/llama_stack_client/resources/alpha/alpha.py
+++ /dev/null
@@ -1,236 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from ..._compat import cached_property
-from .eval.eval import (
- EvalResource,
- AsyncEvalResource,
- EvalResourceWithRawResponse,
- AsyncEvalResourceWithRawResponse,
- EvalResourceWithStreamingResponse,
- AsyncEvalResourceWithStreamingResponse,
-)
-from .inference import (
- InferenceResource,
- AsyncInferenceResource,
- InferenceResourceWithRawResponse,
- AsyncInferenceResourceWithRawResponse,
- InferenceResourceWithStreamingResponse,
- AsyncInferenceResourceWithStreamingResponse,
-)
-from .benchmarks import (
- BenchmarksResource,
- AsyncBenchmarksResource,
- BenchmarksResourceWithRawResponse,
- AsyncBenchmarksResourceWithRawResponse,
- BenchmarksResourceWithStreamingResponse,
- AsyncBenchmarksResourceWithStreamingResponse,
-)
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from .agents.agents import (
- AgentsResource,
- AsyncAgentsResource,
- AgentsResourceWithRawResponse,
- AsyncAgentsResourceWithRawResponse,
- AgentsResourceWithStreamingResponse,
- AsyncAgentsResourceWithStreamingResponse,
-)
-from .post_training.post_training import (
- PostTrainingResource,
- AsyncPostTrainingResource,
- PostTrainingResourceWithRawResponse,
- AsyncPostTrainingResourceWithRawResponse,
- PostTrainingResourceWithStreamingResponse,
- AsyncPostTrainingResourceWithStreamingResponse,
-)
-
-__all__ = ["AlphaResource", "AsyncAlphaResource"]
-
-
-class AlphaResource(SyncAPIResource):
- @cached_property
- def inference(self) -> InferenceResource:
- return InferenceResource(self._client)
-
- @cached_property
- def post_training(self) -> PostTrainingResource:
- return PostTrainingResource(self._client)
-
- @cached_property
- def benchmarks(self) -> BenchmarksResource:
- return BenchmarksResource(self._client)
-
- @cached_property
- def eval(self) -> EvalResource:
- return EvalResource(self._client)
-
- @cached_property
- def agents(self) -> AgentsResource:
- return AgentsResource(self._client)
-
- @cached_property
- def with_raw_response(self) -> AlphaResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return AlphaResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> AlphaResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return AlphaResourceWithStreamingResponse(self)
-
-
-class AsyncAlphaResource(AsyncAPIResource):
- @cached_property
- def inference(self) -> AsyncInferenceResource:
- return AsyncInferenceResource(self._client)
-
- @cached_property
- def post_training(self) -> AsyncPostTrainingResource:
- return AsyncPostTrainingResource(self._client)
-
- @cached_property
- def benchmarks(self) -> AsyncBenchmarksResource:
- return AsyncBenchmarksResource(self._client)
-
- @cached_property
- def eval(self) -> AsyncEvalResource:
- return AsyncEvalResource(self._client)
-
- @cached_property
- def agents(self) -> AsyncAgentsResource:
- return AsyncAgentsResource(self._client)
-
- @cached_property
- def with_raw_response(self) -> AsyncAlphaResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return AsyncAlphaResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> AsyncAlphaResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return AsyncAlphaResourceWithStreamingResponse(self)
-
-
-class AlphaResourceWithRawResponse:
- def __init__(self, alpha: AlphaResource) -> None:
- self._alpha = alpha
-
- @cached_property
- def inference(self) -> InferenceResourceWithRawResponse:
- return InferenceResourceWithRawResponse(self._alpha.inference)
-
- @cached_property
- def post_training(self) -> PostTrainingResourceWithRawResponse:
- return PostTrainingResourceWithRawResponse(self._alpha.post_training)
-
- @cached_property
- def benchmarks(self) -> BenchmarksResourceWithRawResponse:
- return BenchmarksResourceWithRawResponse(self._alpha.benchmarks)
-
- @cached_property
- def eval(self) -> EvalResourceWithRawResponse:
- return EvalResourceWithRawResponse(self._alpha.eval)
-
- @cached_property
- def agents(self) -> AgentsResourceWithRawResponse:
- return AgentsResourceWithRawResponse(self._alpha.agents)
-
-
-class AsyncAlphaResourceWithRawResponse:
- def __init__(self, alpha: AsyncAlphaResource) -> None:
- self._alpha = alpha
-
- @cached_property
- def inference(self) -> AsyncInferenceResourceWithRawResponse:
- return AsyncInferenceResourceWithRawResponse(self._alpha.inference)
-
- @cached_property
- def post_training(self) -> AsyncPostTrainingResourceWithRawResponse:
- return AsyncPostTrainingResourceWithRawResponse(self._alpha.post_training)
-
- @cached_property
- def benchmarks(self) -> AsyncBenchmarksResourceWithRawResponse:
- return AsyncBenchmarksResourceWithRawResponse(self._alpha.benchmarks)
-
- @cached_property
- def eval(self) -> AsyncEvalResourceWithRawResponse:
- return AsyncEvalResourceWithRawResponse(self._alpha.eval)
-
- @cached_property
- def agents(self) -> AsyncAgentsResourceWithRawResponse:
- return AsyncAgentsResourceWithRawResponse(self._alpha.agents)
-
-
-class AlphaResourceWithStreamingResponse:
- def __init__(self, alpha: AlphaResource) -> None:
- self._alpha = alpha
-
- @cached_property
- def inference(self) -> InferenceResourceWithStreamingResponse:
- return InferenceResourceWithStreamingResponse(self._alpha.inference)
-
- @cached_property
- def post_training(self) -> PostTrainingResourceWithStreamingResponse:
- return PostTrainingResourceWithStreamingResponse(self._alpha.post_training)
-
- @cached_property
- def benchmarks(self) -> BenchmarksResourceWithStreamingResponse:
- return BenchmarksResourceWithStreamingResponse(self._alpha.benchmarks)
-
- @cached_property
- def eval(self) -> EvalResourceWithStreamingResponse:
- return EvalResourceWithStreamingResponse(self._alpha.eval)
-
- @cached_property
- def agents(self) -> AgentsResourceWithStreamingResponse:
- return AgentsResourceWithStreamingResponse(self._alpha.agents)
-
-
-class AsyncAlphaResourceWithStreamingResponse:
- def __init__(self, alpha: AsyncAlphaResource) -> None:
- self._alpha = alpha
-
- @cached_property
- def inference(self) -> AsyncInferenceResourceWithStreamingResponse:
- return AsyncInferenceResourceWithStreamingResponse(self._alpha.inference)
-
- @cached_property
- def post_training(self) -> AsyncPostTrainingResourceWithStreamingResponse:
- return AsyncPostTrainingResourceWithStreamingResponse(self._alpha.post_training)
-
- @cached_property
- def benchmarks(self) -> AsyncBenchmarksResourceWithStreamingResponse:
- return AsyncBenchmarksResourceWithStreamingResponse(self._alpha.benchmarks)
-
- @cached_property
- def eval(self) -> AsyncEvalResourceWithStreamingResponse:
- return AsyncEvalResourceWithStreamingResponse(self._alpha.eval)
-
- @cached_property
- def agents(self) -> AsyncAgentsResourceWithStreamingResponse:
- return AsyncAgentsResourceWithStreamingResponse(self._alpha.agents)
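
alpha.py was only the aggregation layer wiring these sub-resources (agents plus inference, post_training, benchmarks, and eval) onto `client.alpha`, along with the raw and streaming wrappers referenced in the docstrings above. A brief hedged sketch of the raw-response pattern those wrappers provided, continuing the same illustrative client and IDs:

    # Access the unparsed HTTP response instead of the parsed model.
    raw = client.alpha.agents.session.with_raw_response.list("agent_123")
    print(raw.headers)      # HTTP headers from the underlying response
    sessions = raw.parse()  # parsed SessionListResponse
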
diff --git a/src/llama_stack_client/resources/alpha/benchmarks.py b/src/llama_stack_client/resources/alpha/benchmarks.py
deleted file mode 100644
index dc74cc85..00000000
--- a/src/llama_stack_client/resources/alpha/benchmarks.py
+++ /dev/null
@@ -1,365 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, Type, Union, Iterable, cast
-
-import httpx
-
-from ..._types import Body, Omit, Query, Headers, NoneType, NotGiven, SequenceNotStr, omit, not_given
-from ..._utils import maybe_transform, async_maybe_transform
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import (
- to_raw_response_wrapper,
- to_streamed_response_wrapper,
- async_to_raw_response_wrapper,
- async_to_streamed_response_wrapper,
-)
-from ..._wrappers import DataWrapper
-from ...types.alpha import benchmark_register_params
-from ..._base_client import make_request_options
-from ...types.alpha.benchmark import Benchmark
-from ...types.alpha.benchmark_list_response import BenchmarkListResponse
-
-__all__ = ["BenchmarksResource", "AsyncBenchmarksResource"]
-
-
-class BenchmarksResource(SyncAPIResource):
- @cached_property
- def with_raw_response(self) -> BenchmarksResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return BenchmarksResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> BenchmarksResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return BenchmarksResourceWithStreamingResponse(self)
-
- def retrieve(
- self,
- benchmark_id: str,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Benchmark:
- """
- Get a benchmark by its ID.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not benchmark_id:
- raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
- return self._get(
- f"/v1alpha/eval/benchmarks/{benchmark_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=Benchmark,
- )
-
- def list(
- self,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> BenchmarkListResponse:
- """List all benchmarks."""
- return self._get(
- "/v1alpha/eval/benchmarks",
- options=make_request_options(
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- post_parser=DataWrapper[BenchmarkListResponse]._unwrapper,
- ),
- cast_to=cast(Type[BenchmarkListResponse], DataWrapper[BenchmarkListResponse]),
- )
-
- def register(
- self,
- *,
- benchmark_id: str,
- dataset_id: str,
- scoring_functions: SequenceNotStr[str],
- metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | Omit = omit,
- provider_benchmark_id: str | Omit = omit,
- provider_id: str | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> None:
- """
- Register a benchmark.
-
- Args:
- benchmark_id: The ID of the benchmark to register.
-
- dataset_id: The ID of the dataset to use for the benchmark.
-
- scoring_functions: The scoring functions to use for the benchmark.
-
- metadata: The metadata to use for the benchmark.
-
- provider_benchmark_id: The ID of the provider benchmark to use for the benchmark.
-
- provider_id: The ID of the provider to use for the benchmark.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- return self._post(
- "/v1alpha/eval/benchmarks",
- body=maybe_transform(
- {
- "benchmark_id": benchmark_id,
- "dataset_id": dataset_id,
- "scoring_functions": scoring_functions,
- "metadata": metadata,
- "provider_benchmark_id": provider_benchmark_id,
- "provider_id": provider_id,
- },
- benchmark_register_params.BenchmarkRegisterParams,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=NoneType,
- )
-
-
-class AsyncBenchmarksResource(AsyncAPIResource):
- @cached_property
- def with_raw_response(self) -> AsyncBenchmarksResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return AsyncBenchmarksResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> AsyncBenchmarksResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return AsyncBenchmarksResourceWithStreamingResponse(self)
-
- async def retrieve(
- self,
- benchmark_id: str,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Benchmark:
- """
- Get a benchmark by its ID.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not benchmark_id:
- raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
- return await self._get(
- f"/v1alpha/eval/benchmarks/{benchmark_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=Benchmark,
- )
-
- async def list(
- self,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> BenchmarkListResponse:
- """List all benchmarks."""
- return await self._get(
- "/v1alpha/eval/benchmarks",
- options=make_request_options(
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- post_parser=DataWrapper[BenchmarkListResponse]._unwrapper,
- ),
- cast_to=cast(Type[BenchmarkListResponse], DataWrapper[BenchmarkListResponse]),
- )
-
- async def register(
- self,
- *,
- benchmark_id: str,
- dataset_id: str,
- scoring_functions: SequenceNotStr[str],
- metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | Omit = omit,
- provider_benchmark_id: str | Omit = omit,
- provider_id: str | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> None:
- """
- Register a benchmark.
-
- Args:
- benchmark_id: The ID of the benchmark to register.
-
- dataset_id: The ID of the dataset to use for the benchmark.
-
- scoring_functions: The scoring functions to use for the benchmark.
-
- metadata: The metadata to use for the benchmark.
-
- provider_benchmark_id: The ID of the provider benchmark to use for the benchmark.
-
- provider_id: The ID of the provider to use for the benchmark.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- return await self._post(
- "/v1alpha/eval/benchmarks",
- body=await async_maybe_transform(
- {
- "benchmark_id": benchmark_id,
- "dataset_id": dataset_id,
- "scoring_functions": scoring_functions,
- "metadata": metadata,
- "provider_benchmark_id": provider_benchmark_id,
- "provider_id": provider_id,
- },
- benchmark_register_params.BenchmarkRegisterParams,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=NoneType,
- )
-
-
-class BenchmarksResourceWithRawResponse:
- def __init__(self, benchmarks: BenchmarksResource) -> None:
- self._benchmarks = benchmarks
-
- self.retrieve = to_raw_response_wrapper(
- benchmarks.retrieve,
- )
- self.list = to_raw_response_wrapper(
- benchmarks.list,
- )
- self.register = to_raw_response_wrapper(
- benchmarks.register,
- )
-
-
-class AsyncBenchmarksResourceWithRawResponse:
- def __init__(self, benchmarks: AsyncBenchmarksResource) -> None:
- self._benchmarks = benchmarks
-
- self.retrieve = async_to_raw_response_wrapper(
- benchmarks.retrieve,
- )
- self.list = async_to_raw_response_wrapper(
- benchmarks.list,
- )
- self.register = async_to_raw_response_wrapper(
- benchmarks.register,
- )
-
-
-class BenchmarksResourceWithStreamingResponse:
- def __init__(self, benchmarks: BenchmarksResource) -> None:
- self._benchmarks = benchmarks
-
- self.retrieve = to_streamed_response_wrapper(
- benchmarks.retrieve,
- )
- self.list = to_streamed_response_wrapper(
- benchmarks.list,
- )
- self.register = to_streamed_response_wrapper(
- benchmarks.register,
- )
-
-
-class AsyncBenchmarksResourceWithStreamingResponse:
- def __init__(self, benchmarks: AsyncBenchmarksResource) -> None:
- self._benchmarks = benchmarks
-
- self.retrieve = async_to_streamed_response_wrapper(
- benchmarks.retrieve,
- )
- self.list = async_to_streamed_response_wrapper(
- benchmarks.list,
- )
- self.register = async_to_streamed_response_wrapper(
- benchmarks.register,
- )
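
For reference, a minimal sketch of how the benchmarks resource deleted above was typically called before this change. It assumes the resource was exposed as client.alpha.benchmarks on a configured LlamaStackClient; the base URL and all IDs below are illustrative placeholders, not values taken from this patch.

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder endpoint

    # Register a benchmark (POST /v1alpha/eval/benchmarks); returns None on success.
    client.alpha.benchmarks.register(
        benchmark_id="my-benchmark",
        dataset_id="my-dataset",
        scoring_functions=["my-scoring-fn"],  # placeholder scoring function id
    )

    # List all benchmarks and fetch one by ID (the GET endpoints shown in the removed code).
    all_benchmarks = client.alpha.benchmarks.list()
    benchmark = client.alpha.benchmarks.retrieve("my-benchmark")

After this patch these calls no longer exist on the client.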
diff --git a/src/llama_stack_client/resources/alpha/eval/__init__.py b/src/llama_stack_client/resources/alpha/eval/__init__.py
deleted file mode 100644
index 3aa93594..00000000
--- a/src/llama_stack_client/resources/alpha/eval/__init__.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from .eval import (
- EvalResource,
- AsyncEvalResource,
- EvalResourceWithRawResponse,
- AsyncEvalResourceWithRawResponse,
- EvalResourceWithStreamingResponse,
- AsyncEvalResourceWithStreamingResponse,
-)
-from .jobs import (
- JobsResource,
- AsyncJobsResource,
- JobsResourceWithRawResponse,
- AsyncJobsResourceWithRawResponse,
- JobsResourceWithStreamingResponse,
- AsyncJobsResourceWithStreamingResponse,
-)
-
-__all__ = [
- "JobsResource",
- "AsyncJobsResource",
- "JobsResourceWithRawResponse",
- "AsyncJobsResourceWithRawResponse",
- "JobsResourceWithStreamingResponse",
- "AsyncJobsResourceWithStreamingResponse",
- "EvalResource",
- "AsyncEvalResource",
- "EvalResourceWithRawResponse",
- "AsyncEvalResourceWithRawResponse",
- "EvalResourceWithStreamingResponse",
- "AsyncEvalResourceWithStreamingResponse",
-]
diff --git a/src/llama_stack_client/resources/alpha/eval/eval.py b/src/llama_stack_client/resources/alpha/eval/eval.py
deleted file mode 100644
index 89101510..00000000
--- a/src/llama_stack_client/resources/alpha/eval/eval.py
+++ /dev/null
@@ -1,536 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, Union, Iterable
-
-import httpx
-
-from .jobs import (
- JobsResource,
- AsyncJobsResource,
- JobsResourceWithRawResponse,
- AsyncJobsResourceWithRawResponse,
- JobsResourceWithStreamingResponse,
- AsyncJobsResourceWithStreamingResponse,
-)
-from ...._types import Body, Query, Headers, NotGiven, SequenceNotStr, not_given
-from ...._utils import maybe_transform, async_maybe_transform
-from ...._compat import cached_property
-from ...._resource import SyncAPIResource, AsyncAPIResource
-from ...._response import (
- to_raw_response_wrapper,
- to_streamed_response_wrapper,
- async_to_raw_response_wrapper,
- async_to_streamed_response_wrapper,
-)
-from ....types.alpha import (
- eval_run_eval_params,
- eval_evaluate_rows_params,
- eval_run_eval_alpha_params,
- eval_evaluate_rows_alpha_params,
-)
-from ...._base_client import make_request_options
-from ....types.alpha.job import Job
-from ....types.alpha.evaluate_response import EvaluateResponse
-from ....types.alpha.benchmark_config_param import BenchmarkConfigParam
-
-__all__ = ["EvalResource", "AsyncEvalResource"]
-
-
-class EvalResource(SyncAPIResource):
- @cached_property
- def jobs(self) -> JobsResource:
- return JobsResource(self._client)
-
- @cached_property
- def with_raw_response(self) -> EvalResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return EvalResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> EvalResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return EvalResourceWithStreamingResponse(self)
-
- def evaluate_rows(
- self,
- benchmark_id: str,
- *,
- benchmark_config: BenchmarkConfigParam,
- input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]],
- scoring_functions: SequenceNotStr[str],
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> EvaluateResponse:
- """
- Evaluate a list of rows on a benchmark.
-
- Args:
- benchmark_config: The configuration for the benchmark.
-
- input_rows: The rows to evaluate.
-
- scoring_functions: The scoring functions to use for the evaluation.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not benchmark_id:
- raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
- return self._post(
- f"/v1alpha/eval/benchmarks/{benchmark_id}/evaluations",
- body=maybe_transform(
- {
- "benchmark_config": benchmark_config,
- "input_rows": input_rows,
- "scoring_functions": scoring_functions,
- },
- eval_evaluate_rows_params.EvalEvaluateRowsParams,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=EvaluateResponse,
- )
-
- def evaluate_rows_alpha(
- self,
- benchmark_id: str,
- *,
- benchmark_config: BenchmarkConfigParam,
- input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]],
- scoring_functions: SequenceNotStr[str],
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> EvaluateResponse:
- """
- Evaluate a list of rows on a benchmark.
-
- Args:
- benchmark_config: The configuration for the benchmark.
-
- input_rows: The rows to evaluate.
-
- scoring_functions: The scoring functions to use for the evaluation.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not benchmark_id:
- raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
- return self._post(
- f"/v1alpha/eval/benchmarks/{benchmark_id}/evaluations",
- body=maybe_transform(
- {
- "benchmark_config": benchmark_config,
- "input_rows": input_rows,
- "scoring_functions": scoring_functions,
- },
- eval_evaluate_rows_alpha_params.EvalEvaluateRowsAlphaParams,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=EvaluateResponse,
- )
-
- def run_eval(
- self,
- benchmark_id: str,
- *,
- benchmark_config: BenchmarkConfigParam,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Job:
- """
- Run an evaluation on a benchmark.
-
- Args:
- benchmark_config: The configuration for the benchmark.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not benchmark_id:
- raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
- return self._post(
- f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs",
- body=maybe_transform({"benchmark_config": benchmark_config}, eval_run_eval_params.EvalRunEvalParams),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=Job,
- )
-
- def run_eval_alpha(
- self,
- benchmark_id: str,
- *,
- benchmark_config: BenchmarkConfigParam,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Job:
- """
- Run an evaluation on a benchmark.
-
- Args:
- benchmark_config: The configuration for the benchmark.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not benchmark_id:
- raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
- return self._post(
- f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs",
- body=maybe_transform(
- {"benchmark_config": benchmark_config}, eval_run_eval_alpha_params.EvalRunEvalAlphaParams
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=Job,
- )
-
-
-class AsyncEvalResource(AsyncAPIResource):
- @cached_property
- def jobs(self) -> AsyncJobsResource:
- return AsyncJobsResource(self._client)
-
- @cached_property
- def with_raw_response(self) -> AsyncEvalResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return AsyncEvalResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> AsyncEvalResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return AsyncEvalResourceWithStreamingResponse(self)
-
- async def evaluate_rows(
- self,
- benchmark_id: str,
- *,
- benchmark_config: BenchmarkConfigParam,
- input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]],
- scoring_functions: SequenceNotStr[str],
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> EvaluateResponse:
- """
- Evaluate a list of rows on a benchmark.
-
- Args:
- benchmark_config: The configuration for the benchmark.
-
- input_rows: The rows to evaluate.
-
- scoring_functions: The scoring functions to use for the evaluation.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not benchmark_id:
- raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
- return await self._post(
- f"/v1alpha/eval/benchmarks/{benchmark_id}/evaluations",
- body=await async_maybe_transform(
- {
- "benchmark_config": benchmark_config,
- "input_rows": input_rows,
- "scoring_functions": scoring_functions,
- },
- eval_evaluate_rows_params.EvalEvaluateRowsParams,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=EvaluateResponse,
- )
-
- async def evaluate_rows_alpha(
- self,
- benchmark_id: str,
- *,
- benchmark_config: BenchmarkConfigParam,
- input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]],
- scoring_functions: SequenceNotStr[str],
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> EvaluateResponse:
- """
- Evaluate a list of rows on a benchmark.
-
- Args:
- benchmark_config: The configuration for the benchmark.
-
- input_rows: The rows to evaluate.
-
- scoring_functions: The scoring functions to use for the evaluation.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not benchmark_id:
- raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
- return await self._post(
- f"/v1alpha/eval/benchmarks/{benchmark_id}/evaluations",
- body=await async_maybe_transform(
- {
- "benchmark_config": benchmark_config,
- "input_rows": input_rows,
- "scoring_functions": scoring_functions,
- },
- eval_evaluate_rows_alpha_params.EvalEvaluateRowsAlphaParams,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=EvaluateResponse,
- )
-
- async def run_eval(
- self,
- benchmark_id: str,
- *,
- benchmark_config: BenchmarkConfigParam,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Job:
- """
- Run an evaluation on a benchmark.
-
- Args:
- benchmark_config: The configuration for the benchmark.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not benchmark_id:
- raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
- return await self._post(
- f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs",
- body=await async_maybe_transform(
- {"benchmark_config": benchmark_config}, eval_run_eval_params.EvalRunEvalParams
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=Job,
- )
-
- async def run_eval_alpha(
- self,
- benchmark_id: str,
- *,
- benchmark_config: BenchmarkConfigParam,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Job:
- """
- Run an evaluation on a benchmark.
-
- Args:
- benchmark_config: The configuration for the benchmark.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not benchmark_id:
- raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
- return await self._post(
- f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs",
- body=await async_maybe_transform(
- {"benchmark_config": benchmark_config}, eval_run_eval_alpha_params.EvalRunEvalAlphaParams
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=Job,
- )
-
-
-class EvalResourceWithRawResponse:
- def __init__(self, eval: EvalResource) -> None:
- self._eval = eval
-
- self.evaluate_rows = to_raw_response_wrapper(
- eval.evaluate_rows,
- )
- self.evaluate_rows_alpha = to_raw_response_wrapper(
- eval.evaluate_rows_alpha,
- )
- self.run_eval = to_raw_response_wrapper(
- eval.run_eval,
- )
- self.run_eval_alpha = to_raw_response_wrapper(
- eval.run_eval_alpha,
- )
-
- @cached_property
- def jobs(self) -> JobsResourceWithRawResponse:
- return JobsResourceWithRawResponse(self._eval.jobs)
-
-
-class AsyncEvalResourceWithRawResponse:
- def __init__(self, eval: AsyncEvalResource) -> None:
- self._eval = eval
-
- self.evaluate_rows = async_to_raw_response_wrapper(
- eval.evaluate_rows,
- )
- self.evaluate_rows_alpha = async_to_raw_response_wrapper(
- eval.evaluate_rows_alpha,
- )
- self.run_eval = async_to_raw_response_wrapper(
- eval.run_eval,
- )
- self.run_eval_alpha = async_to_raw_response_wrapper(
- eval.run_eval_alpha,
- )
-
- @cached_property
- def jobs(self) -> AsyncJobsResourceWithRawResponse:
- return AsyncJobsResourceWithRawResponse(self._eval.jobs)
-
-
-class EvalResourceWithStreamingResponse:
- def __init__(self, eval: EvalResource) -> None:
- self._eval = eval
-
- self.evaluate_rows = to_streamed_response_wrapper(
- eval.evaluate_rows,
- )
- self.evaluate_rows_alpha = to_streamed_response_wrapper(
- eval.evaluate_rows_alpha,
- )
- self.run_eval = to_streamed_response_wrapper(
- eval.run_eval,
- )
- self.run_eval_alpha = to_streamed_response_wrapper(
- eval.run_eval_alpha,
- )
-
- @cached_property
- def jobs(self) -> JobsResourceWithStreamingResponse:
- return JobsResourceWithStreamingResponse(self._eval.jobs)
-
-
-class AsyncEvalResourceWithStreamingResponse:
- def __init__(self, eval: AsyncEvalResource) -> None:
- self._eval = eval
-
- self.evaluate_rows = async_to_streamed_response_wrapper(
- eval.evaluate_rows,
- )
- self.evaluate_rows_alpha = async_to_streamed_response_wrapper(
- eval.evaluate_rows_alpha,
- )
- self.run_eval = async_to_streamed_response_wrapper(
- eval.run_eval,
- )
- self.run_eval_alpha = async_to_streamed_response_wrapper(
- eval.run_eval_alpha,
- )
-
- @cached_property
- def jobs(self) -> AsyncJobsResourceWithStreamingResponse:
- return AsyncJobsResourceWithStreamingResponse(self._eval.jobs)
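
Similarly, a schematic sketch of the eval entry points removed above, assuming the resource was reachable as client.alpha.eval. The benchmark_config dict is left empty here because its fields come from the BenchmarkConfigParam type (also removed in this patch); the row shape and IDs are illustrative.

    benchmark_config = {}  # populate per the removed types/alpha/benchmark_config_param.py

    # Evaluate a handful of rows synchronously (POST .../evaluations); returns an EvaluateResponse.
    result = client.alpha.eval.evaluate_rows(
        "my-benchmark",
        benchmark_config=benchmark_config,
        input_rows=[{"input_query": "2 + 2?", "expected_answer": "4"}],  # illustrative row
        scoring_functions=["my-scoring-fn"],
    )

    # Or run the full benchmark as a background job (POST .../jobs); returns a Job.
    job = client.alpha.eval.run_eval("my-benchmark", benchmark_config=benchmark_config)

The *_alpha variants (evaluate_rows_alpha, run_eval_alpha) hit the same endpoints with identical parameters.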
diff --git a/src/llama_stack_client/resources/alpha/eval/jobs.py b/src/llama_stack_client/resources/alpha/eval/jobs.py
deleted file mode 100644
index 94eed41e..00000000
--- a/src/llama_stack_client/resources/alpha/eval/jobs.py
+++ /dev/null
@@ -1,346 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import httpx
-
-from ...._types import Body, Query, Headers, NoneType, NotGiven, not_given
-from ...._compat import cached_property
-from ...._resource import SyncAPIResource, AsyncAPIResource
-from ...._response import (
- to_raw_response_wrapper,
- to_streamed_response_wrapper,
- async_to_raw_response_wrapper,
- async_to_streamed_response_wrapper,
-)
-from ...._base_client import make_request_options
-from ....types.alpha.job import Job
-from ....types.alpha.evaluate_response import EvaluateResponse
-
-__all__ = ["JobsResource", "AsyncJobsResource"]
-
-
-class JobsResource(SyncAPIResource):
- @cached_property
- def with_raw_response(self) -> JobsResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return JobsResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> JobsResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return JobsResourceWithStreamingResponse(self)
-
- def retrieve(
- self,
- job_id: str,
- *,
- benchmark_id: str,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> EvaluateResponse:
- """
- Get the result of a job.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not benchmark_id:
- raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
- if not job_id:
- raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
- return self._get(
- f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=EvaluateResponse,
- )
-
- def cancel(
- self,
- job_id: str,
- *,
- benchmark_id: str,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> None:
- """
- Cancel a job.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not benchmark_id:
- raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
- if not job_id:
- raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
- extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- return self._delete(
- f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=NoneType,
- )
-
- def status(
- self,
- job_id: str,
- *,
- benchmark_id: str,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Job:
- """
- Get the status of a job.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not benchmark_id:
- raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
- if not job_id:
- raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
- return self._get(
- f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=Job,
- )
-
-
-class AsyncJobsResource(AsyncAPIResource):
- @cached_property
- def with_raw_response(self) -> AsyncJobsResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return AsyncJobsResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> AsyncJobsResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return AsyncJobsResourceWithStreamingResponse(self)
-
- async def retrieve(
- self,
- job_id: str,
- *,
- benchmark_id: str,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> EvaluateResponse:
- """
- Get the result of a job.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not benchmark_id:
- raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
- if not job_id:
- raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
- return await self._get(
- f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=EvaluateResponse,
- )
-
- async def cancel(
- self,
- job_id: str,
- *,
- benchmark_id: str,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> None:
- """
- Cancel a job.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not benchmark_id:
- raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
- if not job_id:
- raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
- extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- return await self._delete(
- f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=NoneType,
- )
-
- async def status(
- self,
- job_id: str,
- *,
- benchmark_id: str,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Job:
- """
- Get the status of a job.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not benchmark_id:
- raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
- if not job_id:
- raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
- return await self._get(
- f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=Job,
- )
-
-
-class JobsResourceWithRawResponse:
- def __init__(self, jobs: JobsResource) -> None:
- self._jobs = jobs
-
- self.retrieve = to_raw_response_wrapper(
- jobs.retrieve,
- )
- self.cancel = to_raw_response_wrapper(
- jobs.cancel,
- )
- self.status = to_raw_response_wrapper(
- jobs.status,
- )
-
-
-class AsyncJobsResourceWithRawResponse:
- def __init__(self, jobs: AsyncJobsResource) -> None:
- self._jobs = jobs
-
- self.retrieve = async_to_raw_response_wrapper(
- jobs.retrieve,
- )
- self.cancel = async_to_raw_response_wrapper(
- jobs.cancel,
- )
- self.status = async_to_raw_response_wrapper(
- jobs.status,
- )
-
-
-class JobsResourceWithStreamingResponse:
- def __init__(self, jobs: JobsResource) -> None:
- self._jobs = jobs
-
- self.retrieve = to_streamed_response_wrapper(
- jobs.retrieve,
- )
- self.cancel = to_streamed_response_wrapper(
- jobs.cancel,
- )
- self.status = to_streamed_response_wrapper(
- jobs.status,
- )
-
-
-class AsyncJobsResourceWithStreamingResponse:
- def __init__(self, jobs: AsyncJobsResource) -> None:
- self._jobs = jobs
-
- self.retrieve = async_to_streamed_response_wrapper(
- jobs.retrieve,
- )
- self.cancel = async_to_streamed_response_wrapper(
- jobs.cancel,
- )
- self.status = async_to_streamed_response_wrapper(
- jobs.status,
- )
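
The job-management calls removed above composed with run_eval as follows, again assuming a client.alpha.eval.jobs accessor and placeholder IDs:

    # Check on a job started by run_eval (GET .../jobs/{job_id}); returns a Job.
    job = client.alpha.eval.jobs.status("job-123", benchmark_id="my-benchmark")

    # Fetch the finished result (GET .../jobs/{job_id}/result); returns an EvaluateResponse.
    result = client.alpha.eval.jobs.retrieve("job-123", benchmark_id="my-benchmark")

    # Cancel a running job (DELETE .../jobs/{job_id}).
    client.alpha.eval.jobs.cancel("job-123", benchmark_id="my-benchmark")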
diff --git a/src/llama_stack_client/resources/alpha/inference.py b/src/llama_stack_client/resources/alpha/inference.py
deleted file mode 100644
index 9db21d26..00000000
--- a/src/llama_stack_client/resources/alpha/inference.py
+++ /dev/null
@@ -1,224 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Type, cast
-
-import httpx
-
-from ..._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
-from ..._utils import maybe_transform, async_maybe_transform
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import (
- to_raw_response_wrapper,
- to_streamed_response_wrapper,
- async_to_raw_response_wrapper,
- async_to_streamed_response_wrapper,
-)
-from ..._wrappers import DataWrapper
-from ...types.alpha import inference_rerank_params
-from ..._base_client import make_request_options
-from ...types.alpha.inference_rerank_response import InferenceRerankResponse
-
-__all__ = ["InferenceResource", "AsyncInferenceResource"]
-
-
-class InferenceResource(SyncAPIResource):
- @cached_property
- def with_raw_response(self) -> InferenceResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return InferenceResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> InferenceResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return InferenceResourceWithStreamingResponse(self)
-
- def rerank(
- self,
- *,
- items: SequenceNotStr[inference_rerank_params.Item],
- model: str,
- query: inference_rerank_params.Query,
- max_num_results: int | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> InferenceRerankResponse:
- """
- Rerank a list of documents based on their relevance to a query.
-
- Args:
- items: List of items to rerank. Each item can be a string, text content part, or image
- content part. Each input must not exceed the model's max input token length.
-
- model: The identifier of the reranking model to use.
-
- query: The search query to rank items against. Can be a string, text content part, or
- image content part. The input must not exceed the model's max input token
- length.
-
- max_num_results: (Optional) Maximum number of results to return. Default: returns all.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- return self._post(
- "/v1alpha/inference/rerank",
- body=maybe_transform(
- {
- "items": items,
- "model": model,
- "query": query,
- "max_num_results": max_num_results,
- },
- inference_rerank_params.InferenceRerankParams,
- ),
- options=make_request_options(
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- post_parser=DataWrapper[InferenceRerankResponse]._unwrapper,
- ),
- cast_to=cast(Type[InferenceRerankResponse], DataWrapper[InferenceRerankResponse]),
- )
-
-
-class AsyncInferenceResource(AsyncAPIResource):
- @cached_property
- def with_raw_response(self) -> AsyncInferenceResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return AsyncInferenceResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> AsyncInferenceResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return AsyncInferenceResourceWithStreamingResponse(self)
-
- async def rerank(
- self,
- *,
- items: SequenceNotStr[inference_rerank_params.Item],
- model: str,
- query: inference_rerank_params.Query,
- max_num_results: int | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> InferenceRerankResponse:
- """
- Rerank a list of documents based on their relevance to a query.
-
- Args:
- items: List of items to rerank. Each item can be a string, text content part, or image
- content part. Each input must not exceed the model's max input token length.
-
- model: The identifier of the reranking model to use.
-
- query: The search query to rank items against. Can be a string, text content part, or
- image content part. The input must not exceed the model's max input token
- length.
-
- max_num_results: (Optional) Maximum number of results to return. Default: returns all.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- return await self._post(
- "/v1alpha/inference/rerank",
- body=await async_maybe_transform(
- {
- "items": items,
- "model": model,
- "query": query,
- "max_num_results": max_num_results,
- },
- inference_rerank_params.InferenceRerankParams,
- ),
- options=make_request_options(
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- post_parser=DataWrapper[InferenceRerankResponse]._unwrapper,
- ),
- cast_to=cast(Type[InferenceRerankResponse], DataWrapper[InferenceRerankResponse]),
- )
-
-
-class InferenceResourceWithRawResponse:
- def __init__(self, inference: InferenceResource) -> None:
- self._inference = inference
-
- self.rerank = to_raw_response_wrapper(
- inference.rerank,
- )
-
-
-class AsyncInferenceResourceWithRawResponse:
- def __init__(self, inference: AsyncInferenceResource) -> None:
- self._inference = inference
-
- self.rerank = async_to_raw_response_wrapper(
- inference.rerank,
- )
-
-
-class InferenceResourceWithStreamingResponse:
- def __init__(self, inference: InferenceResource) -> None:
- self._inference = inference
-
- self.rerank = to_streamed_response_wrapper(
- inference.rerank,
- )
-
-
-class AsyncInferenceResourceWithStreamingResponse:
- def __init__(self, inference: AsyncInferenceResource) -> None:
- self._inference = inference
-
- self.rerank = async_to_streamed_response_wrapper(
- inference.rerank,
- )
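
And a sketch of the removed rerank call, assuming the resource was exposed as client.alpha.inference; the model identifier and documents are placeholders:

    ranked = client.alpha.inference.rerank(
        model="my-reranker-model",
        query="What is the capital of France?",
        items=[
            "Paris is the capital of France.",
            "Berlin is the capital of Germany.",
        ],
        max_num_results=1,  # optional; the endpoint returns all items when omitted
    )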
diff --git a/src/llama_stack_client/resources/alpha/post_training/__init__.py b/src/llama_stack_client/resources/alpha/post_training/__init__.py
deleted file mode 100644
index 81a6a807..00000000
--- a/src/llama_stack_client/resources/alpha/post_training/__init__.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from .job import (
- JobResource,
- AsyncJobResource,
- JobResourceWithRawResponse,
- AsyncJobResourceWithRawResponse,
- JobResourceWithStreamingResponse,
- AsyncJobResourceWithStreamingResponse,
-)
-from .post_training import (
- PostTrainingResource,
- AsyncPostTrainingResource,
- PostTrainingResourceWithRawResponse,
- AsyncPostTrainingResourceWithRawResponse,
- PostTrainingResourceWithStreamingResponse,
- AsyncPostTrainingResourceWithStreamingResponse,
-)
-
-__all__ = [
- "JobResource",
- "AsyncJobResource",
- "JobResourceWithRawResponse",
- "AsyncJobResourceWithRawResponse",
- "JobResourceWithStreamingResponse",
- "AsyncJobResourceWithStreamingResponse",
- "PostTrainingResource",
- "AsyncPostTrainingResource",
- "PostTrainingResourceWithRawResponse",
- "AsyncPostTrainingResourceWithRawResponse",
- "PostTrainingResourceWithStreamingResponse",
- "AsyncPostTrainingResourceWithStreamingResponse",
-]
diff --git a/src/llama_stack_client/resources/alpha/post_training/job.py b/src/llama_stack_client/resources/alpha/post_training/job.py
deleted file mode 100644
index 8e09f335..00000000
--- a/src/llama_stack_client/resources/alpha/post_training/job.py
+++ /dev/null
@@ -1,410 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Type, cast
-
-import httpx
-
-from ...._types import Body, Query, Headers, NoneType, NotGiven, not_given
-from ...._utils import maybe_transform, async_maybe_transform
-from ...._compat import cached_property
-from ...._resource import SyncAPIResource, AsyncAPIResource
-from ...._response import (
- to_raw_response_wrapper,
- to_streamed_response_wrapper,
- async_to_raw_response_wrapper,
- async_to_streamed_response_wrapper,
-)
-from ...._wrappers import DataWrapper
-from ...._base_client import make_request_options
-from ....types.alpha.post_training import job_cancel_params, job_status_params, job_artifacts_params
-from ....types.alpha.post_training.job_list_response import JobListResponse
-from ....types.alpha.post_training.job_status_response import JobStatusResponse
-from ....types.alpha.post_training.job_artifacts_response import JobArtifactsResponse
-
-__all__ = ["JobResource", "AsyncJobResource"]
-
-
-class JobResource(SyncAPIResource):
- @cached_property
- def with_raw_response(self) -> JobResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return JobResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> JobResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return JobResourceWithStreamingResponse(self)
-
- def list(
- self,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> JobListResponse:
- """Get all training jobs."""
- return self._get(
- "/v1alpha/post-training/jobs",
- options=make_request_options(
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- post_parser=DataWrapper[JobListResponse]._unwrapper,
- ),
- cast_to=cast(Type[JobListResponse], DataWrapper[JobListResponse]),
- )
-
- def artifacts(
- self,
- *,
- job_uuid: str,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> JobArtifactsResponse:
- """
- Get the artifacts of a training job.
-
- Args:
- job_uuid: The UUID of the job to get the artifacts of.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- return self._get(
- "/v1alpha/post-training/job/artifacts",
- options=make_request_options(
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- query=maybe_transform({"job_uuid": job_uuid}, job_artifacts_params.JobArtifactsParams),
- ),
- cast_to=JobArtifactsResponse,
- )
-
- def cancel(
- self,
- *,
- job_uuid: str,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> None:
- """
- Cancel a training job.
-
- Args:
- job_uuid: The UUID of the job to cancel.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- return self._post(
- "/v1alpha/post-training/job/cancel",
- body=maybe_transform({"job_uuid": job_uuid}, job_cancel_params.JobCancelParams),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=NoneType,
- )
-
- def status(
- self,
- *,
- job_uuid: str,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> JobStatusResponse:
- """
- Get the status of a training job.
-
- Args:
- job_uuid: The UUID of the job to get the status of.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- return self._get(
- "/v1alpha/post-training/job/status",
- options=make_request_options(
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- query=maybe_transform({"job_uuid": job_uuid}, job_status_params.JobStatusParams),
- ),
- cast_to=JobStatusResponse,
- )
-
-
-class AsyncJobResource(AsyncAPIResource):
- @cached_property
- def with_raw_response(self) -> AsyncJobResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return AsyncJobResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> AsyncJobResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return AsyncJobResourceWithStreamingResponse(self)
-
- async def list(
- self,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> JobListResponse:
- """Get all training jobs."""
- return await self._get(
- "/v1alpha/post-training/jobs",
- options=make_request_options(
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- post_parser=DataWrapper[JobListResponse]._unwrapper,
- ),
- cast_to=cast(Type[JobListResponse], DataWrapper[JobListResponse]),
- )
-
- async def artifacts(
- self,
- *,
- job_uuid: str,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> JobArtifactsResponse:
- """
- Get the artifacts of a training job.
-
- Args:
- job_uuid: The UUID of the job to get the artifacts of.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- return await self._get(
- "/v1alpha/post-training/job/artifacts",
- options=make_request_options(
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- query=await async_maybe_transform({"job_uuid": job_uuid}, job_artifacts_params.JobArtifactsParams),
- ),
- cast_to=JobArtifactsResponse,
- )
-
- async def cancel(
- self,
- *,
- job_uuid: str,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> None:
- """
- Cancel a training job.
-
- Args:
- job_uuid: The UUID of the job to cancel.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- return await self._post(
- "/v1alpha/post-training/job/cancel",
- body=await async_maybe_transform({"job_uuid": job_uuid}, job_cancel_params.JobCancelParams),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=NoneType,
- )
-
- async def status(
- self,
- *,
- job_uuid: str,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> JobStatusResponse:
- """
- Get the status of a training job.
-
- Args:
- job_uuid: The UUID of the job to get the status of.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- return await self._get(
- "/v1alpha/post-training/job/status",
- options=make_request_options(
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- query=await async_maybe_transform({"job_uuid": job_uuid}, job_status_params.JobStatusParams),
- ),
- cast_to=JobStatusResponse,
- )
-
-
-class JobResourceWithRawResponse:
- def __init__(self, job: JobResource) -> None:
- self._job = job
-
- self.list = to_raw_response_wrapper(
- job.list,
- )
- self.artifacts = to_raw_response_wrapper(
- job.artifacts,
- )
- self.cancel = to_raw_response_wrapper(
- job.cancel,
- )
- self.status = to_raw_response_wrapper(
- job.status,
- )
-
-
-class AsyncJobResourceWithRawResponse:
- def __init__(self, job: AsyncJobResource) -> None:
- self._job = job
-
- self.list = async_to_raw_response_wrapper(
- job.list,
- )
- self.artifacts = async_to_raw_response_wrapper(
- job.artifacts,
- )
- self.cancel = async_to_raw_response_wrapper(
- job.cancel,
- )
- self.status = async_to_raw_response_wrapper(
- job.status,
- )
-
-
-class JobResourceWithStreamingResponse:
- def __init__(self, job: JobResource) -> None:
- self._job = job
-
- self.list = to_streamed_response_wrapper(
- job.list,
- )
- self.artifacts = to_streamed_response_wrapper(
- job.artifacts,
- )
- self.cancel = to_streamed_response_wrapper(
- job.cancel,
- )
- self.status = to_streamed_response_wrapper(
- job.status,
- )
-
-
-class AsyncJobResourceWithStreamingResponse:
- def __init__(self, job: AsyncJobResource) -> None:
- self._job = job
-
- self.list = async_to_streamed_response_wrapper(
- job.list,
- )
- self.artifacts = async_to_streamed_response_wrapper(
- job.artifacts,
- )
- self.cancel = async_to_streamed_response_wrapper(
- job.cancel,
- )
- self.status = async_to_streamed_response_wrapper(
- job.status,
- )
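For reference, the deleted job sub-resource above exposed list/artifacts/cancel/status helpers keyed by job_uuid. A minimal usage sketch under stated assumptions: the accessor path client.alpha.post_training.job and the local base_url are inferred from the deleted module layout, not confirmed by this patch.

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# Inspect and manage a training job by its UUID (accessor path assumed).
status = client.alpha.post_training.job.status(job_uuid="job-1234")
artifacts = client.alpha.post_training.job.artifacts(job_uuid="job-1234")
client.alpha.post_training.job.cancel(job_uuid="job-1234")
print(status, artifacts)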
diff --git a/src/llama_stack_client/resources/alpha/post_training/post_training.py b/src/llama_stack_client/resources/alpha/post_training/post_training.py
deleted file mode 100644
index 9b1fe87a..00000000
--- a/src/llama_stack_client/resources/alpha/post_training/post_training.py
+++ /dev/null
@@ -1,399 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, Union, Iterable
-
-import httpx
-
-from .job import (
- JobResource,
- AsyncJobResource,
- JobResourceWithRawResponse,
- AsyncJobResourceWithRawResponse,
- JobResourceWithStreamingResponse,
- AsyncJobResourceWithStreamingResponse,
-)
-from ...._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
-from ...._utils import maybe_transform, async_maybe_transform
-from ...._compat import cached_property
-from ...._resource import SyncAPIResource, AsyncAPIResource
-from ...._response import (
- to_raw_response_wrapper,
- to_streamed_response_wrapper,
- async_to_raw_response_wrapper,
- async_to_streamed_response_wrapper,
-)
-from ....types.alpha import (
- post_training_preference_optimize_params,
- post_training_supervised_fine_tune_params,
-)
-from ...._base_client import make_request_options
-from ....types.alpha.post_training_job import PostTrainingJob
-from ....types.alpha.algorithm_config_param import AlgorithmConfigParam
-
-__all__ = ["PostTrainingResource", "AsyncPostTrainingResource"]
-
-
-class PostTrainingResource(SyncAPIResource):
- @cached_property
- def job(self) -> JobResource:
- return JobResource(self._client)
-
- @cached_property
- def with_raw_response(self) -> PostTrainingResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return PostTrainingResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> PostTrainingResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return PostTrainingResourceWithStreamingResponse(self)
-
- def preference_optimize(
- self,
- *,
- algorithm_config: post_training_preference_optimize_params.AlgorithmConfig,
- finetuned_model: str,
- hyperparam_search_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]],
- job_uuid: str,
- logger_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]],
- training_config: post_training_preference_optimize_params.TrainingConfig,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> PostTrainingJob:
- """
- Run preference optimization of a model.
-
- Args:
- algorithm_config: The algorithm configuration.
-
- finetuned_model: The model to fine-tune.
-
- hyperparam_search_config: The hyperparam search configuration.
-
- job_uuid: The UUID of the job to create.
-
- logger_config: The logger configuration.
-
- training_config: The training configuration.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- return self._post(
- "/v1alpha/post-training/preference-optimize",
- body=maybe_transform(
- {
- "algorithm_config": algorithm_config,
- "finetuned_model": finetuned_model,
- "hyperparam_search_config": hyperparam_search_config,
- "job_uuid": job_uuid,
- "logger_config": logger_config,
- "training_config": training_config,
- },
- post_training_preference_optimize_params.PostTrainingPreferenceOptimizeParams,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=PostTrainingJob,
- )
-
- def supervised_fine_tune(
- self,
- *,
- hyperparam_search_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]],
- job_uuid: str,
- logger_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]],
- training_config: post_training_supervised_fine_tune_params.TrainingConfig,
- algorithm_config: AlgorithmConfigParam | Omit = omit,
- checkpoint_dir: str | Omit = omit,
- model: str | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> PostTrainingJob:
- """
- Run supervised fine-tuning of a model.
-
- Args:
- hyperparam_search_config: The hyperparam search configuration.
-
- job_uuid: The UUID of the job to create.
-
- logger_config: The logger configuration.
-
- training_config: The training configuration.
-
- algorithm_config: The algorithm configuration.
-
- checkpoint_dir: The directory to save checkpoint(s) to.
-
- model: The model to fine-tune.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- return self._post(
- "/v1alpha/post-training/supervised-fine-tune",
- body=maybe_transform(
- {
- "hyperparam_search_config": hyperparam_search_config,
- "job_uuid": job_uuid,
- "logger_config": logger_config,
- "training_config": training_config,
- "algorithm_config": algorithm_config,
- "checkpoint_dir": checkpoint_dir,
- "model": model,
- },
- post_training_supervised_fine_tune_params.PostTrainingSupervisedFineTuneParams,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=PostTrainingJob,
- )
-
-
-class AsyncPostTrainingResource(AsyncAPIResource):
- @cached_property
- def job(self) -> AsyncJobResource:
- return AsyncJobResource(self._client)
-
- @cached_property
- def with_raw_response(self) -> AsyncPostTrainingResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return AsyncPostTrainingResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> AsyncPostTrainingResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return AsyncPostTrainingResourceWithStreamingResponse(self)
-
- async def preference_optimize(
- self,
- *,
- algorithm_config: post_training_preference_optimize_params.AlgorithmConfig,
- finetuned_model: str,
- hyperparam_search_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]],
- job_uuid: str,
- logger_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]],
- training_config: post_training_preference_optimize_params.TrainingConfig,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> PostTrainingJob:
- """
- Run preference optimization of a model.
-
- Args:
- algorithm_config: The algorithm configuration.
-
- finetuned_model: The model to fine-tune.
-
- hyperparam_search_config: The hyperparam search configuration.
-
- job_uuid: The UUID of the job to create.
-
- logger_config: The logger configuration.
-
- training_config: The training configuration.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- return await self._post(
- "/v1alpha/post-training/preference-optimize",
- body=await async_maybe_transform(
- {
- "algorithm_config": algorithm_config,
- "finetuned_model": finetuned_model,
- "hyperparam_search_config": hyperparam_search_config,
- "job_uuid": job_uuid,
- "logger_config": logger_config,
- "training_config": training_config,
- },
- post_training_preference_optimize_params.PostTrainingPreferenceOptimizeParams,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=PostTrainingJob,
- )
-
- async def supervised_fine_tune(
- self,
- *,
- hyperparam_search_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]],
- job_uuid: str,
- logger_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]],
- training_config: post_training_supervised_fine_tune_params.TrainingConfig,
- algorithm_config: AlgorithmConfigParam | Omit = omit,
- checkpoint_dir: str | Omit = omit,
- model: str | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> PostTrainingJob:
- """
- Run supervised fine-tuning of a model.
-
- Args:
- hyperparam_search_config: The hyperparam search configuration.
-
- job_uuid: The UUID of the job to create.
-
- logger_config: The logger configuration.
-
- training_config: The training configuration.
-
- algorithm_config: The algorithm configuration.
-
- checkpoint_dir: The directory to save checkpoint(s) to.
-
- model: The model to fine-tune.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- return await self._post(
- "/v1alpha/post-training/supervised-fine-tune",
- body=await async_maybe_transform(
- {
- "hyperparam_search_config": hyperparam_search_config,
- "job_uuid": job_uuid,
- "logger_config": logger_config,
- "training_config": training_config,
- "algorithm_config": algorithm_config,
- "checkpoint_dir": checkpoint_dir,
- "model": model,
- },
- post_training_supervised_fine_tune_params.PostTrainingSupervisedFineTuneParams,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=PostTrainingJob,
- )
-
-
-class PostTrainingResourceWithRawResponse:
- def __init__(self, post_training: PostTrainingResource) -> None:
- self._post_training = post_training
-
- self.preference_optimize = to_raw_response_wrapper(
- post_training.preference_optimize,
- )
- self.supervised_fine_tune = to_raw_response_wrapper(
- post_training.supervised_fine_tune,
- )
-
- @cached_property
- def job(self) -> JobResourceWithRawResponse:
- return JobResourceWithRawResponse(self._post_training.job)
-
-
-class AsyncPostTrainingResourceWithRawResponse:
- def __init__(self, post_training: AsyncPostTrainingResource) -> None:
- self._post_training = post_training
-
- self.preference_optimize = async_to_raw_response_wrapper(
- post_training.preference_optimize,
- )
- self.supervised_fine_tune = async_to_raw_response_wrapper(
- post_training.supervised_fine_tune,
- )
-
- @cached_property
- def job(self) -> AsyncJobResourceWithRawResponse:
- return AsyncJobResourceWithRawResponse(self._post_training.job)
-
-
-class PostTrainingResourceWithStreamingResponse:
- def __init__(self, post_training: PostTrainingResource) -> None:
- self._post_training = post_training
-
- self.preference_optimize = to_streamed_response_wrapper(
- post_training.preference_optimize,
- )
- self.supervised_fine_tune = to_streamed_response_wrapper(
- post_training.supervised_fine_tune,
- )
-
- @cached_property
- def job(self) -> JobResourceWithStreamingResponse:
- return JobResourceWithStreamingResponse(self._post_training.job)
-
-
-class AsyncPostTrainingResourceWithStreamingResponse:
- def __init__(self, post_training: AsyncPostTrainingResource) -> None:
- self._post_training = post_training
-
- self.preference_optimize = async_to_streamed_response_wrapper(
- post_training.preference_optimize,
- )
- self.supervised_fine_tune = async_to_streamed_response_wrapper(
- post_training.supervised_fine_tune,
- )
-
- @cached_property
- def job(self) -> AsyncJobResourceWithStreamingResponse:
- return AsyncJobResourceWithStreamingResponse(self._post_training.job)
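The deleted PostTrainingResource above carried the preference_optimize and supervised_fine_tune entry points. A minimal sketch of the removed supervised fine-tune call, assuming the client.alpha.post_training accessor and an illustrative model id; the training_config dict is a placeholder for the TrainingConfig schema that this patch also deletes.

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# Kick off supervised fine-tuning (removed alpha surface; accessor path assumed).
job = client.alpha.post_training.supervised_fine_tune(
    job_uuid="job-1234",
    model="llama3.2-3b-instruct",      # illustrative model id
    training_config={"n_epochs": 1},   # placeholder; real schema was TrainingConfig
    hyperparam_search_config={},
    logger_config={},
)
print(job)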
diff --git a/src/llama_stack_client/resources/beta/__init__.py b/src/llama_stack_client/resources/beta/__init__.py
deleted file mode 100644
index 6fd69c43..00000000
--- a/src/llama_stack_client/resources/beta/__init__.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from .beta import (
- BetaResource,
- AsyncBetaResource,
- BetaResourceWithRawResponse,
- AsyncBetaResourceWithRawResponse,
- BetaResourceWithStreamingResponse,
- AsyncBetaResourceWithStreamingResponse,
-)
-from .datasets import (
- DatasetsResource,
- AsyncDatasetsResource,
- DatasetsResourceWithRawResponse,
- AsyncDatasetsResourceWithRawResponse,
- DatasetsResourceWithStreamingResponse,
- AsyncDatasetsResourceWithStreamingResponse,
-)
-
-__all__ = [
- "DatasetsResource",
- "AsyncDatasetsResource",
- "DatasetsResourceWithRawResponse",
- "AsyncDatasetsResourceWithRawResponse",
- "DatasetsResourceWithStreamingResponse",
- "AsyncDatasetsResourceWithStreamingResponse",
- "BetaResource",
- "AsyncBetaResource",
- "BetaResourceWithRawResponse",
- "AsyncBetaResourceWithRawResponse",
- "BetaResourceWithStreamingResponse",
- "AsyncBetaResourceWithStreamingResponse",
-]
diff --git a/src/llama_stack_client/resources/beta/beta.py b/src/llama_stack_client/resources/beta/beta.py
deleted file mode 100644
index 7bf1c711..00000000
--- a/src/llama_stack_client/resources/beta/beta.py
+++ /dev/null
@@ -1,102 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from .datasets import (
- DatasetsResource,
- AsyncDatasetsResource,
- DatasetsResourceWithRawResponse,
- AsyncDatasetsResourceWithRawResponse,
- DatasetsResourceWithStreamingResponse,
- AsyncDatasetsResourceWithStreamingResponse,
-)
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-
-__all__ = ["BetaResource", "AsyncBetaResource"]
-
-
-class BetaResource(SyncAPIResource):
- @cached_property
- def datasets(self) -> DatasetsResource:
- return DatasetsResource(self._client)
-
- @cached_property
- def with_raw_response(self) -> BetaResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return BetaResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> BetaResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return BetaResourceWithStreamingResponse(self)
-
-
-class AsyncBetaResource(AsyncAPIResource):
- @cached_property
- def datasets(self) -> AsyncDatasetsResource:
- return AsyncDatasetsResource(self._client)
-
- @cached_property
- def with_raw_response(self) -> AsyncBetaResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return AsyncBetaResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> AsyncBetaResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return AsyncBetaResourceWithStreamingResponse(self)
-
-
-class BetaResourceWithRawResponse:
- def __init__(self, beta: BetaResource) -> None:
- self._beta = beta
-
- @cached_property
- def datasets(self) -> DatasetsResourceWithRawResponse:
- return DatasetsResourceWithRawResponse(self._beta.datasets)
-
-
-class AsyncBetaResourceWithRawResponse:
- def __init__(self, beta: AsyncBetaResource) -> None:
- self._beta = beta
-
- @cached_property
- def datasets(self) -> AsyncDatasetsResourceWithRawResponse:
- return AsyncDatasetsResourceWithRawResponse(self._beta.datasets)
-
-
-class BetaResourceWithStreamingResponse:
- def __init__(self, beta: BetaResource) -> None:
- self._beta = beta
-
- @cached_property
- def datasets(self) -> DatasetsResourceWithStreamingResponse:
- return DatasetsResourceWithStreamingResponse(self._beta.datasets)
-
-
-class AsyncBetaResourceWithStreamingResponse:
- def __init__(self, beta: AsyncBetaResource) -> None:
- self._beta = beta
-
- @cached_property
- def datasets(self) -> AsyncDatasetsResourceWithStreamingResponse:
- return AsyncDatasetsResourceWithStreamingResponse(self._beta.datasets)
diff --git a/src/llama_stack_client/resources/beta/datasets.py b/src/llama_stack_client/resources/beta/datasets.py
deleted file mode 100644
index 03321e48..00000000
--- a/src/llama_stack_client/resources/beta/datasets.py
+++ /dev/null
@@ -1,682 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, Type, Union, Iterable, cast
-from typing_extensions import Literal
-
-import httpx
-
-from ..._types import Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given
-from ..._utils import maybe_transform, async_maybe_transform
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import (
- to_raw_response_wrapper,
- to_streamed_response_wrapper,
- async_to_raw_response_wrapper,
- async_to_streamed_response_wrapper,
-)
-from ..._wrappers import DataWrapper
-from ...types.beta import dataset_iterrows_params, dataset_register_params, dataset_appendrows_params
-from ..._base_client import make_request_options
-from ...types.beta.dataset_list_response import DatasetListResponse
-from ...types.beta.dataset_iterrows_response import DatasetIterrowsResponse
-from ...types.beta.dataset_register_response import DatasetRegisterResponse
-from ...types.beta.dataset_retrieve_response import DatasetRetrieveResponse
-
-__all__ = ["DatasetsResource", "AsyncDatasetsResource"]
-
-
-class DatasetsResource(SyncAPIResource):
- @cached_property
- def with_raw_response(self) -> DatasetsResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return DatasetsResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> DatasetsResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return DatasetsResourceWithStreamingResponse(self)
-
- def retrieve(
- self,
- dataset_id: str,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> DatasetRetrieveResponse:
- """
- Get a dataset by its ID.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not dataset_id:
- raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}")
- return self._get(
- f"/v1beta/datasets/{dataset_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=DatasetRetrieveResponse,
- )
-
- def list(
- self,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> DatasetListResponse:
- """List all datasets."""
- return self._get(
- "/v1beta/datasets",
- options=make_request_options(
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- post_parser=DataWrapper[DatasetListResponse]._unwrapper,
- ),
- cast_to=cast(Type[DatasetListResponse], DataWrapper[DatasetListResponse]),
- )
-
- def appendrows(
- self,
- dataset_id: str,
- *,
- rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]],
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> None:
- """
- Append rows to a dataset.
-
- Args:
- rows: The rows to append to the dataset.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not dataset_id:
- raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}")
- extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- return self._post(
- f"/v1beta/datasetio/append-rows/{dataset_id}",
- body=maybe_transform({"rows": rows}, dataset_appendrows_params.DatasetAppendrowsParams),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=NoneType,
- )
-
- def iterrows(
- self,
- dataset_id: str,
- *,
- limit: int | Omit = omit,
- start_index: int | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> DatasetIterrowsResponse:
- """Get a paginated list of rows from a dataset.
-
- Uses offset-based pagination where:
-
- - start_index: The starting index (0-based). If None, starts from beginning.
- - limit: Number of items to return. If None or -1, returns all items.
-
- The response includes:
-
- - data: List of items for the current page.
- - has_more: Whether there are more items available after this set.
-
- Args:
- limit: The number of rows to get.
-
- start_index: Index into dataset for the first row to get. Get all rows if None.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not dataset_id:
- raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}")
- return self._get(
- f"/v1beta/datasetio/iterrows/{dataset_id}",
- options=make_request_options(
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- query=maybe_transform(
- {
- "limit": limit,
- "start_index": start_index,
- },
- dataset_iterrows_params.DatasetIterrowsParams,
- ),
- ),
- cast_to=DatasetIterrowsResponse,
- )
-
- def register(
- self,
- *,
- purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"],
- source: dataset_register_params.Source,
- dataset_id: str | Omit = omit,
- metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> DatasetRegisterResponse:
- """Register a new dataset.
-
- Args:
- purpose: The purpose of the dataset.
-
- One of: - "post-training/messages": The dataset
- contains a messages column with list of messages for post-training. {
- "messages": [ {"role": "user", "content": "Hello, world!"}, {"role":
- "assistant", "content": "Hello, world!"}, ] } - "eval/question-answer": The
- dataset contains a question column and an answer column for evaluation. {
- "question": "What is the capital of France?", "answer": "Paris" } -
- "eval/messages-answer": The dataset contains a messages column with list of
- messages and an answer column for evaluation. { "messages": [ {"role": "user",
- "content": "Hello, my name is John Doe."}, {"role": "assistant", "content":
- "Hello, John Doe. How can I help you today?"}, {"role": "user", "content":
- "What's my name?"}, ], "answer": "John Doe" }
-
- source: The data source of the dataset. Ensure that the data source schema is compatible
- with the purpose of the dataset. Examples: - { "type": "uri", "uri":
- "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
- "lsfs://mydata.jsonl" } - { "type": "uri", "uri":
- "data:csv;base64,{base64_content}" } - { "type": "uri", "uri":
- "huggingface://llamastack/simpleqa?split=train" } - { "type": "rows", "rows": [
- { "messages": [ {"role": "user", "content": "Hello, world!"}, {"role":
- "assistant", "content": "Hello, world!"}, ] } ] }
-
- dataset_id: The ID of the dataset. If not provided, an ID will be generated.
-
- metadata: The metadata for the dataset. - E.g. {"description": "My dataset"}.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- return self._post(
- "/v1beta/datasets",
- body=maybe_transform(
- {
- "purpose": purpose,
- "source": source,
- "dataset_id": dataset_id,
- "metadata": metadata,
- },
- dataset_register_params.DatasetRegisterParams,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=DatasetRegisterResponse,
- )
-
- def unregister(
- self,
- dataset_id: str,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> None:
- """
- Unregister a dataset by its ID.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not dataset_id:
- raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}")
- extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- return self._delete(
- f"/v1beta/datasets/{dataset_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=NoneType,
- )
-
-
-class AsyncDatasetsResource(AsyncAPIResource):
- @cached_property
- def with_raw_response(self) -> AsyncDatasetsResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return AsyncDatasetsResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> AsyncDatasetsResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return AsyncDatasetsResourceWithStreamingResponse(self)
-
- async def retrieve(
- self,
- dataset_id: str,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> DatasetRetrieveResponse:
- """
- Get a dataset by its ID.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not dataset_id:
- raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}")
- return await self._get(
- f"/v1beta/datasets/{dataset_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=DatasetRetrieveResponse,
- )
-
- async def list(
- self,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> DatasetListResponse:
- """List all datasets."""
- return await self._get(
- "/v1beta/datasets",
- options=make_request_options(
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- post_parser=DataWrapper[DatasetListResponse]._unwrapper,
- ),
- cast_to=cast(Type[DatasetListResponse], DataWrapper[DatasetListResponse]),
- )
-
- async def appendrows(
- self,
- dataset_id: str,
- *,
- rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]],
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> None:
- """
- Append rows to a dataset.
-
- Args:
- rows: The rows to append to the dataset.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not dataset_id:
- raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}")
- extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- return await self._post(
- f"/v1beta/datasetio/append-rows/{dataset_id}",
- body=await async_maybe_transform({"rows": rows}, dataset_appendrows_params.DatasetAppendrowsParams),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=NoneType,
- )
-
- async def iterrows(
- self,
- dataset_id: str,
- *,
- limit: int | Omit = omit,
- start_index: int | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> DatasetIterrowsResponse:
- """Get a paginated list of rows from a dataset.
-
- Uses offset-based pagination where:
-
- - start_index: The starting index (0-based). If None, starts from beginning.
- - limit: Number of items to return. If None or -1, returns all items.
-
- The response includes:
-
- - data: List of items for the current page.
- - has_more: Whether there are more items available after this set.
-
- Args:
- limit: The number of rows to get.
-
- start_index: Index into dataset for the first row to get. Get all rows if None.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not dataset_id:
- raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}")
- return await self._get(
- f"/v1beta/datasetio/iterrows/{dataset_id}",
- options=make_request_options(
- extra_headers=extra_headers,
- extra_query=extra_query,
- extra_body=extra_body,
- timeout=timeout,
- query=await async_maybe_transform(
- {
- "limit": limit,
- "start_index": start_index,
- },
- dataset_iterrows_params.DatasetIterrowsParams,
- ),
- ),
- cast_to=DatasetIterrowsResponse,
- )
-
- async def register(
- self,
- *,
- purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"],
- source: dataset_register_params.Source,
- dataset_id: str | Omit = omit,
- metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> DatasetRegisterResponse:
- """Register a new dataset.
-
- Args:
- purpose: The purpose of the dataset.
-
- One of: - "post-training/messages": The dataset
- contains a messages column with list of messages for post-training. {
- "messages": [ {"role": "user", "content": "Hello, world!"}, {"role":
- "assistant", "content": "Hello, world!"}, ] } - "eval/question-answer": The
- dataset contains a question column and an answer column for evaluation. {
- "question": "What is the capital of France?", "answer": "Paris" } -
- "eval/messages-answer": The dataset contains a messages column with list of
- messages and an answer column for evaluation. { "messages": [ {"role": "user",
- "content": "Hello, my name is John Doe."}, {"role": "assistant", "content":
- "Hello, John Doe. How can I help you today?"}, {"role": "user", "content":
- "What's my name?"}, ], "answer": "John Doe" }
-
- source: The data source of the dataset. Ensure that the data source schema is compatible
- with the purpose of the dataset. Examples: - { "type": "uri", "uri":
- "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
- "lsfs://mydata.jsonl" } - { "type": "uri", "uri":
- "data:csv;base64,{base64_content}" } - { "type": "uri", "uri":
- "huggingface://llamastack/simpleqa?split=train" } - { "type": "rows", "rows": [
- { "messages": [ {"role": "user", "content": "Hello, world!"}, {"role":
- "assistant", "content": "Hello, world!"}, ] } ] }
-
- dataset_id: The ID of the dataset. If not provided, an ID will be generated.
-
- metadata: The metadata for the dataset. - E.g. {"description": "My dataset"}.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- return await self._post(
- "/v1beta/datasets",
- body=await async_maybe_transform(
- {
- "purpose": purpose,
- "source": source,
- "dataset_id": dataset_id,
- "metadata": metadata,
- },
- dataset_register_params.DatasetRegisterParams,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=DatasetRegisterResponse,
- )
-
- async def unregister(
- self,
- dataset_id: str,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> None:
- """
- Unregister a dataset by its ID.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not dataset_id:
- raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}")
- extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- return await self._delete(
- f"/v1beta/datasets/{dataset_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=NoneType,
- )
-
-
-class DatasetsResourceWithRawResponse:
- def __init__(self, datasets: DatasetsResource) -> None:
- self._datasets = datasets
-
- self.retrieve = to_raw_response_wrapper(
- datasets.retrieve,
- )
- self.list = to_raw_response_wrapper(
- datasets.list,
- )
- self.appendrows = to_raw_response_wrapper(
- datasets.appendrows,
- )
- self.iterrows = to_raw_response_wrapper(
- datasets.iterrows,
- )
- self.register = to_raw_response_wrapper(
- datasets.register,
- )
- self.unregister = to_raw_response_wrapper(
- datasets.unregister,
- )
-
-
-class AsyncDatasetsResourceWithRawResponse:
- def __init__(self, datasets: AsyncDatasetsResource) -> None:
- self._datasets = datasets
-
- self.retrieve = async_to_raw_response_wrapper(
- datasets.retrieve,
- )
- self.list = async_to_raw_response_wrapper(
- datasets.list,
- )
- self.appendrows = async_to_raw_response_wrapper(
- datasets.appendrows,
- )
- self.iterrows = async_to_raw_response_wrapper(
- datasets.iterrows,
- )
- self.register = async_to_raw_response_wrapper(
- datasets.register,
- )
- self.unregister = async_to_raw_response_wrapper(
- datasets.unregister,
- )
-
-
-class DatasetsResourceWithStreamingResponse:
- def __init__(self, datasets: DatasetsResource) -> None:
- self._datasets = datasets
-
- self.retrieve = to_streamed_response_wrapper(
- datasets.retrieve,
- )
- self.list = to_streamed_response_wrapper(
- datasets.list,
- )
- self.appendrows = to_streamed_response_wrapper(
- datasets.appendrows,
- )
- self.iterrows = to_streamed_response_wrapper(
- datasets.iterrows,
- )
- self.register = to_streamed_response_wrapper(
- datasets.register,
- )
- self.unregister = to_streamed_response_wrapper(
- datasets.unregister,
- )
-
-
-class AsyncDatasetsResourceWithStreamingResponse:
- def __init__(self, datasets: AsyncDatasetsResource) -> None:
- self._datasets = datasets
-
- self.retrieve = async_to_streamed_response_wrapper(
- datasets.retrieve,
- )
- self.list = async_to_streamed_response_wrapper(
- datasets.list,
- )
- self.appendrows = async_to_streamed_response_wrapper(
- datasets.appendrows,
- )
- self.iterrows = async_to_streamed_response_wrapper(
- datasets.iterrows,
- )
- self.register = async_to_streamed_response_wrapper(
- datasets.register,
- )
- self.unregister = async_to_streamed_response_wrapper(
- datasets.unregister,
- )
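The deleted beta datasets resource above covered register, list, iterrows, appendrows, and unregister. A minimal sketch of registering a URI-backed dataset and paging its rows, assuming the client.beta.datasets accessor; the dataset id and URI are illustrative, and the data/has_more fields come from the iterrows docstring above.

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# Register a dataset from a URI source, then page through its rows.
client.beta.datasets.register(
    dataset_id="my-eval-set",  # optional; generated if omitted
    purpose="eval/question-answer",
    source={"type": "uri", "uri": "https://mywebsite.com/mydata.jsonl"},
)
page = client.beta.datasets.iterrows("my-eval-set", start_index=0, limit=10)
print(page.data, page.has_more)
client.beta.datasets.unregister("my-eval-set")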
diff --git a/src/llama_stack_client/resources/responses/responses.py b/src/llama_stack_client/resources/responses/responses.py
index 6bc29a62..e0109583 100644
--- a/src/llama_stack_client/resources/responses/responses.py
+++ b/src/llama_stack_client/resources/responses/responses.py
@@ -78,6 +78,7 @@ def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
stream: Literal[False] | Omit = omit,
temperature: float | Omit = omit,
@@ -108,6 +109,8 @@ def create(
response. This can be used to easily fork-off new responses from existing
responses.
+ prompt: (Optional) Prompt object with ID, version, and variables.
+
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -132,6 +135,7 @@ def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
temperature: float | Omit = omit,
text: response_create_params.Text | Omit = omit,
@@ -161,6 +165,8 @@ def create(
response. This can be used to easily fork-off new responses from existing
responses.
+ prompt: (Optional) Prompt object with ID, version, and variables.
+
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -185,6 +191,7 @@ def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
temperature: float | Omit = omit,
text: response_create_params.Text | Omit = omit,
@@ -214,6 +221,8 @@ def create(
response. This can be used to easily fork-off new responses from existing
responses.
+ prompt: (Optional) Prompt object with ID, version, and variables.
+
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -237,6 +246,7 @@ def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
stream: Literal[False] | Literal[True] | Omit = omit,
temperature: float | Omit = omit,
@@ -260,6 +270,7 @@ def create(
"instructions": instructions,
"max_infer_iters": max_infer_iters,
"previous_response_id": previous_response_id,
+ "prompt": prompt,
"store": store,
"stream": stream,
"temperature": temperature,
@@ -435,6 +446,7 @@ async def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
stream: Literal[False] | Omit = omit,
temperature: float | Omit = omit,
@@ -465,6 +477,8 @@ async def create(
response. This can be used to easily fork-off new responses from existing
responses.
+ prompt: (Optional) Prompt object with ID, version, and variables.
+
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -489,6 +503,7 @@ async def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
temperature: float | Omit = omit,
text: response_create_params.Text | Omit = omit,
@@ -518,6 +533,8 @@ async def create(
response. This can be used to easily fork-off new responses from existing
responses.
+ prompt: (Optional) Prompt object with ID, version, and variables.
+
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -542,6 +559,7 @@ async def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
temperature: float | Omit = omit,
text: response_create_params.Text | Omit = omit,
@@ -571,6 +589,8 @@ async def create(
response. This can be used to easily fork-off new responses from existing
responses.
+ prompt: (Optional) Prompt object with ID, version, and variables.
+
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -594,6 +614,7 @@ async def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
+ prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
stream: Literal[False] | Literal[True] | Omit = omit,
temperature: float | Omit = omit,
@@ -617,6 +638,7 @@ async def create(
"instructions": instructions,
"max_infer_iters": max_infer_iters,
"previous_response_id": previous_response_id,
+ "prompt": prompt,
"store": store,
"stream": stream,
"temperature": temperature,
diff --git a/src/llama_stack_client/resources/routes.py b/src/llama_stack_client/resources/routes.py
index 0797d00f..ff9b2c59 100644
--- a/src/llama_stack_client/resources/routes.py
+++ b/src/llama_stack_client/resources/routes.py
@@ -9,10 +9,13 @@
from __future__ import annotations
from typing import Type, cast
+from typing_extensions import Literal
import httpx
-from .._types import Body, Query, Headers, NotGiven, not_given
+from ..types import route_list_params
+from .._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
+from .._utils import maybe_transform, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
@@ -51,6 +54,7 @@ def with_streaming_response(self) -> RoutesResourceWithStreamingResponse:
def list(
self,
*,
+ api_filter: Literal["v1", "v1alpha", "v1beta", "deprecated"] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -62,6 +66,20 @@ def list(
List all available API routes with their methods and implementing
providers.
+
+ Args:
+ api_filter: Optional filter to control which routes are returned. Can be an API level ('v1',
+ 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or
+ 'deprecated' to show deprecated routes across all levels. If not specified,
+ returns only non-deprecated v1 routes.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
"""
return self._get(
"/v1/inspect/routes",
@@ -70,6 +88,7 @@ def list(
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
+ query=maybe_transform({"api_filter": api_filter}, route_list_params.RouteListParams),
post_parser=DataWrapper[RouteListResponse]._unwrapper,
),
cast_to=cast(Type[RouteListResponse], DataWrapper[RouteListResponse]),
@@ -99,6 +118,7 @@ def with_streaming_response(self) -> AsyncRoutesResourceWithStreamingResponse:
async def list(
self,
*,
+ api_filter: Literal["v1", "v1alpha", "v1beta", "deprecated"] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -110,6 +130,20 @@ async def list(
List all available API routes with their methods and implementing
providers.
+
+ Args:
+ api_filter: Optional filter to control which routes are returned. Can be an API level ('v1',
+ 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or
+ 'deprecated' to show deprecated routes across all levels. If not specified,
+ returns only non-deprecated v1 routes.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
"""
return await self._get(
"/v1/inspect/routes",
@@ -118,6 +152,7 @@ async def list(
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
+ query=await async_maybe_transform({"api_filter": api_filter}, route_list_params.RouteListParams),
post_parser=DataWrapper[RouteListResponse]._unwrapper,
),
cast_to=cast(Type[RouteListResponse], DataWrapper[RouteListResponse]),
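The `api_filter` query parameter introduced in routes.py above selects which routes GET /v1/inspect/routes reports: an API level ('v1', 'v1alpha', 'v1beta') for non-deprecated routes at that level, or 'deprecated' for deprecated routes across all levels, with plain v1 as the default. A minimal usage sketch follows; it assumes a Llama Stack server reachable at the default local address and that the generated client exposes this resource as `client.routes` (those two details are assumptions, not stated in this patch):

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server address

    # Default behaviour after this change: only non-deprecated v1 routes.
    v1_routes = client.routes.list()

    # Pass api_filter to inspect another API level, or the deprecated routes.
    alpha_routes = client.routes.list(api_filter="v1alpha")
    deprecated_routes = client.routes.list(api_filter="deprecated")

    for r in deprecated_routes:
        print(r)

The async resource shown in the same hunk takes the identical keyword argument via `await client.routes.list(...)`.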
diff --git a/src/llama_stack_client/types/__init__.py b/src/llama_stack_client/types/__init__.py
index 173a1e03..43df9408 100644
--- a/src/llama_stack_client/types/__init__.py
+++ b/src/llama_stack_client/types/__init__.py
@@ -15,14 +15,11 @@
Document as Document,
ToolCall as ToolCall,
ParamType as ParamType,
- AgentConfig as AgentConfig,
QueryConfig as QueryConfig,
QueryResult as QueryResult,
UserMessage as UserMessage,
ScoringResult as ScoringResult,
SystemMessage as SystemMessage,
- ResponseFormat as ResponseFormat,
- SamplingParams as SamplingParams,
SafetyViolation as SafetyViolation,
CompletionMessage as CompletionMessage,
InterleavedContent as InterleavedContent,
@@ -38,11 +35,11 @@
from .vector_store import VectorStore as VectorStore
from .version_info import VersionInfo as VersionInfo
from .provider_info import ProviderInfo as ProviderInfo
-from .tool_def_param import ToolDefParam as ToolDefParam
from .create_response import CreateResponse as CreateResponse
from .response_object import ResponseObject as ResponseObject
from .file_list_params import FileListParams as FileListParams
from .tool_list_params import ToolListParams as ToolListParams
+from .route_list_params import RouteListParams as RouteListParams
from .scoring_fn_params import ScoringFnParams as ScoringFnParams
from .file_create_params import FileCreateParams as FileCreateParams
from .tool_list_response import ToolListResponse as ToolListResponse
diff --git a/src/llama_stack_client/types/alpha/__init__.py b/src/llama_stack_client/types/alpha/__init__.py
index 61e02a4e..d14ed874 100644
--- a/src/llama_stack_client/types/alpha/__init__.py
+++ b/src/llama_stack_client/types/alpha/__init__.py
@@ -7,37 +7,3 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
-
-from .job import Job as Job
-from .benchmark import Benchmark as Benchmark
-from .tool_response import ToolResponse as ToolResponse
-from .inference_step import InferenceStep as InferenceStep
-from .shield_call_step import ShieldCallStep as ShieldCallStep
-from .agent_list_params import AgentListParams as AgentListParams
-from .evaluate_response import EvaluateResponse as EvaluateResponse
-from .post_training_job import PostTrainingJob as PostTrainingJob
-from .agent_create_params import AgentCreateParams as AgentCreateParams
-from .agent_list_response import AgentListResponse as AgentListResponse
-from .tool_execution_step import ToolExecutionStep as ToolExecutionStep
-from .tool_response_param import ToolResponseParam as ToolResponseParam
-from .eval_run_eval_params import EvalRunEvalParams as EvalRunEvalParams
-from .agent_create_response import AgentCreateResponse as AgentCreateResponse
-from .memory_retrieval_step import MemoryRetrievalStep as MemoryRetrievalStep
-from .algorithm_config_param import AlgorithmConfigParam as AlgorithmConfigParam
-from .benchmark_config_param import BenchmarkConfigParam as BenchmarkConfigParam
-from .agent_retrieve_response import AgentRetrieveResponse as AgentRetrieveResponse
-from .benchmark_list_response import BenchmarkListResponse as BenchmarkListResponse
-from .inference_rerank_params import InferenceRerankParams as InferenceRerankParams
-from .list_benchmarks_response import ListBenchmarksResponse as ListBenchmarksResponse
-from .benchmark_register_params import BenchmarkRegisterParams as BenchmarkRegisterParams
-from .eval_evaluate_rows_params import EvalEvaluateRowsParams as EvalEvaluateRowsParams
-from .inference_rerank_response import InferenceRerankResponse as InferenceRerankResponse
-from .eval_run_eval_alpha_params import EvalRunEvalAlphaParams as EvalRunEvalAlphaParams
-from .eval_evaluate_rows_alpha_params import EvalEvaluateRowsAlphaParams as EvalEvaluateRowsAlphaParams
-from .list_post_training_jobs_response import ListPostTrainingJobsResponse as ListPostTrainingJobsResponse
-from .post_training_preference_optimize_params import (
- PostTrainingPreferenceOptimizeParams as PostTrainingPreferenceOptimizeParams,
-)
-from .post_training_supervised_fine_tune_params import (
- PostTrainingSupervisedFineTuneParams as PostTrainingSupervisedFineTuneParams,
-)
diff --git a/src/llama_stack_client/types/alpha/agent_create_params.py b/src/llama_stack_client/types/alpha/agent_create_params.py
deleted file mode 100644
index 9c420379..00000000
--- a/src/llama_stack_client/types/alpha/agent_create_params.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Required, TypedDict
-
-from ..shared_params.agent_config import AgentConfig
-
-__all__ = ["AgentCreateParams"]
-
-
-class AgentCreateParams(TypedDict, total=False):
- agent_config: Required[AgentConfig]
- """The configuration for the agent."""
diff --git a/src/llama_stack_client/types/alpha/agent_create_response.py b/src/llama_stack_client/types/alpha/agent_create_response.py
deleted file mode 100644
index 70e7d98b..00000000
--- a/src/llama_stack_client/types/alpha/agent_create_response.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from ..._models import BaseModel
-
-__all__ = ["AgentCreateResponse"]
-
-
-class AgentCreateResponse(BaseModel):
- agent_id: str
- """Unique identifier for the created agent"""
diff --git a/src/llama_stack_client/types/alpha/agent_list_params.py b/src/llama_stack_client/types/alpha/agent_list_params.py
deleted file mode 100644
index 0b50ef24..00000000
--- a/src/llama_stack_client/types/alpha/agent_list_params.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import TypedDict
-
-__all__ = ["AgentListParams"]
-
-
-class AgentListParams(TypedDict, total=False):
- limit: int
- """The number of agents to return."""
-
- start_index: int
- """The index to start the pagination from."""
diff --git a/src/llama_stack_client/types/alpha/agent_list_response.py b/src/llama_stack_client/types/alpha/agent_list_response.py
deleted file mode 100644
index 212a4a9f..00000000
--- a/src/llama_stack_client/types/alpha/agent_list_response.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union, Optional
-
-from ..._models import BaseModel
-
-__all__ = ["AgentListResponse"]
-
-
-class AgentListResponse(BaseModel):
- data: List[Dict[str, Union[bool, float, str, List[object], object, None]]]
- """The list of items for the current page"""
-
- has_more: bool
- """Whether there are more items available after this set"""
-
- url: Optional[str] = None
- """The URL for accessing this list"""
diff --git a/src/llama_stack_client/types/alpha/agent_retrieve_response.py b/src/llama_stack_client/types/alpha/agent_retrieve_response.py
deleted file mode 100644
index bcf40e21..00000000
--- a/src/llama_stack_client/types/alpha/agent_retrieve_response.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from datetime import datetime
-
-from ..._models import BaseModel
-from ..shared.agent_config import AgentConfig
-
-__all__ = ["AgentRetrieveResponse"]
-
-
-class AgentRetrieveResponse(BaseModel):
- agent_config: AgentConfig
- """Configuration settings for the agent"""
-
- agent_id: str
- """Unique identifier for the agent"""
-
- created_at: datetime
- """Timestamp when the agent was created"""
diff --git a/src/llama_stack_client/types/alpha/agents/__init__.py b/src/llama_stack_client/types/alpha/agents/__init__.py
index f28e38e4..d14ed874 100644
--- a/src/llama_stack_client/types/alpha/agents/__init__.py
+++ b/src/llama_stack_client/types/alpha/agents/__init__.py
@@ -7,16 +7,3 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
-
-from .turn import Turn as Turn
-from .session import Session as Session
-from .turn_create_params import TurnCreateParams as TurnCreateParams
-from .turn_resume_params import TurnResumeParams as TurnResumeParams
-from .session_list_params import SessionListParams as SessionListParams
-from .turn_response_event import TurnResponseEvent as TurnResponseEvent
-from .session_create_params import SessionCreateParams as SessionCreateParams
-from .session_list_response import SessionListResponse as SessionListResponse
-from .step_retrieve_response import StepRetrieveResponse as StepRetrieveResponse
-from .session_create_response import SessionCreateResponse as SessionCreateResponse
-from .session_retrieve_params import SessionRetrieveParams as SessionRetrieveParams
-from .agent_turn_response_stream_chunk import AgentTurnResponseStreamChunk as AgentTurnResponseStreamChunk
diff --git a/src/llama_stack_client/types/alpha/agents/agent_turn_response_stream_chunk.py b/src/llama_stack_client/types/alpha/agents/agent_turn_response_stream_chunk.py
deleted file mode 100644
index 5a518938..00000000
--- a/src/llama_stack_client/types/alpha/agents/agent_turn_response_stream_chunk.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from ...._models import BaseModel
-from .turn_response_event import TurnResponseEvent
-
-__all__ = ["AgentTurnResponseStreamChunk"]
-
-
-class AgentTurnResponseStreamChunk(BaseModel):
- event: TurnResponseEvent
- """Individual event in the agent turn response stream"""
diff --git a/src/llama_stack_client/types/alpha/agents/session.py b/src/llama_stack_client/types/alpha/agents/session.py
deleted file mode 100644
index c2b3571d..00000000
--- a/src/llama_stack_client/types/alpha/agents/session.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List
-from datetime import datetime
-
-from .turn import Turn
-from ...._models import BaseModel
-
-__all__ = ["Session"]
-
-
-class Session(BaseModel):
- session_id: str
- """Unique identifier for the conversation session"""
-
- session_name: str
- """Human-readable name for the session"""
-
- started_at: datetime
- """Timestamp when the session was created"""
-
- turns: List[Turn]
- """List of all turns that have occurred in this session"""
diff --git a/src/llama_stack_client/types/alpha/agents/session_create_params.py b/src/llama_stack_client/types/alpha/agents/session_create_params.py
deleted file mode 100644
index e8fb03fa..00000000
--- a/src/llama_stack_client/types/alpha/agents/session_create_params.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Required, TypedDict
-
-__all__ = ["SessionCreateParams"]
-
-
-class SessionCreateParams(TypedDict, total=False):
- session_name: Required[str]
- """The name of the session to create."""
diff --git a/src/llama_stack_client/types/alpha/agents/session_create_response.py b/src/llama_stack_client/types/alpha/agents/session_create_response.py
deleted file mode 100644
index dd8b1eba..00000000
--- a/src/llama_stack_client/types/alpha/agents/session_create_response.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from ...._models import BaseModel
-
-__all__ = ["SessionCreateResponse"]
-
-
-class SessionCreateResponse(BaseModel):
- session_id: str
- """Unique identifier for the created session"""
diff --git a/src/llama_stack_client/types/alpha/agents/session_list_params.py b/src/llama_stack_client/types/alpha/agents/session_list_params.py
deleted file mode 100644
index 0ff7609b..00000000
--- a/src/llama_stack_client/types/alpha/agents/session_list_params.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import TypedDict
-
-__all__ = ["SessionListParams"]
-
-
-class SessionListParams(TypedDict, total=False):
- limit: int
- """The number of sessions to return."""
-
- start_index: int
- """The index to start the pagination from."""
diff --git a/src/llama_stack_client/types/alpha/agents/session_list_response.py b/src/llama_stack_client/types/alpha/agents/session_list_response.py
deleted file mode 100644
index ad686bd3..00000000
--- a/src/llama_stack_client/types/alpha/agents/session_list_response.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union, Optional
-
-from ...._models import BaseModel
-
-__all__ = ["SessionListResponse"]
-
-
-class SessionListResponse(BaseModel):
- data: List[Dict[str, Union[bool, float, str, List[object], object, None]]]
- """The list of items for the current page"""
-
- has_more: bool
- """Whether there are more items available after this set"""
-
- url: Optional[str] = None
- """The URL for accessing this list"""
diff --git a/src/llama_stack_client/types/alpha/agents/session_retrieve_params.py b/src/llama_stack_client/types/alpha/agents/session_retrieve_params.py
deleted file mode 100644
index 27bc0761..00000000
--- a/src/llama_stack_client/types/alpha/agents/session_retrieve_params.py
+++ /dev/null
@@ -1,22 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Required, TypedDict
-
-from ...._types import SequenceNotStr
-
-__all__ = ["SessionRetrieveParams"]
-
-
-class SessionRetrieveParams(TypedDict, total=False):
- agent_id: Required[str]
-
- turn_ids: SequenceNotStr[str]
- """(Optional) List of turn IDs to filter the session by."""
diff --git a/src/llama_stack_client/types/alpha/agents/step_retrieve_response.py b/src/llama_stack_client/types/alpha/agents/step_retrieve_response.py
deleted file mode 100644
index 300c6ffb..00000000
--- a/src/llama_stack_client/types/alpha/agents/step_retrieve_response.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Union
-from typing_extensions import Annotated, TypeAlias
-
-from ...._utils import PropertyInfo
-from ...._models import BaseModel
-from ..inference_step import InferenceStep
-from ..shield_call_step import ShieldCallStep
-from ..tool_execution_step import ToolExecutionStep
-from ..memory_retrieval_step import MemoryRetrievalStep
-
-__all__ = ["StepRetrieveResponse", "Step"]
-
-Step: TypeAlias = Annotated[
- Union[InferenceStep, ToolExecutionStep, ShieldCallStep, MemoryRetrievalStep],
- PropertyInfo(discriminator="step_type"),
-]
-
-
-class StepRetrieveResponse(BaseModel):
- step: Step
- """The complete step data and execution details"""
diff --git a/src/llama_stack_client/types/alpha/agents/turn.py b/src/llama_stack_client/types/alpha/agents/turn.py
deleted file mode 100644
index 51ec9ddf..00000000
--- a/src/llama_stack_client/types/alpha/agents/turn.py
+++ /dev/null
@@ -1,122 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Union, Optional
-from datetime import datetime
-from typing_extensions import Literal, Annotated, TypeAlias
-
-from ...._utils import PropertyInfo
-from ...._models import BaseModel
-from ..inference_step import InferenceStep
-from ..shield_call_step import ShieldCallStep
-from ..tool_execution_step import ToolExecutionStep
-from ...shared.user_message import UserMessage
-from ..memory_retrieval_step import MemoryRetrievalStep
-from ...shared.completion_message import CompletionMessage
-from ...shared.tool_response_message import ToolResponseMessage
-from ...shared.interleaved_content_item import InterleavedContentItem
-
-__all__ = [
- "Turn",
- "InputMessage",
- "Step",
- "OutputAttachment",
- "OutputAttachmentContent",
- "OutputAttachmentContentImageContentItem",
- "OutputAttachmentContentImageContentItemImage",
- "OutputAttachmentContentImageContentItemImageURL",
- "OutputAttachmentContentTextContentItem",
- "OutputAttachmentContentURL",
-]
-
-InputMessage: TypeAlias = Union[UserMessage, ToolResponseMessage]
-
-Step: TypeAlias = Annotated[
- Union[InferenceStep, ToolExecutionStep, ShieldCallStep, MemoryRetrievalStep],
- PropertyInfo(discriminator="step_type"),
-]
-
-
-class OutputAttachmentContentImageContentItemImageURL(BaseModel):
- uri: str
- """The URL string pointing to the resource"""
-
-
-class OutputAttachmentContentImageContentItemImage(BaseModel):
- data: Optional[str] = None
- """base64 encoded image data as string"""
-
- url: Optional[OutputAttachmentContentImageContentItemImageURL] = None
- """A URL of the image or data URL in the format of data:image/{type};base64,{data}.
-
- Note that URL could have length limits.
- """
-
-
-class OutputAttachmentContentImageContentItem(BaseModel):
- image: OutputAttachmentContentImageContentItemImage
- """Image as a base64 encoded string or an URL"""
-
- type: Literal["image"]
- """Discriminator type of the content item. Always "image" """
-
-
-class OutputAttachmentContentTextContentItem(BaseModel):
- text: str
- """Text content"""
-
- type: Literal["text"]
- """Discriminator type of the content item. Always "text" """
-
-
-class OutputAttachmentContentURL(BaseModel):
- uri: str
- """The URL string pointing to the resource"""
-
-
-OutputAttachmentContent: TypeAlias = Union[
- str,
- OutputAttachmentContentImageContentItem,
- OutputAttachmentContentTextContentItem,
- List[InterleavedContentItem],
- OutputAttachmentContentURL,
-]
-
-
-class OutputAttachment(BaseModel):
- content: OutputAttachmentContent
- """The content of the attachment."""
-
- mime_type: str
- """The MIME type of the attachment."""
-
-
-class Turn(BaseModel):
- input_messages: List[InputMessage]
- """List of messages that initiated this turn"""
-
- output_message: CompletionMessage
- """The model's generated response containing content and metadata"""
-
- session_id: str
- """Unique identifier for the conversation session"""
-
- started_at: datetime
- """Timestamp when the turn began"""
-
- steps: List[Step]
- """Ordered list of processing steps executed during this turn"""
-
- turn_id: str
- """Unique identifier for the turn within a session"""
-
- completed_at: Optional[datetime] = None
- """(Optional) Timestamp when the turn finished, if completed"""
-
- output_attachments: Optional[List[OutputAttachment]] = None
- """(Optional) Files or media attached to the agent's response"""
diff --git a/src/llama_stack_client/types/alpha/agents/turn_create_params.py b/src/llama_stack_client/types/alpha/agents/turn_create_params.py
deleted file mode 100644
index 79ee42be..00000000
--- a/src/llama_stack_client/types/alpha/agents/turn_create_params.py
+++ /dev/null
@@ -1,170 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, Union, Iterable
-from typing_extensions import Literal, Required, TypeAlias, TypedDict
-
-from ...._types import SequenceNotStr
-from ...shared_params.user_message import UserMessage
-from ...shared_params.tool_response_message import ToolResponseMessage
-from ...shared_params.interleaved_content_item import InterleavedContentItem
-
-__all__ = [
- "TurnCreateParamsBase",
- "Message",
- "Document",
- "DocumentContent",
- "DocumentContentImageContentItem",
- "DocumentContentImageContentItemImage",
- "DocumentContentImageContentItemImageURL",
- "DocumentContentTextContentItem",
- "DocumentContentURL",
- "ToolConfig",
- "Toolgroup",
- "ToolgroupAgentToolGroupWithArgs",
- "TurnCreateParamsNonStreaming",
- "TurnCreateParamsStreaming",
-]
-
-
-class TurnCreateParamsBase(TypedDict, total=False):
- agent_id: Required[str]
-
- messages: Required[Iterable[Message]]
- """List of messages to start the turn with."""
-
- documents: Iterable[Document]
- """(Optional) List of documents to create the turn with."""
-
- tool_config: ToolConfig
- """
- (Optional) The tool configuration to create the turn with, will be used to
- override the agent's tool_config.
- """
-
- toolgroups: SequenceNotStr[Toolgroup]
- """
- (Optional) List of toolgroups to create the turn with, will be used in addition
- to the agent's config toolgroups for the request.
- """
-
-
-Message: TypeAlias = Union[UserMessage, ToolResponseMessage]
-
-
-class DocumentContentImageContentItemImageURL(TypedDict, total=False):
- uri: Required[str]
- """The URL string pointing to the resource"""
-
-
-class DocumentContentImageContentItemImage(TypedDict, total=False):
- data: str
- """base64 encoded image data as string"""
-
- url: DocumentContentImageContentItemImageURL
- """A URL of the image or data URL in the format of data:image/{type};base64,{data}.
-
- Note that URL could have length limits.
- """
-
-
-class DocumentContentImageContentItem(TypedDict, total=False):
- image: Required[DocumentContentImageContentItemImage]
- """Image as a base64 encoded string or an URL"""
-
- type: Required[Literal["image"]]
- """Discriminator type of the content item. Always "image" """
-
-
-class DocumentContentTextContentItem(TypedDict, total=False):
- text: Required[str]
- """Text content"""
-
- type: Required[Literal["text"]]
- """Discriminator type of the content item. Always "text" """
-
-
-class DocumentContentURL(TypedDict, total=False):
- uri: Required[str]
- """The URL string pointing to the resource"""
-
-
-DocumentContent: TypeAlias = Union[
- str,
- DocumentContentImageContentItem,
- DocumentContentTextContentItem,
- Iterable[InterleavedContentItem],
- DocumentContentURL,
-]
-
-
-class Document(TypedDict, total=False):
- content: Required[DocumentContent]
- """The content of the document."""
-
- mime_type: Required[str]
- """The MIME type of the document."""
-
-
-class ToolConfig(TypedDict, total=False):
- system_message_behavior: Literal["append", "replace"]
- """(Optional) Config for how to override the default system prompt.
-
- - `SystemMessageBehavior.append`: Appends the provided system message to the
- default system prompt. - `SystemMessageBehavior.replace`: Replaces the default
- system prompt with the provided system message. The system message can include
- the string '{{function_definitions}}' to indicate where the function
- definitions should be inserted.
- """
-
- tool_choice: Union[Literal["auto", "required", "none"], str]
- """(Optional) Whether tool use is automatic, required, or none.
-
- Can also specify a tool name to use a specific tool. Defaults to
- ToolChoice.auto.
- """
-
- tool_prompt_format: Literal["json", "function_tag", "python_list"]
- """(Optional) Instructs the model how to format tool calls.
-
- By default, Llama Stack will attempt to use a format that is best adapted to the
- model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON
- object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a
- <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls
- are output as Python syntax -- a list of function calls.
- """
-
-
-class ToolgroupAgentToolGroupWithArgs(TypedDict, total=False):
- args: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]
-
- name: Required[str]
-
-
-Toolgroup: TypeAlias = Union[str, ToolgroupAgentToolGroupWithArgs]
-
-
-class TurnCreateParamsNonStreaming(TurnCreateParamsBase, total=False):
- stream: Literal[False]
- """(Optional) If True, generate an SSE event stream of the response.
-
- Defaults to False.
- """
-
-
-class TurnCreateParamsStreaming(TurnCreateParamsBase):
- stream: Required[Literal[True]]
- """(Optional) If True, generate an SSE event stream of the response.
-
- Defaults to False.
- """
-
-
-TurnCreateParams = Union[TurnCreateParamsNonStreaming, TurnCreateParamsStreaming]
diff --git a/src/llama_stack_client/types/alpha/agents/turn_response_event.py b/src/llama_stack_client/types/alpha/agents/turn_response_event.py
deleted file mode 100644
index 3088e623..00000000
--- a/src/llama_stack_client/types/alpha/agents/turn_response_event.py
+++ /dev/null
@@ -1,166 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union, Optional
-from typing_extensions import Literal, Annotated, TypeAlias
-
-from .turn import Turn
-from ...._utils import PropertyInfo
-from ...._models import BaseModel
-from ..inference_step import InferenceStep
-from ..shield_call_step import ShieldCallStep
-from ...shared.tool_call import ToolCall
-from ..tool_execution_step import ToolExecutionStep
-from ..memory_retrieval_step import MemoryRetrievalStep
-
-__all__ = [
- "TurnResponseEvent",
- "Payload",
- "PayloadAgentTurnResponseStepStartPayload",
- "PayloadAgentTurnResponseStepProgressPayload",
- "PayloadAgentTurnResponseStepProgressPayloadDelta",
- "PayloadAgentTurnResponseStepProgressPayloadDeltaTextDelta",
- "PayloadAgentTurnResponseStepProgressPayloadDeltaImageDelta",
- "PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDelta",
- "PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDeltaToolCall",
- "PayloadAgentTurnResponseStepCompletePayload",
- "PayloadAgentTurnResponseStepCompletePayloadStepDetails",
- "PayloadAgentTurnResponseTurnStartPayload",
- "PayloadAgentTurnResponseTurnCompletePayload",
- "PayloadAgentTurnResponseTurnAwaitingInputPayload",
-]
-
-
-class PayloadAgentTurnResponseStepStartPayload(BaseModel):
- event_type: Literal["step_start"]
- """Type of event being reported"""
-
- step_id: str
- """Unique identifier for the step within a turn"""
-
- step_type: Literal["inference", "tool_execution", "shield_call", "memory_retrieval"]
- """Type of step being executed"""
-
- metadata: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
- """(Optional) Additional metadata for the step"""
-
-
-class PayloadAgentTurnResponseStepProgressPayloadDeltaTextDelta(BaseModel):
- text: str
- """The incremental text content"""
-
- type: Literal["text"]
- """Discriminator type of the delta. Always "text" """
-
-
-class PayloadAgentTurnResponseStepProgressPayloadDeltaImageDelta(BaseModel):
- image: str
- """The incremental image data as bytes"""
-
- type: Literal["image"]
- """Discriminator type of the delta. Always "image" """
-
-
-PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDeltaToolCall: TypeAlias = Union[str, ToolCall]
-
-
-class PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDelta(BaseModel):
- parse_status: Literal["started", "in_progress", "failed", "succeeded"]
- """Current parsing status of the tool call"""
-
- tool_call: PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDeltaToolCall
- """Either an in-progress tool call string or the final parsed tool call"""
-
- type: Literal["tool_call"]
- """Discriminator type of the delta. Always "tool_call" """
-
-
-PayloadAgentTurnResponseStepProgressPayloadDelta: TypeAlias = Annotated[
- Union[
- PayloadAgentTurnResponseStepProgressPayloadDeltaTextDelta,
- PayloadAgentTurnResponseStepProgressPayloadDeltaImageDelta,
- PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDelta,
- ],
- PropertyInfo(discriminator="type"),
-]
-
-
-class PayloadAgentTurnResponseStepProgressPayload(BaseModel):
- delta: PayloadAgentTurnResponseStepProgressPayloadDelta
- """Incremental content changes during step execution"""
-
- event_type: Literal["step_progress"]
- """Type of event being reported"""
-
- step_id: str
- """Unique identifier for the step within a turn"""
-
- step_type: Literal["inference", "tool_execution", "shield_call", "memory_retrieval"]
- """Type of step being executed"""
-
-
-PayloadAgentTurnResponseStepCompletePayloadStepDetails: TypeAlias = Annotated[
- Union[InferenceStep, ToolExecutionStep, ShieldCallStep, MemoryRetrievalStep],
- PropertyInfo(discriminator="step_type"),
-]
-
-
-class PayloadAgentTurnResponseStepCompletePayload(BaseModel):
- event_type: Literal["step_complete"]
- """Type of event being reported"""
-
- step_details: PayloadAgentTurnResponseStepCompletePayloadStepDetails
- """Complete details of the executed step"""
-
- step_id: str
- """Unique identifier for the step within a turn"""
-
- step_type: Literal["inference", "tool_execution", "shield_call", "memory_retrieval"]
- """Type of step being executed"""
-
-
-class PayloadAgentTurnResponseTurnStartPayload(BaseModel):
- event_type: Literal["turn_start"]
- """Type of event being reported"""
-
- turn_id: str
- """Unique identifier for the turn within a session"""
-
-
-class PayloadAgentTurnResponseTurnCompletePayload(BaseModel):
- event_type: Literal["turn_complete"]
- """Type of event being reported"""
-
- turn: Turn
- """Complete turn data including all steps and results"""
-
-
-class PayloadAgentTurnResponseTurnAwaitingInputPayload(BaseModel):
- event_type: Literal["turn_awaiting_input"]
- """Type of event being reported"""
-
- turn: Turn
- """Turn data when waiting for external tool responses"""
-
-
-Payload: TypeAlias = Annotated[
- Union[
- PayloadAgentTurnResponseStepStartPayload,
- PayloadAgentTurnResponseStepProgressPayload,
- PayloadAgentTurnResponseStepCompletePayload,
- PayloadAgentTurnResponseTurnStartPayload,
- PayloadAgentTurnResponseTurnCompletePayload,
- PayloadAgentTurnResponseTurnAwaitingInputPayload,
- ],
- PropertyInfo(discriminator="event_type"),
-]
-
-
-class TurnResponseEvent(BaseModel):
- payload: Payload
- """Event-specific payload containing event data"""
diff --git a/src/llama_stack_client/types/alpha/agents/turn_resume_params.py b/src/llama_stack_client/types/alpha/agents/turn_resume_params.py
deleted file mode 100644
index 23fda973..00000000
--- a/src/llama_stack_client/types/alpha/agents/turn_resume_params.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union, Iterable
-from typing_extensions import Literal, Required, TypedDict
-
-from ..tool_response_param import ToolResponseParam
-
-__all__ = ["TurnResumeParamsBase", "TurnResumeParamsNonStreaming", "TurnResumeParamsStreaming"]
-
-
-class TurnResumeParamsBase(TypedDict, total=False):
- agent_id: Required[str]
-
- session_id: Required[str]
-
- tool_responses: Required[Iterable[ToolResponseParam]]
- """The tool call responses to resume the turn with."""
-
-
-class TurnResumeParamsNonStreaming(TurnResumeParamsBase, total=False):
- stream: Literal[False]
- """Whether to stream the response."""
-
-
-class TurnResumeParamsStreaming(TurnResumeParamsBase):
- stream: Required[Literal[True]]
- """Whether to stream the response."""
-
-
-TurnResumeParams = Union[TurnResumeParamsNonStreaming, TurnResumeParamsStreaming]
diff --git a/src/llama_stack_client/types/alpha/algorithm_config_param.py b/src/llama_stack_client/types/alpha/algorithm_config_param.py
deleted file mode 100644
index bf3b7d0b..00000000
--- a/src/llama_stack_client/types/alpha/algorithm_config_param.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union
-from typing_extensions import Literal, Required, TypeAlias, TypedDict
-
-from ..._types import SequenceNotStr
-
-__all__ = ["AlgorithmConfigParam", "LoraFinetuningConfig", "QatFinetuningConfig"]
-
-
-class LoraFinetuningConfig(TypedDict, total=False):
- alpha: Required[int]
- """LoRA scaling parameter that controls adaptation strength"""
-
- apply_lora_to_mlp: Required[bool]
- """Whether to apply LoRA to MLP layers"""
-
- apply_lora_to_output: Required[bool]
- """Whether to apply LoRA to output projection layers"""
-
- lora_attn_modules: Required[SequenceNotStr[str]]
- """List of attention module names to apply LoRA to"""
-
- rank: Required[int]
- """Rank of the LoRA adaptation (lower rank = fewer parameters)"""
-
- type: Required[Literal["LoRA"]]
- """Algorithm type identifier, always "LoRA" """
-
- quantize_base: bool
- """(Optional) Whether to quantize the base model weights"""
-
- use_dora: bool
- """(Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation)"""
-
-
-class QatFinetuningConfig(TypedDict, total=False):
- group_size: Required[int]
- """Size of groups for grouped quantization"""
-
- quantizer_name: Required[str]
- """Name of the quantization algorithm to use"""
-
- type: Required[Literal["QAT"]]
- """Algorithm type identifier, always "QAT" """
-
-
-AlgorithmConfigParam: TypeAlias = Union[LoraFinetuningConfig, QatFinetuningConfig]
diff --git a/src/llama_stack_client/types/alpha/benchmark.py b/src/llama_stack_client/types/alpha/benchmark.py
deleted file mode 100644
index b70c8f28..00000000
--- a/src/llama_stack_client/types/alpha/benchmark.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union, Optional
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-
-__all__ = ["Benchmark"]
-
-
-class Benchmark(BaseModel):
- dataset_id: str
- """Identifier of the dataset to use for the benchmark evaluation"""
-
- identifier: str
-
- metadata: Dict[str, Union[bool, float, str, List[object], object, None]]
- """Metadata for this evaluation task"""
-
- provider_id: str
-
- scoring_functions: List[str]
- """List of scoring function identifiers to apply during evaluation"""
-
- type: Literal["benchmark"]
- """The resource type, always benchmark"""
-
- provider_resource_id: Optional[str] = None
diff --git a/src/llama_stack_client/types/alpha/benchmark_config_param.py b/src/llama_stack_client/types/alpha/benchmark_config_param.py
deleted file mode 100644
index e32cd187..00000000
--- a/src/llama_stack_client/types/alpha/benchmark_config_param.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, Union
-from typing_extensions import Literal, Required, TypeAlias, TypedDict
-
-from ..scoring_fn_params_param import ScoringFnParamsParam
-from ..shared_params.agent_config import AgentConfig
-from ..shared_params.system_message import SystemMessage
-from ..shared_params.sampling_params import SamplingParams
-
-__all__ = ["BenchmarkConfigParam", "EvalCandidate", "EvalCandidateModelCandidate", "EvalCandidateAgentCandidate"]
-
-
-class EvalCandidateModelCandidate(TypedDict, total=False):
- model: Required[str]
- """The model ID to evaluate."""
-
- sampling_params: Required[SamplingParams]
- """The sampling parameters for the model."""
-
- type: Required[Literal["model"]]
-
- system_message: SystemMessage
- """(Optional) The system message providing instructions or context to the model."""
-
-
-class EvalCandidateAgentCandidate(TypedDict, total=False):
- config: Required[AgentConfig]
- """The configuration for the agent candidate."""
-
- type: Required[Literal["agent"]]
-
-
-EvalCandidate: TypeAlias = Union[EvalCandidateModelCandidate, EvalCandidateAgentCandidate]
-
-
-class BenchmarkConfigParam(TypedDict, total=False):
- eval_candidate: Required[EvalCandidate]
- """The candidate to evaluate."""
-
- scoring_params: Required[Dict[str, ScoringFnParamsParam]]
- """
- Map between scoring function id and parameters for each scoring function you
- want to run
- """
-
- num_examples: int
- """(Optional) The number of examples to evaluate.
-
- If not provided, all examples in the dataset will be evaluated
- """
diff --git a/src/llama_stack_client/types/alpha/benchmark_list_response.py b/src/llama_stack_client/types/alpha/benchmark_list_response.py
deleted file mode 100644
index 56d7d8ba..00000000
--- a/src/llama_stack_client/types/alpha/benchmark_list_response.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List
-from typing_extensions import TypeAlias
-
-from .benchmark import Benchmark
-
-__all__ = ["BenchmarkListResponse"]
-
-BenchmarkListResponse: TypeAlias = List[Benchmark]
diff --git a/src/llama_stack_client/types/alpha/benchmark_register_params.py b/src/llama_stack_client/types/alpha/benchmark_register_params.py
deleted file mode 100644
index 84be3786..00000000
--- a/src/llama_stack_client/types/alpha/benchmark_register_params.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, Union, Iterable
-from typing_extensions import Required, TypedDict
-
-from ..._types import SequenceNotStr
-
-__all__ = ["BenchmarkRegisterParams"]
-
-
-class BenchmarkRegisterParams(TypedDict, total=False):
- benchmark_id: Required[str]
- """The ID of the benchmark to register."""
-
- dataset_id: Required[str]
- """The ID of the dataset to use for the benchmark."""
-
- scoring_functions: Required[SequenceNotStr[str]]
- """The scoring functions to use for the benchmark."""
-
- metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]]
- """The metadata to use for the benchmark."""
-
- provider_benchmark_id: str
- """The ID of the provider benchmark to use for the benchmark."""
-
- provider_id: str
- """The ID of the provider to use for the benchmark."""
diff --git a/src/llama_stack_client/types/alpha/eval_evaluate_rows_alpha_params.py b/src/llama_stack_client/types/alpha/eval_evaluate_rows_alpha_params.py
deleted file mode 100644
index 36036ff9..00000000
--- a/src/llama_stack_client/types/alpha/eval_evaluate_rows_alpha_params.py
+++ /dev/null
@@ -1,28 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, Union, Iterable
-from typing_extensions import Required, TypedDict
-
-from ..._types import SequenceNotStr
-from .benchmark_config_param import BenchmarkConfigParam
-
-__all__ = ["EvalEvaluateRowsAlphaParams"]
-
-
-class EvalEvaluateRowsAlphaParams(TypedDict, total=False):
- benchmark_config: Required[BenchmarkConfigParam]
- """The configuration for the benchmark."""
-
- input_rows: Required[Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]]
- """The rows to evaluate."""
-
- scoring_functions: Required[SequenceNotStr[str]]
- """The scoring functions to use for the evaluation."""
diff --git a/src/llama_stack_client/types/alpha/eval_evaluate_rows_params.py b/src/llama_stack_client/types/alpha/eval_evaluate_rows_params.py
deleted file mode 100644
index 3aba96a2..00000000
--- a/src/llama_stack_client/types/alpha/eval_evaluate_rows_params.py
+++ /dev/null
@@ -1,28 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, Union, Iterable
-from typing_extensions import Required, TypedDict
-
-from ..._types import SequenceNotStr
-from .benchmark_config_param import BenchmarkConfigParam
-
-__all__ = ["EvalEvaluateRowsParams"]
-
-
-class EvalEvaluateRowsParams(TypedDict, total=False):
- benchmark_config: Required[BenchmarkConfigParam]
- """The configuration for the benchmark."""
-
- input_rows: Required[Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]]
- """The rows to evaluate."""
-
- scoring_functions: Required[SequenceNotStr[str]]
- """The scoring functions to use for the evaluation."""
diff --git a/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py b/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py
deleted file mode 100644
index 760f9dc6..00000000
--- a/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Required, TypedDict
-
-from .benchmark_config_param import BenchmarkConfigParam
-
-__all__ = ["EvalRunEvalAlphaParams"]
-
-
-class EvalRunEvalAlphaParams(TypedDict, total=False):
- benchmark_config: Required[BenchmarkConfigParam]
- """The configuration for the benchmark."""
diff --git a/src/llama_stack_client/types/alpha/eval_run_eval_params.py b/src/llama_stack_client/types/alpha/eval_run_eval_params.py
deleted file mode 100644
index bb166ba3..00000000
--- a/src/llama_stack_client/types/alpha/eval_run_eval_params.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Required, TypedDict
-
-from .benchmark_config_param import BenchmarkConfigParam
-
-__all__ = ["EvalRunEvalParams"]
-
-
-class EvalRunEvalParams(TypedDict, total=False):
- benchmark_config: Required[BenchmarkConfigParam]
- """The configuration for the benchmark."""
diff --git a/src/llama_stack_client/types/alpha/evaluate_response.py b/src/llama_stack_client/types/alpha/evaluate_response.py
deleted file mode 100644
index 69d310ef..00000000
--- a/src/llama_stack_client/types/alpha/evaluate_response.py
+++ /dev/null
@@ -1,22 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union
-
-from ..._models import BaseModel
-from ..shared.scoring_result import ScoringResult
-
-__all__ = ["EvaluateResponse"]
-
-
-class EvaluateResponse(BaseModel):
- generations: List[Dict[str, Union[bool, float, str, List[object], object, None]]]
- """The generations from the evaluation."""
-
- scores: Dict[str, ScoringResult]
- """The scores from the evaluation."""
diff --git a/src/llama_stack_client/types/alpha/inference_rerank_params.py b/src/llama_stack_client/types/alpha/inference_rerank_params.py
deleted file mode 100644
index 6502c3d4..00000000
--- a/src/llama_stack_client/types/alpha/inference_rerank_params.py
+++ /dev/null
@@ -1,112 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union
-from typing_extensions import Literal, Required, TypeAlias, TypedDict
-
-from ..._types import SequenceNotStr
-
-__all__ = [
- "InferenceRerankParams",
- "Item",
- "ItemOpenAIChatCompletionContentPartTextParam",
- "ItemOpenAIChatCompletionContentPartImageParam",
- "ItemOpenAIChatCompletionContentPartImageParamImageURL",
- "Query",
- "QueryOpenAIChatCompletionContentPartTextParam",
- "QueryOpenAIChatCompletionContentPartImageParam",
- "QueryOpenAIChatCompletionContentPartImageParamImageURL",
-]
-
-
-class InferenceRerankParams(TypedDict, total=False):
- items: Required[SequenceNotStr[Item]]
- """List of items to rerank.
-
- Each item can be a string, text content part, or image content part. Each input
- must not exceed the model's max input token length.
- """
-
- model: Required[str]
- """The identifier of the reranking model to use."""
-
- query: Required[Query]
- """The search query to rank items against.
-
- Can be a string, text content part, or image content part. The input must not
- exceed the model's max input token length.
- """
-
- max_num_results: int
- """(Optional) Maximum number of results to return. Default: returns all."""
-
-
-class ItemOpenAIChatCompletionContentPartTextParam(TypedDict, total=False):
- text: Required[str]
- """The text content of the message"""
-
- type: Required[Literal["text"]]
- """Must be "text" to identify this as text content"""
-
-
-class ItemOpenAIChatCompletionContentPartImageParamImageURL(TypedDict, total=False):
- url: Required[str]
- """URL of the image to include in the message"""
-
- detail: str
- """(Optional) Level of detail for image processing.
-
- Can be "low", "high", or "auto"
- """
-
-
-class ItemOpenAIChatCompletionContentPartImageParam(TypedDict, total=False):
- image_url: Required[ItemOpenAIChatCompletionContentPartImageParamImageURL]
- """Image URL specification and processing details"""
-
- type: Required[Literal["image_url"]]
- """Must be "image_url" to identify this as image content"""
-
-
-Item: TypeAlias = Union[
- str, ItemOpenAIChatCompletionContentPartTextParam, ItemOpenAIChatCompletionContentPartImageParam
-]
-
-
-class QueryOpenAIChatCompletionContentPartTextParam(TypedDict, total=False):
- text: Required[str]
- """The text content of the message"""
-
- type: Required[Literal["text"]]
- """Must be "text" to identify this as text content"""
-
-
-class QueryOpenAIChatCompletionContentPartImageParamImageURL(TypedDict, total=False):
- url: Required[str]
- """URL of the image to include in the message"""
-
- detail: str
- """(Optional) Level of detail for image processing.
-
- Can be "low", "high", or "auto"
- """
-
-
-class QueryOpenAIChatCompletionContentPartImageParam(TypedDict, total=False):
- image_url: Required[QueryOpenAIChatCompletionContentPartImageParamImageURL]
- """Image URL specification and processing details"""
-
- type: Required[Literal["image_url"]]
- """Must be "image_url" to identify this as image content"""
-
-
-Query: TypeAlias = Union[
- str, QueryOpenAIChatCompletionContentPartTextParam, QueryOpenAIChatCompletionContentPartImageParam
-]
diff --git a/src/llama_stack_client/types/alpha/inference_rerank_response.py b/src/llama_stack_client/types/alpha/inference_rerank_response.py
deleted file mode 100644
index f2cd133c..00000000
--- a/src/llama_stack_client/types/alpha/inference_rerank_response.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List
-from typing_extensions import TypeAlias
-
-from ..._models import BaseModel
-
-__all__ = ["InferenceRerankResponse", "InferenceRerankResponseItem"]
-
-
-class InferenceRerankResponseItem(BaseModel):
- index: int
- """The original index of the document in the input list"""
-
- relevance_score: float
- """The relevance score from the model output.
-
- Values are inverted when applicable so that higher scores indicate greater
- relevance.
- """
-
-
-InferenceRerankResponse: TypeAlias = List[InferenceRerankResponseItem]
diff --git a/src/llama_stack_client/types/alpha/inference_step.py b/src/llama_stack_client/types/alpha/inference_step.py
deleted file mode 100644
index a4dfa054..00000000
--- a/src/llama_stack_client/types/alpha/inference_step.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-from datetime import datetime
-from typing_extensions import Literal
-
-from pydantic import Field as FieldInfo
-
-from ..._models import BaseModel
-from ..shared.completion_message import CompletionMessage
-
-__all__ = ["InferenceStep"]
-
-
-class InferenceStep(BaseModel):
- api_model_response: CompletionMessage = FieldInfo(alias="model_response")
- """The response from the LLM."""
-
- step_id: str
- """The ID of the step."""
-
- step_type: Literal["inference"]
- """Type of the step in an agent turn."""
-
- turn_id: str
- """The ID of the turn."""
-
- completed_at: Optional[datetime] = None
- """The time the step completed."""
-
- started_at: Optional[datetime] = None
- """The time the step started."""
diff --git a/src/llama_stack_client/types/alpha/job.py b/src/llama_stack_client/types/alpha/job.py
deleted file mode 100644
index 696eba85..00000000
--- a/src/llama_stack_client/types/alpha/job.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-
-__all__ = ["Job"]
-
-
-class Job(BaseModel):
- job_id: str
- """Unique identifier for the job"""
-
- status: Literal["completed", "in_progress", "failed", "scheduled", "cancelled"]
- """Current execution status of the job"""
diff --git a/src/llama_stack_client/types/alpha/list_benchmarks_response.py b/src/llama_stack_client/types/alpha/list_benchmarks_response.py
deleted file mode 100644
index 8ea3b963..00000000
--- a/src/llama_stack_client/types/alpha/list_benchmarks_response.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from ..._models import BaseModel
-from .benchmark_list_response import BenchmarkListResponse
-
-__all__ = ["ListBenchmarksResponse"]
-
-
-class ListBenchmarksResponse(BaseModel):
- data: BenchmarkListResponse
diff --git a/src/llama_stack_client/types/alpha/list_post_training_jobs_response.py b/src/llama_stack_client/types/alpha/list_post_training_jobs_response.py
deleted file mode 100644
index 6c87bcd7..00000000
--- a/src/llama_stack_client/types/alpha/list_post_training_jobs_response.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from ..._models import BaseModel
-from .post_training.job_list_response import JobListResponse
-
-__all__ = ["ListPostTrainingJobsResponse"]
-
-
-class ListPostTrainingJobsResponse(BaseModel):
- data: JobListResponse
diff --git a/src/llama_stack_client/types/alpha/memory_retrieval_step.py b/src/llama_stack_client/types/alpha/memory_retrieval_step.py
deleted file mode 100644
index 1b5708ce..00000000
--- a/src/llama_stack_client/types/alpha/memory_retrieval_step.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-from datetime import datetime
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-from ..shared.interleaved_content import InterleavedContent
-
-__all__ = ["MemoryRetrievalStep"]
-
-
-class MemoryRetrievalStep(BaseModel):
- inserted_context: InterleavedContent
- """The context retrieved from the vector databases."""
-
- step_id: str
- """The ID of the step."""
-
- step_type: Literal["memory_retrieval"]
- """Type of the step in an agent turn."""
-
- turn_id: str
- """The ID of the turn."""
-
- vector_store_ids: str
- """The IDs of the vector databases to retrieve context from."""
-
- completed_at: Optional[datetime] = None
- """The time the step completed."""
-
- started_at: Optional[datetime] = None
- """The time the step started."""
diff --git a/src/llama_stack_client/types/alpha/post_training/__init__.py b/src/llama_stack_client/types/alpha/post_training/__init__.py
index 8b609eaa..d14ed874 100644
--- a/src/llama_stack_client/types/alpha/post_training/__init__.py
+++ b/src/llama_stack_client/types/alpha/post_training/__init__.py
@@ -7,10 +7,3 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
-
-from .job_cancel_params import JobCancelParams as JobCancelParams
-from .job_list_response import JobListResponse as JobListResponse
-from .job_status_params import JobStatusParams as JobStatusParams
-from .job_status_response import JobStatusResponse as JobStatusResponse
-from .job_artifacts_params import JobArtifactsParams as JobArtifactsParams
-from .job_artifacts_response import JobArtifactsResponse as JobArtifactsResponse
diff --git a/src/llama_stack_client/types/alpha/post_training/job_artifacts_params.py b/src/llama_stack_client/types/alpha/post_training/job_artifacts_params.py
deleted file mode 100644
index e18e76e0..00000000
--- a/src/llama_stack_client/types/alpha/post_training/job_artifacts_params.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Required, TypedDict
-
-__all__ = ["JobArtifactsParams"]
-
-
-class JobArtifactsParams(TypedDict, total=False):
- job_uuid: Required[str]
- """The UUID of the job to get the artifacts of."""
diff --git a/src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py b/src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py
deleted file mode 100644
index 508ba75d..00000000
--- a/src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Optional
-from datetime import datetime
-
-from ...._models import BaseModel
-
-__all__ = ["JobArtifactsResponse", "Checkpoint", "CheckpointTrainingMetrics"]
-
-
-class CheckpointTrainingMetrics(BaseModel):
- epoch: int
- """Training epoch number"""
-
- perplexity: float
- """Perplexity metric indicating model confidence"""
-
- train_loss: float
- """Loss value on the training dataset"""
-
- validation_loss: float
- """Loss value on the validation dataset"""
-
-
-class Checkpoint(BaseModel):
- created_at: datetime
- """Timestamp when the checkpoint was created"""
-
- epoch: int
- """Training epoch when the checkpoint was saved"""
-
- identifier: str
- """Unique identifier for the checkpoint"""
-
- path: str
- """File system path where the checkpoint is stored"""
-
- post_training_job_id: str
- """Identifier of the training job that created this checkpoint"""
-
- training_metrics: Optional[CheckpointTrainingMetrics] = None
- """(Optional) Training metrics associated with this checkpoint"""
-
-
-class JobArtifactsResponse(BaseModel):
- checkpoints: List[Checkpoint]
- """List of model checkpoints created during training"""
-
- job_uuid: str
- """Unique identifier for the training job"""
diff --git a/src/llama_stack_client/types/alpha/post_training/job_cancel_params.py b/src/llama_stack_client/types/alpha/post_training/job_cancel_params.py
deleted file mode 100644
index fc1f9a32..00000000
--- a/src/llama_stack_client/types/alpha/post_training/job_cancel_params.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Required, TypedDict
-
-__all__ = ["JobCancelParams"]
-
-
-class JobCancelParams(TypedDict, total=False):
- job_uuid: Required[str]
- """The UUID of the job to cancel."""
diff --git a/src/llama_stack_client/types/alpha/post_training/job_list_response.py b/src/llama_stack_client/types/alpha/post_training/job_list_response.py
deleted file mode 100644
index 95b5d7c5..00000000
--- a/src/llama_stack_client/types/alpha/post_training/job_list_response.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List
-from typing_extensions import TypeAlias
-
-from ...._models import BaseModel
-
-__all__ = ["JobListResponse", "JobListResponseItem"]
-
-
-class JobListResponseItem(BaseModel):
- job_uuid: str
-
-
-JobListResponse: TypeAlias = List[JobListResponseItem]
diff --git a/src/llama_stack_client/types/alpha/post_training/job_status_params.py b/src/llama_stack_client/types/alpha/post_training/job_status_params.py
deleted file mode 100644
index 5b832347..00000000
--- a/src/llama_stack_client/types/alpha/post_training/job_status_params.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Required, TypedDict
-
-__all__ = ["JobStatusParams"]
-
-
-class JobStatusParams(TypedDict, total=False):
- job_uuid: Required[str]
- """The UUID of the job to get the status of."""
diff --git a/src/llama_stack_client/types/alpha/post_training/job_status_response.py b/src/llama_stack_client/types/alpha/post_training/job_status_response.py
deleted file mode 100644
index cfe9d54f..00000000
--- a/src/llama_stack_client/types/alpha/post_training/job_status_response.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union, Optional
-from datetime import datetime
-from typing_extensions import Literal
-
-from ...._models import BaseModel
-
-__all__ = ["JobStatusResponse", "Checkpoint", "CheckpointTrainingMetrics"]
-
-
-class CheckpointTrainingMetrics(BaseModel):
- epoch: int
- """Training epoch number"""
-
- perplexity: float
- """Perplexity metric indicating model confidence"""
-
- train_loss: float
- """Loss value on the training dataset"""
-
- validation_loss: float
- """Loss value on the validation dataset"""
-
-
-class Checkpoint(BaseModel):
- created_at: datetime
- """Timestamp when the checkpoint was created"""
-
- epoch: int
- """Training epoch when the checkpoint was saved"""
-
- identifier: str
- """Unique identifier for the checkpoint"""
-
- path: str
- """File system path where the checkpoint is stored"""
-
- post_training_job_id: str
- """Identifier of the training job that created this checkpoint"""
-
- training_metrics: Optional[CheckpointTrainingMetrics] = None
- """(Optional) Training metrics associated with this checkpoint"""
-
-
-class JobStatusResponse(BaseModel):
- checkpoints: List[Checkpoint]
- """List of model checkpoints created during training"""
-
- job_uuid: str
- """Unique identifier for the training job"""
-
- status: Literal["completed", "in_progress", "failed", "scheduled", "cancelled"]
- """Current status of the training job"""
-
- completed_at: Optional[datetime] = None
- """(Optional) Timestamp when the job finished, if completed"""
-
- resources_allocated: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
- """(Optional) Information about computational resources allocated to the job"""
-
- scheduled_at: Optional[datetime] = None
- """(Optional) Timestamp when the job was scheduled"""
-
- started_at: Optional[datetime] = None
- """(Optional) Timestamp when the job execution began"""
diff --git a/src/llama_stack_client/types/alpha/post_training_job.py b/src/llama_stack_client/types/alpha/post_training_job.py
deleted file mode 100644
index 5d3a5391..00000000
--- a/src/llama_stack_client/types/alpha/post_training_job.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from ..._models import BaseModel
-
-__all__ = ["PostTrainingJob"]
-
-
-class PostTrainingJob(BaseModel):
- job_uuid: str
diff --git a/src/llama_stack_client/types/alpha/post_training_preference_optimize_params.py b/src/llama_stack_client/types/alpha/post_training_preference_optimize_params.py
deleted file mode 100644
index 35c9e023..00000000
--- a/src/llama_stack_client/types/alpha/post_training_preference_optimize_params.py
+++ /dev/null
@@ -1,129 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, Union, Iterable
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = [
- "PostTrainingPreferenceOptimizeParams",
- "AlgorithmConfig",
- "TrainingConfig",
- "TrainingConfigDataConfig",
- "TrainingConfigEfficiencyConfig",
- "TrainingConfigOptimizerConfig",
-]
-
-
-class PostTrainingPreferenceOptimizeParams(TypedDict, total=False):
- algorithm_config: Required[AlgorithmConfig]
- """The algorithm configuration."""
-
- finetuned_model: Required[str]
- """The model to fine-tune."""
-
- hyperparam_search_config: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]
- """The hyperparam search configuration."""
-
- job_uuid: Required[str]
- """The UUID of the job to create."""
-
- logger_config: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]
- """The logger configuration."""
-
- training_config: Required[TrainingConfig]
- """The training configuration."""
-
-
-class AlgorithmConfig(TypedDict, total=False):
- beta: Required[float]
- """Temperature parameter for the DPO loss"""
-
- loss_type: Required[Literal["sigmoid", "hinge", "ipo", "kto_pair"]]
- """The type of loss function to use for DPO"""
-
-
-class TrainingConfigDataConfig(TypedDict, total=False):
- batch_size: Required[int]
- """Number of samples per training batch"""
-
- data_format: Required[Literal["instruct", "dialog"]]
- """Format of the dataset (instruct or dialog)"""
-
- dataset_id: Required[str]
- """Unique identifier for the training dataset"""
-
- shuffle: Required[bool]
- """Whether to shuffle the dataset during training"""
-
- packed: bool
- """
- (Optional) Whether to pack multiple samples into a single sequence for
- efficiency
- """
-
- train_on_input: bool
- """(Optional) Whether to compute loss on input tokens as well as output tokens"""
-
- validation_dataset_id: str
- """(Optional) Unique identifier for the validation dataset"""
-
-
-class TrainingConfigEfficiencyConfig(TypedDict, total=False):
- enable_activation_checkpointing: bool
- """(Optional) Whether to use activation checkpointing to reduce memory usage"""
-
- enable_activation_offloading: bool
- """(Optional) Whether to offload activations to CPU to save GPU memory"""
-
- fsdp_cpu_offload: bool
- """(Optional) Whether to offload FSDP parameters to CPU"""
-
- memory_efficient_fsdp_wrap: bool
- """(Optional) Whether to use memory-efficient FSDP wrapping"""
-
-
-class TrainingConfigOptimizerConfig(TypedDict, total=False):
- lr: Required[float]
- """Learning rate for the optimizer"""
-
- num_warmup_steps: Required[int]
- """Number of steps for learning rate warmup"""
-
- optimizer_type: Required[Literal["adam", "adamw", "sgd"]]
- """Type of optimizer to use (adam, adamw, or sgd)"""
-
- weight_decay: Required[float]
- """Weight decay coefficient for regularization"""
-
-
-class TrainingConfig(TypedDict, total=False):
- gradient_accumulation_steps: Required[int]
- """Number of steps to accumulate gradients before updating"""
-
- max_steps_per_epoch: Required[int]
- """Maximum number of steps to run per epoch"""
-
- n_epochs: Required[int]
- """Number of training epochs to run"""
-
- data_config: TrainingConfigDataConfig
- """(Optional) Configuration for data loading and formatting"""
-
- dtype: str
- """(Optional) Data type for model parameters (bf16, fp16, fp32)"""
-
- efficiency_config: TrainingConfigEfficiencyConfig
- """(Optional) Configuration for memory and compute optimizations"""
-
- max_validation_steps: int
- """(Optional) Maximum number of validation steps per epoch"""
-
- optimizer_config: TrainingConfigOptimizerConfig
- """(Optional) Configuration for the optimization algorithm"""
diff --git a/src/llama_stack_client/types/alpha/post_training_supervised_fine_tune_params.py b/src/llama_stack_client/types/alpha/post_training_supervised_fine_tune_params.py
deleted file mode 100644
index dfdc68e8..00000000
--- a/src/llama_stack_client/types/alpha/post_training_supervised_fine_tune_params.py
+++ /dev/null
@@ -1,125 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, Union, Iterable
-from typing_extensions import Literal, Required, TypedDict
-
-from .algorithm_config_param import AlgorithmConfigParam
-
-__all__ = [
- "PostTrainingSupervisedFineTuneParams",
- "TrainingConfig",
- "TrainingConfigDataConfig",
- "TrainingConfigEfficiencyConfig",
- "TrainingConfigOptimizerConfig",
-]
-
-
-class PostTrainingSupervisedFineTuneParams(TypedDict, total=False):
- hyperparam_search_config: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]
- """The hyperparam search configuration."""
-
- job_uuid: Required[str]
- """The UUID of the job to create."""
-
- logger_config: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]
- """The logger configuration."""
-
- training_config: Required[TrainingConfig]
- """The training configuration."""
-
- algorithm_config: AlgorithmConfigParam
- """The algorithm configuration."""
-
- checkpoint_dir: str
- """The directory to save checkpoint(s) to."""
-
- model: str
- """The model to fine-tune."""
-
-
-class TrainingConfigDataConfig(TypedDict, total=False):
- batch_size: Required[int]
- """Number of samples per training batch"""
-
- data_format: Required[Literal["instruct", "dialog"]]
- """Format of the dataset (instruct or dialog)"""
-
- dataset_id: Required[str]
- """Unique identifier for the training dataset"""
-
- shuffle: Required[bool]
- """Whether to shuffle the dataset during training"""
-
- packed: bool
- """
- (Optional) Whether to pack multiple samples into a single sequence for
- efficiency
- """
-
- train_on_input: bool
- """(Optional) Whether to compute loss on input tokens as well as output tokens"""
-
- validation_dataset_id: str
- """(Optional) Unique identifier for the validation dataset"""
-
-
-class TrainingConfigEfficiencyConfig(TypedDict, total=False):
- enable_activation_checkpointing: bool
- """(Optional) Whether to use activation checkpointing to reduce memory usage"""
-
- enable_activation_offloading: bool
- """(Optional) Whether to offload activations to CPU to save GPU memory"""
-
- fsdp_cpu_offload: bool
- """(Optional) Whether to offload FSDP parameters to CPU"""
-
- memory_efficient_fsdp_wrap: bool
- """(Optional) Whether to use memory-efficient FSDP wrapping"""
-
-
-class TrainingConfigOptimizerConfig(TypedDict, total=False):
- lr: Required[float]
- """Learning rate for the optimizer"""
-
- num_warmup_steps: Required[int]
- """Number of steps for learning rate warmup"""
-
- optimizer_type: Required[Literal["adam", "adamw", "sgd"]]
- """Type of optimizer to use (adam, adamw, or sgd)"""
-
- weight_decay: Required[float]
- """Weight decay coefficient for regularization"""
-
-
-class TrainingConfig(TypedDict, total=False):
- gradient_accumulation_steps: Required[int]
- """Number of steps to accumulate gradients before updating"""
-
- max_steps_per_epoch: Required[int]
- """Maximum number of steps to run per epoch"""
-
- n_epochs: Required[int]
- """Number of training epochs to run"""
-
- data_config: TrainingConfigDataConfig
- """(Optional) Configuration for data loading and formatting"""
-
- dtype: str
- """(Optional) Data type for model parameters (bf16, fp16, fp32)"""
-
- efficiency_config: TrainingConfigEfficiencyConfig
- """(Optional) Configuration for memory and compute optimizations"""
-
- max_validation_steps: int
- """(Optional) Maximum number of validation steps per epoch"""
-
- optimizer_config: TrainingConfigOptimizerConfig
- """(Optional) Configuration for the optimization algorithm"""
diff --git a/src/llama_stack_client/types/alpha/shield_call_step.py b/src/llama_stack_client/types/alpha/shield_call_step.py
deleted file mode 100644
index f332a4d5..00000000
--- a/src/llama_stack_client/types/alpha/shield_call_step.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-from datetime import datetime
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-from ..shared.safety_violation import SafetyViolation
-
-__all__ = ["ShieldCallStep"]
-
-
-class ShieldCallStep(BaseModel):
- step_id: str
- """The ID of the step."""
-
- step_type: Literal["shield_call"]
- """Type of the step in an agent turn."""
-
- turn_id: str
- """The ID of the turn."""
-
- completed_at: Optional[datetime] = None
- """The time the step completed."""
-
- started_at: Optional[datetime] = None
- """The time the step started."""
-
- violation: Optional[SafetyViolation] = None
- """The violation from the shield call."""
diff --git a/src/llama_stack_client/types/alpha/tool_execution_step.py b/src/llama_stack_client/types/alpha/tool_execution_step.py
deleted file mode 100644
index 04259318..00000000
--- a/src/llama_stack_client/types/alpha/tool_execution_step.py
+++ /dev/null
@@ -1,40 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Optional
-from datetime import datetime
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-from .tool_response import ToolResponse
-from ..shared.tool_call import ToolCall
-
-__all__ = ["ToolExecutionStep"]
-
-
-class ToolExecutionStep(BaseModel):
- step_id: str
- """The ID of the step."""
-
- step_type: Literal["tool_execution"]
- """Type of the step in an agent turn."""
-
- tool_calls: List[ToolCall]
- """The tool calls to execute."""
-
- tool_responses: List[ToolResponse]
- """The tool responses from the tool calls."""
-
- turn_id: str
- """The ID of the turn."""
-
- completed_at: Optional[datetime] = None
- """The time the step completed."""
-
- started_at: Optional[datetime] = None
- """The time the step started."""
diff --git a/src/llama_stack_client/types/alpha/tool_response.py b/src/llama_stack_client/types/alpha/tool_response.py
deleted file mode 100644
index 250ae9de..00000000
--- a/src/llama_stack_client/types/alpha/tool_response.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union, Optional
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-from ..shared.interleaved_content import InterleavedContent
-
-__all__ = ["ToolResponse"]
-
-
-class ToolResponse(BaseModel):
- call_id: str
- """Unique identifier for the tool call this response is for"""
-
- content: InterleavedContent
- """The response content from the tool"""
-
- tool_name: Union[Literal["brave_search", "wolfram_alpha", "photogen", "code_interpreter"], str]
- """Name of the tool that was invoked"""
-
- metadata: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
- """(Optional) Additional metadata about the tool response"""
diff --git a/src/llama_stack_client/types/alpha/tool_response_param.py b/src/llama_stack_client/types/alpha/tool_response_param.py
deleted file mode 100644
index 9d745da5..00000000
--- a/src/llama_stack_client/types/alpha/tool_response_param.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, Union, Iterable
-from typing_extensions import Literal, Required, TypedDict
-
-from ..shared_params.interleaved_content import InterleavedContent
-
-__all__ = ["ToolResponseParam"]
-
-
-class ToolResponseParam(TypedDict, total=False):
- call_id: Required[str]
- """Unique identifier for the tool call this response is for"""
-
- content: Required[InterleavedContent]
- """The response content from the tool"""
-
- tool_name: Required[Union[Literal["brave_search", "wolfram_alpha", "photogen", "code_interpreter"], str]]
- """Name of the tool that was invoked"""
-
- metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]]
- """(Optional) Additional metadata about the tool response"""
diff --git a/src/llama_stack_client/types/beta/__init__.py b/src/llama_stack_client/types/beta/__init__.py
index aab8d1b8..f8ee8b14 100644
--- a/src/llama_stack_client/types/beta/__init__.py
+++ b/src/llama_stack_client/types/beta/__init__.py
@@ -1,12 +1,3 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
-
-from .dataset_list_response import DatasetListResponse as DatasetListResponse
-from .list_datasets_response import ListDatasetsResponse as ListDatasetsResponse
-from .dataset_iterrows_params import DatasetIterrowsParams as DatasetIterrowsParams
-from .dataset_register_params import DatasetRegisterParams as DatasetRegisterParams
-from .dataset_appendrows_params import DatasetAppendrowsParams as DatasetAppendrowsParams
-from .dataset_iterrows_response import DatasetIterrowsResponse as DatasetIterrowsResponse
-from .dataset_register_response import DatasetRegisterResponse as DatasetRegisterResponse
-from .dataset_retrieve_response import DatasetRetrieveResponse as DatasetRetrieveResponse
diff --git a/src/llama_stack_client/types/beta/dataset_appendrows_params.py b/src/llama_stack_client/types/beta/dataset_appendrows_params.py
deleted file mode 100644
index b929d790..00000000
--- a/src/llama_stack_client/types/beta/dataset_appendrows_params.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, Union, Iterable
-from typing_extensions import Required, TypedDict
-
-__all__ = ["DatasetAppendrowsParams"]
-
-
-class DatasetAppendrowsParams(TypedDict, total=False):
- rows: Required[Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]]
- """The rows to append to the dataset."""
diff --git a/src/llama_stack_client/types/beta/dataset_iterrows_params.py b/src/llama_stack_client/types/beta/dataset_iterrows_params.py
deleted file mode 100644
index 262e0e3f..00000000
--- a/src/llama_stack_client/types/beta/dataset_iterrows_params.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import TypedDict
-
-__all__ = ["DatasetIterrowsParams"]
-
-
-class DatasetIterrowsParams(TypedDict, total=False):
- limit: int
- """The number of rows to get."""
-
- start_index: int
- """Index into dataset for the first row to get. Get all rows if None."""
diff --git a/src/llama_stack_client/types/beta/dataset_iterrows_response.py b/src/llama_stack_client/types/beta/dataset_iterrows_response.py
deleted file mode 100644
index 5b23d46d..00000000
--- a/src/llama_stack_client/types/beta/dataset_iterrows_response.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union, Optional
-
-from ..._models import BaseModel
-
-__all__ = ["DatasetIterrowsResponse"]
-
-
-class DatasetIterrowsResponse(BaseModel):
- data: List[Dict[str, Union[bool, float, str, List[object], object, None]]]
- """The list of items for the current page"""
-
- has_more: bool
- """Whether there are more items available after this set"""
-
- url: Optional[str] = None
- """The URL for accessing this list"""
diff --git a/src/llama_stack_client/types/beta/dataset_list_response.py b/src/llama_stack_client/types/beta/dataset_list_response.py
deleted file mode 100644
index 7e6c1141..00000000
--- a/src/llama_stack_client/types/beta/dataset_list_response.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union, Optional
-from typing_extensions import Literal, Annotated, TypeAlias
-
-from ..._utils import PropertyInfo
-from ..._models import BaseModel
-
-__all__ = [
- "DatasetListResponse",
- "DatasetListResponseItem",
- "DatasetListResponseItemSource",
- "DatasetListResponseItemSourceUriDataSource",
- "DatasetListResponseItemSourceRowsDataSource",
-]
-
-
-class DatasetListResponseItemSourceUriDataSource(BaseModel):
- type: Literal["uri"]
-
- uri: str
- """The dataset can be obtained from a URI.
-
- E.g. - "https://mywebsite.com/mydata.jsonl" - "lsfs://mydata.jsonl" -
- "data:csv;base64,{base64_content}"
- """
-
-
-class DatasetListResponseItemSourceRowsDataSource(BaseModel):
- rows: List[Dict[str, Union[bool, float, str, List[object], object, None]]]
- """The dataset is stored in rows.
-
- E.g. - [ {"messages": [{"role": "user", "content": "Hello, world!"}, {"role":
- "assistant", "content": "Hello, world!"}]} ]
- """
-
- type: Literal["rows"]
-
-
-DatasetListResponseItemSource: TypeAlias = Annotated[
- Union[DatasetListResponseItemSourceUriDataSource, DatasetListResponseItemSourceRowsDataSource],
- PropertyInfo(discriminator="type"),
-]
-
-
-class DatasetListResponseItem(BaseModel):
- identifier: str
-
- metadata: Dict[str, Union[bool, float, str, List[object], object, None]]
- """Additional metadata for the dataset"""
-
- provider_id: str
-
- purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"]
- """Purpose of the dataset indicating its intended use"""
-
- source: DatasetListResponseItemSource
- """Data source configuration for the dataset"""
-
- type: Literal["dataset"]
- """Type of resource, always 'dataset' for datasets"""
-
- provider_resource_id: Optional[str] = None
-
-
-DatasetListResponse: TypeAlias = List[DatasetListResponseItem]
diff --git a/src/llama_stack_client/types/beta/dataset_register_params.py b/src/llama_stack_client/types/beta/dataset_register_params.py
deleted file mode 100644
index 75803a8a..00000000
--- a/src/llama_stack_client/types/beta/dataset_register_params.py
+++ /dev/null
@@ -1,75 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, Union, Iterable
-from typing_extensions import Literal, Required, TypeAlias, TypedDict
-
-__all__ = ["DatasetRegisterParams", "Source", "SourceUriDataSource", "SourceRowsDataSource"]
-
-
-class DatasetRegisterParams(TypedDict, total=False):
- purpose: Required[Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"]]
- """The purpose of the dataset.
-
- One of: - "post-training/messages": The dataset contains a messages column with
- list of messages for post-training. { "messages": [ {"role": "user", "content":
- "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] } -
- "eval/question-answer": The dataset contains a question column and an answer
- column for evaluation. { "question": "What is the capital of France?", "answer":
- "Paris" } - "eval/messages-answer": The dataset contains a messages column with
- list of messages and an answer column for evaluation. { "messages": [ {"role":
- "user", "content": "Hello, my name is John Doe."}, {"role": "assistant",
- "content": "Hello, John Doe. How can I help you today?"}, {"role": "user",
- "content": "What's my name?"}, ], "answer": "John Doe" }
- """
-
- source: Required[Source]
- """The data source of the dataset.
-
- Ensure that the data source schema is compatible with the purpose of the
- dataset. Examples: - { "type": "uri", "uri":
- "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
- "lsfs://mydata.jsonl" } - { "type": "uri", "uri":
- "data:csv;base64,{base64_content}" } - { "type": "uri", "uri":
- "huggingface://llamastack/simpleqa?split=train" } - { "type": "rows", "rows": [
- { "messages": [ {"role": "user", "content": "Hello, world!"}, {"role":
- "assistant", "content": "Hello, world!"}, ] } ] }
- """
-
- dataset_id: str
- """The ID of the dataset. If not provided, an ID will be generated."""
-
- metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]]
- """The metadata for the dataset. - E.g. {"description": "My dataset"}."""
-
-
-class SourceUriDataSource(TypedDict, total=False):
- type: Required[Literal["uri"]]
-
- uri: Required[str]
- """The dataset can be obtained from a URI.
-
- E.g. - "https://mywebsite.com/mydata.jsonl" - "lsfs://mydata.jsonl" -
- "data:csv;base64,{base64_content}"
- """
-
-
-class SourceRowsDataSource(TypedDict, total=False):
- rows: Required[Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]]
- """The dataset is stored in rows.
-
- E.g. - [ {"messages": [{"role": "user", "content": "Hello, world!"}, {"role":
- "assistant", "content": "Hello, world!"}]} ]
- """
-
- type: Required[Literal["rows"]]
-
-
-Source: TypeAlias = Union[SourceUriDataSource, SourceRowsDataSource]
diff --git a/src/llama_stack_client/types/beta/dataset_register_response.py b/src/llama_stack_client/types/beta/dataset_register_response.py
deleted file mode 100644
index e9bb82d2..00000000
--- a/src/llama_stack_client/types/beta/dataset_register_response.py
+++ /dev/null
@@ -1,60 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union, Optional
-from typing_extensions import Literal, Annotated, TypeAlias
-
-from ..._utils import PropertyInfo
-from ..._models import BaseModel
-
-__all__ = ["DatasetRegisterResponse", "Source", "SourceUriDataSource", "SourceRowsDataSource"]
-
-
-class SourceUriDataSource(BaseModel):
- type: Literal["uri"]
-
- uri: str
- """The dataset can be obtained from a URI.
-
- E.g. - "https://mywebsite.com/mydata.jsonl" - "lsfs://mydata.jsonl" -
- "data:csv;base64,{base64_content}"
- """
-
-
-class SourceRowsDataSource(BaseModel):
- rows: List[Dict[str, Union[bool, float, str, List[object], object, None]]]
- """The dataset is stored in rows.
-
- E.g. - [ {"messages": [{"role": "user", "content": "Hello, world!"}, {"role":
- "assistant", "content": "Hello, world!"}]} ]
- """
-
- type: Literal["rows"]
-
-
-Source: TypeAlias = Annotated[Union[SourceUriDataSource, SourceRowsDataSource], PropertyInfo(discriminator="type")]
-
-
-class DatasetRegisterResponse(BaseModel):
- identifier: str
-
- metadata: Dict[str, Union[bool, float, str, List[object], object, None]]
- """Additional metadata for the dataset"""
-
- provider_id: str
-
- purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"]
- """Purpose of the dataset indicating its intended use"""
-
- source: Source
- """Data source configuration for the dataset"""
-
- type: Literal["dataset"]
- """Type of resource, always 'dataset' for datasets"""
-
- provider_resource_id: Optional[str] = None
diff --git a/src/llama_stack_client/types/beta/dataset_retrieve_response.py b/src/llama_stack_client/types/beta/dataset_retrieve_response.py
deleted file mode 100644
index 3358288d..00000000
--- a/src/llama_stack_client/types/beta/dataset_retrieve_response.py
+++ /dev/null
@@ -1,60 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union, Optional
-from typing_extensions import Literal, Annotated, TypeAlias
-
-from ..._utils import PropertyInfo
-from ..._models import BaseModel
-
-__all__ = ["DatasetRetrieveResponse", "Source", "SourceUriDataSource", "SourceRowsDataSource"]
-
-
-class SourceUriDataSource(BaseModel):
- type: Literal["uri"]
-
- uri: str
- """The dataset can be obtained from a URI.
-
- E.g. - "https://mywebsite.com/mydata.jsonl" - "lsfs://mydata.jsonl" -
- "data:csv;base64,{base64_content}"
- """
-
-
-class SourceRowsDataSource(BaseModel):
- rows: List[Dict[str, Union[bool, float, str, List[object], object, None]]]
- """The dataset is stored in rows.
-
- E.g. - [ {"messages": [{"role": "user", "content": "Hello, world!"}, {"role":
- "assistant", "content": "Hello, world!"}]} ]
- """
-
- type: Literal["rows"]
-
-
-Source: TypeAlias = Annotated[Union[SourceUriDataSource, SourceRowsDataSource], PropertyInfo(discriminator="type")]
-
-
-class DatasetRetrieveResponse(BaseModel):
- identifier: str
-
- metadata: Dict[str, Union[bool, float, str, List[object], object, None]]
- """Additional metadata for the dataset"""
-
- provider_id: str
-
- purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"]
- """Purpose of the dataset indicating its intended use"""
-
- source: Source
- """Data source configuration for the dataset"""
-
- type: Literal["dataset"]
- """Type of resource, always 'dataset' for datasets"""
-
- provider_resource_id: Optional[str] = None
diff --git a/src/llama_stack_client/types/beta/list_datasets_response.py b/src/llama_stack_client/types/beta/list_datasets_response.py
deleted file mode 100644
index 4f71ae16..00000000
--- a/src/llama_stack_client/types/beta/list_datasets_response.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from ..._models import BaseModel
-from .dataset_list_response import DatasetListResponse
-
-__all__ = ["ListDatasetsResponse"]
-
-
-class ListDatasetsResponse(BaseModel):
- data: DatasetListResponse
- """List of datasets"""
diff --git a/src/llama_stack_client/types/conversation_create_params.py b/src/llama_stack_client/types/conversation_create_params.py
index c51245dd..96fbb82e 100644
--- a/src/llama_stack_client/types/conversation_create_params.py
+++ b/src/llama_stack_client/types/conversation_create_params.py
@@ -20,6 +20,7 @@
"ItemOpenAIResponseMessageContentUnionMember1",
"ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"ItemOpenAIResponseMessageContentUnionMember2",
"ItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"ItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -64,13 +65,34 @@ class ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCont
type: Required[Literal["input_image"]]
"""Content type identifier, always "input_image" """
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: str
"""(Optional) URL of the image content"""
+class ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(TypedDict, total=False):
+ type: Required[Literal["input_file"]]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: str
+ """The data of the file to be sent to the model."""
+
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: str
+ """The URL of the file to be sent to the model."""
+
+ filename: str
+ """The name of the file to be sent to the model."""
+
+
ItemOpenAIResponseMessageContentUnionMember1: TypeAlias = Union[
ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
]
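
Usage sketch for the new `input_file` content part added in the hunk above, assuming a `LlamaStackClient` with a `conversations.create(items=[...])` surface and a locally running server; only the part's field names (`type`, `file_id`, `file_data`, `file_url`, `filename`) come from this hunk, everything else is a placeholder:

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local endpoint

    conversation = client.conversations.create(
        items=[
            {
                "type": "message",
                "role": "user",
                "content": [
                    {"type": "input_text", "text": "Summarize the attached report."},
                    # New in this patch: file parts alongside text and image parts.
                    {"type": "input_file", "file_id": "file-abc123", "filename": "report.pdf"},
                ],
            }
        ]
    )
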
diff --git a/src/llama_stack_client/types/conversations/item_create_params.py b/src/llama_stack_client/types/conversations/item_create_params.py
index 8df31144..111c39fb 100644
--- a/src/llama_stack_client/types/conversations/item_create_params.py
+++ b/src/llama_stack_client/types/conversations/item_create_params.py
@@ -20,6 +20,7 @@
"ItemOpenAIResponseMessageContentUnionMember1",
"ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"ItemOpenAIResponseMessageContentUnionMember2",
"ItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"ItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -61,13 +62,34 @@ class ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCont
type: Required[Literal["input_image"]]
"""Content type identifier, always "input_image" """
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: str
"""(Optional) URL of the image content"""
+class ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(TypedDict, total=False):
+ type: Required[Literal["input_file"]]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: str
+ """The data of the file to be sent to the model."""
+
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: str
+ """The URL of the file to be sent to the model."""
+
+ filename: str
+ """The name of the file to be sent to the model."""
+
+
ItemOpenAIResponseMessageContentUnionMember1: TypeAlias = Union[
ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
]
diff --git a/src/llama_stack_client/types/conversations/item_create_response.py b/src/llama_stack_client/types/conversations/item_create_response.py
index c382e2b9..580aaf23 100644
--- a/src/llama_stack_client/types/conversations/item_create_response.py
+++ b/src/llama_stack_client/types/conversations/item_create_response.py
@@ -19,6 +19,7 @@
"DataOpenAIResponseMessageContentUnionMember1",
"DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"DataOpenAIResponseMessageContentUnionMember2",
"DataOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"DataOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -55,14 +56,35 @@ class DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCont
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
DataOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
diff --git a/src/llama_stack_client/types/conversations/item_get_response.py b/src/llama_stack_client/types/conversations/item_get_response.py
index 9f8d4bda..434e4639 100644
--- a/src/llama_stack_client/types/conversations/item_get_response.py
+++ b/src/llama_stack_client/types/conversations/item_get_response.py
@@ -18,6 +18,7 @@
"OpenAIResponseMessageContentUnionMember1",
"OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OpenAIResponseMessageContentUnionMember2",
"OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -54,14 +55,35 @@ class OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentI
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
OpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
diff --git a/src/llama_stack_client/types/conversations/item_list_response.py b/src/llama_stack_client/types/conversations/item_list_response.py
index b95f56fb..d6ba4735 100644
--- a/src/llama_stack_client/types/conversations/item_list_response.py
+++ b/src/llama_stack_client/types/conversations/item_list_response.py
@@ -18,6 +18,7 @@
"OpenAIResponseMessageContentUnionMember1",
"OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OpenAIResponseMessageContentUnionMember2",
"OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -54,14 +55,35 @@ class OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentI
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
OpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
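
A read-side sketch continuing the one above: with the `input_file` variant added to the discriminated union in this hunk, file parts come back as typed models when listing conversation items. The `conversations.items.list(...)` call, the `.data` attribute, and the ids are assumptions about the client surface, not guaranteed by this diff:

    # Assumed surface: client.conversations.items.list(conversation_id) -> object with .data
    listed = client.conversations.items.list(conversation.id)
    for item in listed.data:
        if item.type != "message" or not isinstance(item.content, list):
            continue
        for part in item.content:
            if part.type == "input_file":
                # Fields defined by the new model: file_data, file_id, file_url, filename.
                print(part.filename, part.file_id or part.file_url)
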
diff --git a/src/llama_stack_client/types/response_create_params.py b/src/llama_stack_client/types/response_create_params.py
index c8b48657..f99cd037 100644
--- a/src/llama_stack_client/types/response_create_params.py
+++ b/src/llama_stack_client/types/response_create_params.py
@@ -20,6 +20,7 @@
"InputUnionMember1OpenAIResponseMessageContentUnionMember1",
"InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"InputUnionMember1OpenAIResponseMessageContentUnionMember2",
"InputUnionMember1OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"InputUnionMember1OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -38,6 +39,11 @@
"InputUnionMember1OpenAIResponseMcpApprovalRequest",
"InputUnionMember1OpenAIResponseInputFunctionToolCallOutput",
"InputUnionMember1OpenAIResponseMcpApprovalResponse",
+ "Prompt",
+ "PromptVariables",
+ "PromptVariablesOpenAIResponseInputMessageContentText",
+ "PromptVariablesOpenAIResponseInputMessageContentImage",
+ "PromptVariablesOpenAIResponseInputMessageContentFile",
"Text",
"TextFormat",
"Tool",
@@ -83,6 +89,9 @@ class ResponseCreateParamsBase(TypedDict, total=False):
responses.
"""
+ prompt: Prompt
+ """(Optional) Prompt object with ID, version, and variables."""
+
store: bool
temperature: float
@@ -112,13 +121,36 @@ class InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInp
type: Required[Literal["input_image"]]
"""Content type identifier, always "input_image" """
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: str
"""(Optional) URL of the image content"""
+class InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(
+ TypedDict, total=False
+):
+ type: Required[Literal["input_file"]]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: str
+ """The data of the file to be sent to the model."""
+
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: str
+ """The URL of the file to be sent to the model."""
+
+ filename: str
+ """The name of the file to be sent to the model."""
+
+
InputUnionMember1OpenAIResponseMessageContentUnionMember1: TypeAlias = Union[
InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
]
@@ -400,6 +432,67 @@ class InputUnionMember1OpenAIResponseMcpApprovalResponse(TypedDict, total=False)
]
+class PromptVariablesOpenAIResponseInputMessageContentText(TypedDict, total=False):
+ text: Required[str]
+ """The text content of the input message"""
+
+ type: Required[Literal["input_text"]]
+ """Content type identifier, always "input_text" """
+
+
+class PromptVariablesOpenAIResponseInputMessageContentImage(TypedDict, total=False):
+ detail: Required[Literal["low", "high", "auto"]]
+ """Level of detail for image processing, can be "low", "high", or "auto" """
+
+ type: Required[Literal["input_image"]]
+ """Content type identifier, always "input_image" """
+
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
+ image_url: str
+ """(Optional) URL of the image content"""
+
+
+class PromptVariablesOpenAIResponseInputMessageContentFile(TypedDict, total=False):
+ type: Required[Literal["input_file"]]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: str
+ """The data of the file to be sent to the model."""
+
+ file_id: str
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: str
+ """The URL of the file to be sent to the model."""
+
+ filename: str
+ """The name of the file to be sent to the model."""
+
+
+PromptVariables: TypeAlias = Union[
+ PromptVariablesOpenAIResponseInputMessageContentText,
+ PromptVariablesOpenAIResponseInputMessageContentImage,
+ PromptVariablesOpenAIResponseInputMessageContentFile,
+]
+
+
+class Prompt(TypedDict, total=False):
+ id: Required[str]
+ """Unique identifier of the prompt template"""
+
+ variables: Dict[str, PromptVariables]
+ """
+ Dictionary of variable names to OpenAIResponseInputMessageContent structure for
+ template substitution. The substitution values can either be strings, or other
+ Response input types like images or files.
+ """
+
+ version: str
+ """Version number of the prompt to use (defaults to latest if not specified)"""
+
+
class TextFormat(TypedDict, total=False):
type: Required[Literal["text", "json_schema", "json_object"]]
"""Must be "text", "json_schema", or "json_object" to identify the format type"""
diff --git a/src/llama_stack_client/types/response_list_response.py b/src/llama_stack_client/types/response_list_response.py
index 78c683b4..ccd9a3d7 100644
--- a/src/llama_stack_client/types/response_list_response.py
+++ b/src/llama_stack_client/types/response_list_response.py
@@ -21,6 +21,7 @@
"InputOpenAIResponseMessageContentUnionMember1",
"InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"InputOpenAIResponseMessageContentUnionMember2",
"InputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"InputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -44,6 +45,7 @@
"OutputOpenAIResponseMessageContentUnionMember1",
"OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OutputOpenAIResponseMessageContentUnionMember2",
"OutputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OutputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -63,6 +65,11 @@
"Text",
"TextFormat",
"Error",
+ "Prompt",
+ "PromptVariables",
+ "PromptVariablesOpenAIResponseInputMessageContentText",
+ "PromptVariablesOpenAIResponseInputMessageContentImage",
+ "PromptVariablesOpenAIResponseInputMessageContentFile",
"Tool",
"ToolOpenAIResponseInputToolWebSearch",
"ToolOpenAIResponseInputToolFileSearch",
@@ -92,14 +99,35 @@ class InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCon
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
InputOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
@@ -396,14 +424,35 @@ class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCo
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
OutputOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
@@ -699,6 +748,70 @@ class Error(BaseModel):
"""Human-readable error message describing the failure"""
+class PromptVariablesOpenAIResponseInputMessageContentText(BaseModel):
+ text: str
+ """The text content of the input message"""
+
+ type: Literal["input_text"]
+ """Content type identifier, always "input_text" """
+
+
+class PromptVariablesOpenAIResponseInputMessageContentImage(BaseModel):
+ detail: Literal["low", "high", "auto"]
+ """Level of detail for image processing, can be "low", "high", or "auto" """
+
+ type: Literal["input_image"]
+ """Content type identifier, always "input_image" """
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ image_url: Optional[str] = None
+ """(Optional) URL of the image content"""
+
+
+class PromptVariablesOpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
+PromptVariables: TypeAlias = Annotated[
+ Union[
+ PromptVariablesOpenAIResponseInputMessageContentText,
+ PromptVariablesOpenAIResponseInputMessageContentImage,
+ PromptVariablesOpenAIResponseInputMessageContentFile,
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class Prompt(BaseModel):
+ id: str
+ """Unique identifier of the prompt template"""
+
+ variables: Optional[Dict[str, PromptVariables]] = None
+ """
+ Dictionary of variable names to OpenAIResponseInputMessageContent structures for
+ template substitution. The substitution values can be strings or other Response
+ input types like images or files.
+ """
+
+ version: Optional[str] = None
+ """Version number of the prompt to use (defaults to latest if not specified)"""
+
+
class ToolOpenAIResponseInputToolWebSearch(BaseModel):
type: Literal["web_search", "web_search_preview", "web_search_preview_2025_03_11"]
"""Web search tool type variant to use"""
@@ -842,6 +955,9 @@ class ResponseListResponse(BaseModel):
previous_response_id: Optional[str] = None
"""(Optional) ID of the previous response in a conversation"""
+ prompt: Optional[Prompt] = None
+ """(Optional) Reference to a prompt template and its variables."""
+
temperature: Optional[float] = None
"""(Optional) Sampling temperature used for generation"""
diff --git a/src/llama_stack_client/types/response_object.py b/src/llama_stack_client/types/response_object.py
index 57f708ce..706f50e2 100644
--- a/src/llama_stack_client/types/response_object.py
+++ b/src/llama_stack_client/types/response_object.py
@@ -21,6 +21,7 @@
"OutputOpenAIResponseMessageContentUnionMember1",
"OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OutputOpenAIResponseMessageContentUnionMember2",
"OutputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OutputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -40,6 +41,11 @@
"Text",
"TextFormat",
"Error",
+ "Prompt",
+ "PromptVariables",
+ "PromptVariablesOpenAIResponseInputMessageContentText",
+ "PromptVariablesOpenAIResponseInputMessageContentImage",
+ "PromptVariablesOpenAIResponseInputMessageContentFile",
"Tool",
"ToolOpenAIResponseInputToolWebSearch",
"ToolOpenAIResponseInputToolFileSearch",
@@ -69,14 +75,35 @@ class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCo
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
OutputOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
@@ -372,6 +399,70 @@ class Error(BaseModel):
"""Human-readable error message describing the failure"""
+class PromptVariablesOpenAIResponseInputMessageContentText(BaseModel):
+ text: str
+ """The text content of the input message"""
+
+ type: Literal["input_text"]
+ """Content type identifier, always "input_text" """
+
+
+class PromptVariablesOpenAIResponseInputMessageContentImage(BaseModel):
+ detail: Literal["low", "high", "auto"]
+ """Level of detail for image processing, can be "low", "high", or "auto" """
+
+ type: Literal["input_image"]
+ """Content type identifier, always "input_image" """
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ image_url: Optional[str] = None
+ """(Optional) URL of the image content"""
+
+
+class PromptVariablesOpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
+PromptVariables: TypeAlias = Annotated[
+ Union[
+ PromptVariablesOpenAIResponseInputMessageContentText,
+ PromptVariablesOpenAIResponseInputMessageContentImage,
+ PromptVariablesOpenAIResponseInputMessageContentFile,
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class Prompt(BaseModel):
+ id: str
+ """Unique identifier of the prompt template"""
+
+ variables: Optional[Dict[str, PromptVariables]] = None
+ """
+ Dictionary of variable names to OpenAIResponseInputMessageContent structures for
+ template substitution. The substitution values can be strings or other Response
+ input types like images or files.
+ """
+
+ version: Optional[str] = None
+ """Version number of the prompt to use (defaults to latest if not specified)"""
+
+
class ToolOpenAIResponseInputToolWebSearch(BaseModel):
type: Literal["web_search", "web_search_preview", "web_search_preview_2025_03_11"]
"""Web search tool type variant to use"""
@@ -522,6 +613,9 @@ def output_text(self) -> str:
previous_response_id: Optional[str] = None
"""(Optional) ID of the previous response in a conversation"""
+ prompt: Optional[Prompt] = None
+ """(Optional) Reference to a prompt template and its variables."""
+
temperature: Optional[float] = None
"""(Optional) Sampling temperature used for generation"""
diff --git a/src/llama_stack_client/types/response_object_stream.py b/src/llama_stack_client/types/response_object_stream.py
index a75ac721..16fe6c6d 100644
--- a/src/llama_stack_client/types/response_object_stream.py
+++ b/src/llama_stack_client/types/response_object_stream.py
@@ -23,6 +23,7 @@
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1",
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2",
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -45,6 +46,7 @@
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1",
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2",
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -159,14 +161,37 @@ class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessage
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(
+ BaseModel
+):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
@@ -470,14 +495,37 @@ class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageC
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(
+ BaseModel
+):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
diff --git a/src/llama_stack_client/types/responses/input_item_list_response.py b/src/llama_stack_client/types/responses/input_item_list_response.py
index b812ee62..71a59f50 100644
--- a/src/llama_stack_client/types/responses/input_item_list_response.py
+++ b/src/llama_stack_client/types/responses/input_item_list_response.py
@@ -19,6 +19,7 @@
"DataOpenAIResponseMessageContentUnionMember1",
"DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
+ "DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"DataOpenAIResponseMessageContentUnionMember2",
"DataOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"DataOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -55,14 +56,35 @@ class DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCont
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
+class DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
+ type: Literal["input_file"]
+ """The type of the input item. Always `input_file`."""
+
+ file_data: Optional[str] = None
+ """The data of the file to be sent to the model."""
+
+ file_id: Optional[str] = None
+ """(Optional) The ID of the file to be sent to the model."""
+
+ file_url: Optional[str] = None
+ """The URL of the file to be sent to the model."""
+
+ filename: Optional[str] = None
+ """The name of the file to be sent to the model."""
+
+
DataOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
+ DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
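
The `input_file` content part added across these models is assumed here to have a matching request-side variant; a brief sketch of attaching a file to an input message (the file ID is illustrative):

    response = client.responses.create(
        model="meta-llama/Llama-3.3-70B-Instruct",
        input=[
            {
                "type": "message",
                "role": "user",
                "content": [
                    {"type": "input_text", "text": "What does this document cover?"},
                    {"type": "input_file", "file_id": "file_abc123"},
                ],
            }
        ],
    )
    print(response.output_text)
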
diff --git a/src/llama_stack_client/types/route_list_params.py b/src/llama_stack_client/types/route_list_params.py
new file mode 100644
index 00000000..764b13c7
--- /dev/null
+++ b/src/llama_stack_client/types/route_list_params.py
@@ -0,0 +1,17 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["RouteListParams"]
+
+
+class RouteListParams(TypedDict, total=False):
+ api_filter: Literal["v1", "v1alpha", "v1beta", "deprecated"]
+ """Optional filter to control which routes are returned.
+
+ Can be an API level ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at
+ that level, or 'deprecated' to show deprecated routes across all levels. If not
+ specified, returns only non-deprecated v1 routes.
+ """
diff --git a/src/llama_stack_client/types/shared/__init__.py b/src/llama_stack_client/types/shared/__init__.py
index c18a9358..a4aa1259 100644
--- a/src/llama_stack_client/types/shared/__init__.py
+++ b/src/llama_stack_client/types/shared/__init__.py
@@ -10,14 +10,11 @@
from .document import Document as Document
from .tool_call import ToolCall as ToolCall
from .param_type import ParamType as ParamType
-from .agent_config import AgentConfig as AgentConfig
from .query_config import QueryConfig as QueryConfig
from .query_result import QueryResult as QueryResult
from .user_message import UserMessage as UserMessage
from .scoring_result import ScoringResult as ScoringResult
from .system_message import SystemMessage as SystemMessage
-from .response_format import ResponseFormat as ResponseFormat
-from .sampling_params import SamplingParams as SamplingParams
from .safety_violation import SafetyViolation as SafetyViolation
from .completion_message import CompletionMessage as CompletionMessage
from .interleaved_content import InterleavedContent as InterleavedContent
diff --git a/src/llama_stack_client/types/shared/agent_config.py b/src/llama_stack_client/types/shared/agent_config.py
deleted file mode 100644
index 71d53ca5..00000000
--- a/src/llama_stack_client/types/shared/agent_config.py
+++ /dev/null
@@ -1,98 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union, Optional
-from typing_extensions import Literal, TypeAlias
-
-from ..._models import BaseModel
-from ..tool_def import ToolDef
-from .response_format import ResponseFormat
-from .sampling_params import SamplingParams
-
-__all__ = ["AgentConfig", "ToolConfig", "Toolgroup", "ToolgroupAgentToolGroupWithArgs"]
-
-
-class ToolConfig(BaseModel):
- system_message_behavior: Optional[Literal["append", "replace"]] = None
- """(Optional) Config for how to override the default system prompt.
-
- - `SystemMessageBehavior.append`: Appends the provided system message to the
- default system prompt. - `SystemMessageBehavior.replace`: Replaces the default
- system prompt with the provided system message. The system message can include
- the string '{{function_definitions}}' to indicate where the function
- definitions should be inserted.
- """
-
- tool_choice: Union[Literal["auto", "required", "none"], str, None] = None
- """(Optional) Whether tool use is automatic, required, or none.
-
- Can also specify a tool name to use a specific tool. Defaults to
- ToolChoice.auto.
- """
-
- tool_prompt_format: Optional[Literal["json", "function_tag", "python_list"]] = None
- """(Optional) Instructs the model how to format tool calls.
-
- By default, Llama Stack will attempt to use a format that is best adapted to the
- model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON
- object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a
- <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls
- are output as Python syntax -- a list of function calls.
- """
-
-
-class ToolgroupAgentToolGroupWithArgs(BaseModel):
- args: Dict[str, Union[bool, float, str, List[object], object, None]]
-
- name: str
-
-
-Toolgroup: TypeAlias = Union[str, ToolgroupAgentToolGroupWithArgs]
-
-
-class AgentConfig(BaseModel):
- instructions: str
- """The system instructions for the agent"""
-
- model: str
- """The model identifier to use for the agent"""
-
- client_tools: Optional[List[ToolDef]] = None
-
- enable_session_persistence: Optional[bool] = None
- """Optional flag indicating whether session data has to be persisted"""
-
- input_shields: Optional[List[str]] = None
-
- max_infer_iters: Optional[int] = None
-
- name: Optional[str] = None
- """Optional name for the agent, used in telemetry and identification"""
-
- output_shields: Optional[List[str]] = None
-
- response_format: Optional[ResponseFormat] = None
- """Optional response format configuration"""
-
- sampling_params: Optional[SamplingParams] = None
- """Sampling parameters."""
-
- tool_choice: Optional[Literal["auto", "required", "none"]] = None
- """Whether tool use is required or automatic.
-
- This is a hint to the model which may not be followed. It depends on the
- Instruction Following capabilities of the model.
- """
-
- tool_config: Optional[ToolConfig] = None
- """Configuration for tool use."""
-
- tool_prompt_format: Optional[Literal["json", "function_tag", "python_list"]] = None
- """Prompt format for calling custom / zero shot tools."""
-
- toolgroups: Optional[List[Toolgroup]] = None
diff --git a/src/llama_stack_client/types/shared/response_format.py b/src/llama_stack_client/types/shared/response_format.py
deleted file mode 100644
index 0d601a23..00000000
--- a/src/llama_stack_client/types/shared/response_format.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union
-from typing_extensions import Literal, Annotated, TypeAlias
-
-from ..._utils import PropertyInfo
-from ..._models import BaseModel
-
-__all__ = ["ResponseFormat", "JsonSchemaResponseFormat", "GrammarResponseFormat"]
-
-
-class JsonSchemaResponseFormat(BaseModel):
- json_schema: Dict[str, Union[bool, float, str, List[object], object, None]]
- """The JSON schema the response should conform to.
-
- In a Python SDK, this is often a `pydantic` model.
- """
-
- type: Literal["json_schema"]
- """Must be "json_schema" to identify this format type"""
-
-
-class GrammarResponseFormat(BaseModel):
- bnf: Dict[str, Union[bool, float, str, List[object], object, None]]
- """The BNF grammar specification the response should conform to"""
-
- type: Literal["grammar"]
- """Must be "grammar" to identify this format type"""
-
-
-ResponseFormat: TypeAlias = Annotated[
- Union[JsonSchemaResponseFormat, GrammarResponseFormat], PropertyInfo(discriminator="type")
-]
diff --git a/src/llama_stack_client/types/shared/sampling_params.py b/src/llama_stack_client/types/shared/sampling_params.py
deleted file mode 100644
index f34ed6f5..00000000
--- a/src/llama_stack_client/types/shared/sampling_params.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Union, Optional
-from typing_extensions import Literal, Annotated, TypeAlias
-
-from ..._utils import PropertyInfo
-from ..._models import BaseModel
-
-__all__ = [
- "SamplingParams",
- "Strategy",
- "StrategyGreedySamplingStrategy",
- "StrategyTopPSamplingStrategy",
- "StrategyTopKSamplingStrategy",
-]
-
-
-class StrategyGreedySamplingStrategy(BaseModel):
- type: Literal["greedy"]
- """Must be "greedy" to identify this sampling strategy"""
-
-
-class StrategyTopPSamplingStrategy(BaseModel):
- type: Literal["top_p"]
- """Must be "top_p" to identify this sampling strategy"""
-
- temperature: Optional[float] = None
- """Controls randomness in sampling. Higher values increase randomness"""
-
- top_p: Optional[float] = None
- """Cumulative probability threshold for nucleus sampling. Defaults to 0.95"""
-
-
-class StrategyTopKSamplingStrategy(BaseModel):
- top_k: int
- """Number of top tokens to consider for sampling. Must be at least 1"""
-
- type: Literal["top_k"]
- """Must be "top_k" to identify this sampling strategy"""
-
-
-Strategy: TypeAlias = Annotated[
- Union[StrategyGreedySamplingStrategy, StrategyTopPSamplingStrategy, StrategyTopKSamplingStrategy],
- PropertyInfo(discriminator="type"),
-]
-
-
-class SamplingParams(BaseModel):
- strategy: Strategy
- """The sampling strategy."""
-
- max_tokens: Optional[int] = None
- """The maximum number of tokens that can be generated in the completion.
-
- The token count of your prompt plus max_tokens cannot exceed the model's context
- length.
- """
-
- repetition_penalty: Optional[float] = None
- """Number between -2.0 and 2.0.
-
- Positive values penalize new tokens based on whether they appear in the text so
- far, increasing the model's likelihood to talk about new topics.
- """
-
- stop: Optional[List[str]] = None
- """Up to 4 sequences where the API will stop generating further tokens.
-
- The returned text will not contain the stop sequence.
- """
diff --git a/src/llama_stack_client/types/shared_params/__init__.py b/src/llama_stack_client/types/shared_params/__init__.py
index 12061849..4ce940e0 100644
--- a/src/llama_stack_client/types/shared_params/__init__.py
+++ b/src/llama_stack_client/types/shared_params/__init__.py
@@ -9,12 +9,9 @@
from .message import Message as Message
from .document import Document as Document
from .tool_call import ToolCall as ToolCall
-from .agent_config import AgentConfig as AgentConfig
from .query_config import QueryConfig as QueryConfig
from .user_message import UserMessage as UserMessage
from .system_message import SystemMessage as SystemMessage
-from .response_format import ResponseFormat as ResponseFormat
-from .sampling_params import SamplingParams as SamplingParams
from .completion_message import CompletionMessage as CompletionMessage
from .interleaved_content import InterleavedContent as InterleavedContent
from .tool_response_message import ToolResponseMessage as ToolResponseMessage
diff --git a/src/llama_stack_client/types/shared_params/agent_config.py b/src/llama_stack_client/types/shared_params/agent_config.py
deleted file mode 100644
index d444e5da..00000000
--- a/src/llama_stack_client/types/shared_params/agent_config.py
+++ /dev/null
@@ -1,100 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, Union, Iterable
-from typing_extensions import Literal, Required, TypeAlias, TypedDict
-
-from ..._types import SequenceNotStr
-from ..tool_def_param import ToolDefParam
-from .response_format import ResponseFormat
-from .sampling_params import SamplingParams
-
-__all__ = ["AgentConfig", "ToolConfig", "Toolgroup", "ToolgroupAgentToolGroupWithArgs"]
-
-
-class ToolConfig(TypedDict, total=False):
- system_message_behavior: Literal["append", "replace"]
- """(Optional) Config for how to override the default system prompt.
-
- - `SystemMessageBehavior.append`: Appends the provided system message to the
- default system prompt. - `SystemMessageBehavior.replace`: Replaces the default
- system prompt with the provided system message. The system message can include
- the string '{{function_definitions}}' to indicate where the function
- definitions should be inserted.
- """
-
- tool_choice: Union[Literal["auto", "required", "none"], str]
- """(Optional) Whether tool use is automatic, required, or none.
-
- Can also specify a tool name to use a specific tool. Defaults to
- ToolChoice.auto.
- """
-
- tool_prompt_format: Literal["json", "function_tag", "python_list"]
- """(Optional) Instructs the model how to format tool calls.
-
- By default, Llama Stack will attempt to use a format that is best adapted to the
- model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON
- object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a
- <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls
- are output as Python syntax -- a list of function calls.
- """
-
-
-class ToolgroupAgentToolGroupWithArgs(TypedDict, total=False):
- args: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]
-
- name: Required[str]
-
-
-Toolgroup: TypeAlias = Union[str, ToolgroupAgentToolGroupWithArgs]
-
-
-class AgentConfig(TypedDict, total=False):
- instructions: Required[str]
- """The system instructions for the agent"""
-
- model: Required[str]
- """The model identifier to use for the agent"""
-
- client_tools: Iterable[ToolDefParam]
-
- enable_session_persistence: bool
- """Optional flag indicating whether session data has to be persisted"""
-
- input_shields: SequenceNotStr[str]
-
- max_infer_iters: int
-
- name: str
- """Optional name for the agent, used in telemetry and identification"""
-
- output_shields: SequenceNotStr[str]
-
- response_format: ResponseFormat
- """Optional response format configuration"""
-
- sampling_params: SamplingParams
- """Sampling parameters."""
-
- tool_choice: Literal["auto", "required", "none"]
- """Whether tool use is required or automatic.
-
- This is a hint to the model which may not be followed. It depends on the
- Instruction Following capabilities of the model.
- """
-
- tool_config: ToolConfig
- """Configuration for tool use."""
-
- tool_prompt_format: Literal["json", "function_tag", "python_list"]
- """Prompt format for calling custom / zero shot tools."""
-
- toolgroups: SequenceNotStr[Toolgroup]
diff --git a/src/llama_stack_client/types/shared_params/response_format.py b/src/llama_stack_client/types/shared_params/response_format.py
deleted file mode 100644
index c3146dfc..00000000
--- a/src/llama_stack_client/types/shared_params/response_format.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, Union, Iterable
-from typing_extensions import Literal, Required, TypeAlias, TypedDict
-
-__all__ = ["ResponseFormat", "JsonSchemaResponseFormat", "GrammarResponseFormat"]
-
-
-class JsonSchemaResponseFormat(TypedDict, total=False):
- json_schema: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]
- """The JSON schema the response should conform to.
-
- In a Python SDK, this is often a `pydantic` model.
- """
-
- type: Required[Literal["json_schema"]]
- """Must be "json_schema" to identify this format type"""
-
-
-class GrammarResponseFormat(TypedDict, total=False):
- bnf: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]
- """The BNF grammar specification the response should conform to"""
-
- type: Required[Literal["grammar"]]
- """Must be "grammar" to identify this format type"""
-
-
-ResponseFormat: TypeAlias = Union[JsonSchemaResponseFormat, GrammarResponseFormat]
diff --git a/src/llama_stack_client/types/shared_params/sampling_params.py b/src/llama_stack_client/types/shared_params/sampling_params.py
deleted file mode 100644
index 9be15690..00000000
--- a/src/llama_stack_client/types/shared_params/sampling_params.py
+++ /dev/null
@@ -1,74 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union
-from typing_extensions import Literal, Required, TypeAlias, TypedDict
-
-from ..._types import SequenceNotStr
-
-__all__ = [
- "SamplingParams",
- "Strategy",
- "StrategyGreedySamplingStrategy",
- "StrategyTopPSamplingStrategy",
- "StrategyTopKSamplingStrategy",
-]
-
-
-class StrategyGreedySamplingStrategy(TypedDict, total=False):
- type: Required[Literal["greedy"]]
- """Must be "greedy" to identify this sampling strategy"""
-
-
-class StrategyTopPSamplingStrategy(TypedDict, total=False):
- type: Required[Literal["top_p"]]
- """Must be "top_p" to identify this sampling strategy"""
-
- temperature: float
- """Controls randomness in sampling. Higher values increase randomness"""
-
- top_p: float
- """Cumulative probability threshold for nucleus sampling. Defaults to 0.95"""
-
-
-class StrategyTopKSamplingStrategy(TypedDict, total=False):
- top_k: Required[int]
- """Number of top tokens to consider for sampling. Must be at least 1"""
-
- type: Required[Literal["top_k"]]
- """Must be "top_k" to identify this sampling strategy"""
-
-
-Strategy: TypeAlias = Union[StrategyGreedySamplingStrategy, StrategyTopPSamplingStrategy, StrategyTopKSamplingStrategy]
-
-
-class SamplingParams(TypedDict, total=False):
- strategy: Required[Strategy]
- """The sampling strategy."""
-
- max_tokens: int
- """The maximum number of tokens that can be generated in the completion.
-
- The token count of your prompt plus max_tokens cannot exceed the model's context
- length.
- """
-
- repetition_penalty: float
- """Number between -2.0 and 2.0.
-
- Positive values penalize new tokens based on whether they appear in the text so
- far, increasing the model's likelihood to talk about new topics.
- """
-
- stop: SequenceNotStr[str]
- """Up to 4 sequences where the API will stop generating further tokens.
-
- The returned text will not contain the stop sequence.
- """
diff --git a/src/llama_stack_client/types/tool_def_param.py b/src/llama_stack_client/types/tool_def_param.py
deleted file mode 100644
index 99e7def1..00000000
--- a/src/llama_stack_client/types/tool_def_param.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, Union, Iterable
-from typing_extensions import Required, TypedDict
-
-__all__ = ["ToolDefParam"]
-
-
-class ToolDefParam(TypedDict, total=False):
- name: Required[str]
- """Name of the tool"""
-
- description: str
- """(Optional) Human-readable description of what the tool does"""
-
- input_schema: Dict[str, Union[bool, float, str, Iterable[object], object, None]]
- """(Optional) JSON Schema for tool inputs (MCP inputSchema)"""
-
- metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]]
- """(Optional) Additional metadata about the tool"""
-
- output_schema: Dict[str, Union[bool, float, str, Iterable[object], object, None]]
- """(Optional) JSON Schema for tool outputs (MCP outputSchema)"""
-
- toolgroup_id: str
- """(Optional) ID of the tool group this tool belongs to"""
diff --git a/tests/api_resources/alpha/__init__.py b/tests/api_resources/alpha/__init__.py
deleted file mode 100644
index 6a8e62e9..00000000
--- a/tests/api_resources/alpha/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/alpha/agents/__init__.py b/tests/api_resources/alpha/agents/__init__.py
deleted file mode 100644
index 6a8e62e9..00000000
--- a/tests/api_resources/alpha/agents/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/alpha/agents/test_session.py b/tests/api_resources/alpha/agents/test_session.py
deleted file mode 100644
index 554c2d4e..00000000
--- a/tests/api_resources/alpha/agents/test_session.py
+++ /dev/null
@@ -1,422 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import os
-from typing import Any, cast
-
-import pytest
-
-from tests.utils import assert_matches_type
-from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types.alpha.agents import (
- Session,
- SessionListResponse,
- SessionCreateResponse,
-)
-
-base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
-
-
-class TestSession:
- parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
-
- @parametrize
- def test_method_create(self, client: LlamaStackClient) -> None:
- session = client.alpha.agents.session.create(
- agent_id="agent_id",
- session_name="session_name",
- )
- assert_matches_type(SessionCreateResponse, session, path=["response"])
-
- @parametrize
- def test_raw_response_create(self, client: LlamaStackClient) -> None:
- response = client.alpha.agents.session.with_raw_response.create(
- agent_id="agent_id",
- session_name="session_name",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- session = response.parse()
- assert_matches_type(SessionCreateResponse, session, path=["response"])
-
- @parametrize
- def test_streaming_response_create(self, client: LlamaStackClient) -> None:
- with client.alpha.agents.session.with_streaming_response.create(
- agent_id="agent_id",
- session_name="session_name",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- session = response.parse()
- assert_matches_type(SessionCreateResponse, session, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_create(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.alpha.agents.session.with_raw_response.create(
- agent_id="",
- session_name="session_name",
- )
-
- @parametrize
- def test_method_retrieve(self, client: LlamaStackClient) -> None:
- session = client.alpha.agents.session.retrieve(
- session_id="session_id",
- agent_id="agent_id",
- )
- assert_matches_type(Session, session, path=["response"])
-
- @parametrize
- def test_method_retrieve_with_all_params(self, client: LlamaStackClient) -> None:
- session = client.alpha.agents.session.retrieve(
- session_id="session_id",
- agent_id="agent_id",
- turn_ids=["string"],
- )
- assert_matches_type(Session, session, path=["response"])
-
- @parametrize
- def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
- response = client.alpha.agents.session.with_raw_response.retrieve(
- session_id="session_id",
- agent_id="agent_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- session = response.parse()
- assert_matches_type(Session, session, path=["response"])
-
- @parametrize
- def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
- with client.alpha.agents.session.with_streaming_response.retrieve(
- session_id="session_id",
- agent_id="agent_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- session = response.parse()
- assert_matches_type(Session, session, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.alpha.agents.session.with_raw_response.retrieve(
- session_id="session_id",
- agent_id="",
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.alpha.agents.session.with_raw_response.retrieve(
- session_id="",
- agent_id="agent_id",
- )
-
- @parametrize
- def test_method_list(self, client: LlamaStackClient) -> None:
- session = client.alpha.agents.session.list(
- agent_id="agent_id",
- )
- assert_matches_type(SessionListResponse, session, path=["response"])
-
- @parametrize
- def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
- session = client.alpha.agents.session.list(
- agent_id="agent_id",
- limit=0,
- start_index=0,
- )
- assert_matches_type(SessionListResponse, session, path=["response"])
-
- @parametrize
- def test_raw_response_list(self, client: LlamaStackClient) -> None:
- response = client.alpha.agents.session.with_raw_response.list(
- agent_id="agent_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- session = response.parse()
- assert_matches_type(SessionListResponse, session, path=["response"])
-
- @parametrize
- def test_streaming_response_list(self, client: LlamaStackClient) -> None:
- with client.alpha.agents.session.with_streaming_response.list(
- agent_id="agent_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- session = response.parse()
- assert_matches_type(SessionListResponse, session, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_list(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.alpha.agents.session.with_raw_response.list(
- agent_id="",
- )
-
- @parametrize
- def test_method_delete(self, client: LlamaStackClient) -> None:
- session = client.alpha.agents.session.delete(
- session_id="session_id",
- agent_id="agent_id",
- )
- assert session is None
-
- @parametrize
- def test_raw_response_delete(self, client: LlamaStackClient) -> None:
- response = client.alpha.agents.session.with_raw_response.delete(
- session_id="session_id",
- agent_id="agent_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- session = response.parse()
- assert session is None
-
- @parametrize
- def test_streaming_response_delete(self, client: LlamaStackClient) -> None:
- with client.alpha.agents.session.with_streaming_response.delete(
- session_id="session_id",
- agent_id="agent_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- session = response.parse()
- assert session is None
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_delete(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.alpha.agents.session.with_raw_response.delete(
- session_id="session_id",
- agent_id="",
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.alpha.agents.session.with_raw_response.delete(
- session_id="",
- agent_id="agent_id",
- )
-
-
-class TestAsyncSession:
- parametrize = pytest.mark.parametrize(
- "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
- )
-
- @parametrize
- async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None:
- session = await async_client.alpha.agents.session.create(
- agent_id="agent_id",
- session_name="session_name",
- )
- assert_matches_type(SessionCreateResponse, session, path=["response"])
-
- @parametrize
- async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.agents.session.with_raw_response.create(
- agent_id="agent_id",
- session_name="session_name",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- session = await response.parse()
- assert_matches_type(SessionCreateResponse, session, path=["response"])
-
- @parametrize
- async def test_streaming_response_create(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.agents.session.with_streaming_response.create(
- agent_id="agent_id",
- session_name="session_name",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- session = await response.parse()
- assert_matches_type(SessionCreateResponse, session, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_create(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.alpha.agents.session.with_raw_response.create(
- agent_id="",
- session_name="session_name",
- )
-
- @parametrize
- async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- session = await async_client.alpha.agents.session.retrieve(
- session_id="session_id",
- agent_id="agent_id",
- )
- assert_matches_type(Session, session, path=["response"])
-
- @parametrize
- async def test_method_retrieve_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- session = await async_client.alpha.agents.session.retrieve(
- session_id="session_id",
- agent_id="agent_id",
- turn_ids=["string"],
- )
- assert_matches_type(Session, session, path=["response"])
-
- @parametrize
- async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.agents.session.with_raw_response.retrieve(
- session_id="session_id",
- agent_id="agent_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- session = await response.parse()
- assert_matches_type(Session, session, path=["response"])
-
- @parametrize
- async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.agents.session.with_streaming_response.retrieve(
- session_id="session_id",
- agent_id="agent_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- session = await response.parse()
- assert_matches_type(Session, session, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.alpha.agents.session.with_raw_response.retrieve(
- session_id="session_id",
- agent_id="",
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.alpha.agents.session.with_raw_response.retrieve(
- session_id="",
- agent_id="agent_id",
- )
-
- @parametrize
- async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
- session = await async_client.alpha.agents.session.list(
- agent_id="agent_id",
- )
- assert_matches_type(SessionListResponse, session, path=["response"])
-
- @parametrize
- async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- session = await async_client.alpha.agents.session.list(
- agent_id="agent_id",
- limit=0,
- start_index=0,
- )
- assert_matches_type(SessionListResponse, session, path=["response"])
-
- @parametrize
- async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.agents.session.with_raw_response.list(
- agent_id="agent_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- session = await response.parse()
- assert_matches_type(SessionListResponse, session, path=["response"])
-
- @parametrize
- async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.agents.session.with_streaming_response.list(
- agent_id="agent_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- session = await response.parse()
- assert_matches_type(SessionListResponse, session, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_list(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.alpha.agents.session.with_raw_response.list(
- agent_id="",
- )
-
- @parametrize
- async def test_method_delete(self, async_client: AsyncLlamaStackClient) -> None:
- session = await async_client.alpha.agents.session.delete(
- session_id="session_id",
- agent_id="agent_id",
- )
- assert session is None
-
- @parametrize
- async def test_raw_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.agents.session.with_raw_response.delete(
- session_id="session_id",
- agent_id="agent_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- session = await response.parse()
- assert session is None
-
- @parametrize
- async def test_streaming_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.agents.session.with_streaming_response.delete(
- session_id="session_id",
- agent_id="agent_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- session = await response.parse()
- assert session is None
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_delete(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.alpha.agents.session.with_raw_response.delete(
- session_id="session_id",
- agent_id="",
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.alpha.agents.session.with_raw_response.delete(
- session_id="",
- agent_id="agent_id",
- )
diff --git a/tests/api_resources/alpha/agents/test_steps.py b/tests/api_resources/alpha/agents/test_steps.py
deleted file mode 100644
index c001dd23..00000000
--- a/tests/api_resources/alpha/agents/test_steps.py
+++ /dev/null
@@ -1,178 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import os
-from typing import Any, cast
-
-import pytest
-
-from tests.utils import assert_matches_type
-from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types.alpha.agents import StepRetrieveResponse
-
-base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
-
-
-class TestSteps:
- parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
-
- @parametrize
- def test_method_retrieve(self, client: LlamaStackClient) -> None:
- step = client.alpha.agents.steps.retrieve(
- step_id="step_id",
- agent_id="agent_id",
- session_id="session_id",
- turn_id="turn_id",
- )
- assert_matches_type(StepRetrieveResponse, step, path=["response"])
-
- @parametrize
- def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
- response = client.alpha.agents.steps.with_raw_response.retrieve(
- step_id="step_id",
- agent_id="agent_id",
- session_id="session_id",
- turn_id="turn_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- step = response.parse()
- assert_matches_type(StepRetrieveResponse, step, path=["response"])
-
- @parametrize
- def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
- with client.alpha.agents.steps.with_streaming_response.retrieve(
- step_id="step_id",
- agent_id="agent_id",
- session_id="session_id",
- turn_id="turn_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- step = response.parse()
- assert_matches_type(StepRetrieveResponse, step, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.alpha.agents.steps.with_raw_response.retrieve(
- step_id="step_id",
- agent_id="",
- session_id="session_id",
- turn_id="turn_id",
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.alpha.agents.steps.with_raw_response.retrieve(
- step_id="step_id",
- agent_id="agent_id",
- session_id="",
- turn_id="turn_id",
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- client.alpha.agents.steps.with_raw_response.retrieve(
- step_id="step_id",
- agent_id="agent_id",
- session_id="session_id",
- turn_id="",
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `step_id` but received ''"):
- client.alpha.agents.steps.with_raw_response.retrieve(
- step_id="",
- agent_id="agent_id",
- session_id="session_id",
- turn_id="turn_id",
- )
-
-
-class TestAsyncSteps:
- parametrize = pytest.mark.parametrize(
- "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
- )
-
- @parametrize
- async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- step = await async_client.alpha.agents.steps.retrieve(
- step_id="step_id",
- agent_id="agent_id",
- session_id="session_id",
- turn_id="turn_id",
- )
- assert_matches_type(StepRetrieveResponse, step, path=["response"])
-
- @parametrize
- async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.agents.steps.with_raw_response.retrieve(
- step_id="step_id",
- agent_id="agent_id",
- session_id="session_id",
- turn_id="turn_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- step = await response.parse()
- assert_matches_type(StepRetrieveResponse, step, path=["response"])
-
- @parametrize
- async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.agents.steps.with_streaming_response.retrieve(
- step_id="step_id",
- agent_id="agent_id",
- session_id="session_id",
- turn_id="turn_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- step = await response.parse()
- assert_matches_type(StepRetrieveResponse, step, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.alpha.agents.steps.with_raw_response.retrieve(
- step_id="step_id",
- agent_id="",
- session_id="session_id",
- turn_id="turn_id",
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.alpha.agents.steps.with_raw_response.retrieve(
- step_id="step_id",
- agent_id="agent_id",
- session_id="",
- turn_id="turn_id",
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- await async_client.alpha.agents.steps.with_raw_response.retrieve(
- step_id="step_id",
- agent_id="agent_id",
- session_id="session_id",
- turn_id="",
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `step_id` but received ''"):
- await async_client.alpha.agents.steps.with_raw_response.retrieve(
- step_id="",
- agent_id="agent_id",
- session_id="session_id",
- turn_id="turn_id",
- )
diff --git a/tests/api_resources/alpha/agents/test_turn.py b/tests/api_resources/alpha/agents/test_turn.py
deleted file mode 100644
index 26f4a7b7..00000000
--- a/tests/api_resources/alpha/agents/test_turn.py
+++ /dev/null
@@ -1,1036 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import os
-from typing import Any, cast
-
-import pytest
-
-from tests.utils import assert_matches_type
-from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types.alpha.agents import Turn
-
-base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
-
-
-class TestTurn:
- parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
-
- @parametrize
- def test_method_create_overload_1(self, client: LlamaStackClient) -> None:
- turn = client.alpha.agents.turn.create(
- session_id="session_id",
- agent_id="agent_id",
- messages=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- )
- assert_matches_type(Turn, turn, path=["response"])
-
- @parametrize
- def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient) -> None:
- turn = client.alpha.agents.turn.create(
- session_id="session_id",
- agent_id="agent_id",
- messages=[
- {
- "content": "string",
- "role": "user",
- "context": "string",
- }
- ],
- documents=[
- {
- "content": "string",
- "mime_type": "mime_type",
- }
- ],
- stream=False,
- tool_config={
- "system_message_behavior": "append",
- "tool_choice": "auto",
- "tool_prompt_format": "json",
- },
- toolgroups=["string"],
- )
- assert_matches_type(Turn, turn, path=["response"])
-
- @parametrize
- def test_raw_response_create_overload_1(self, client: LlamaStackClient) -> None:
- response = client.alpha.agents.turn.with_raw_response.create(
- session_id="session_id",
- agent_id="agent_id",
- messages=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- turn = response.parse()
- assert_matches_type(Turn, turn, path=["response"])
-
- @parametrize
- def test_streaming_response_create_overload_1(self, client: LlamaStackClient) -> None:
- with client.alpha.agents.turn.with_streaming_response.create(
- session_id="session_id",
- agent_id="agent_id",
- messages=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- turn = response.parse()
- assert_matches_type(Turn, turn, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_create_overload_1(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.alpha.agents.turn.with_raw_response.create(
- session_id="session_id",
- agent_id="",
- messages=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.alpha.agents.turn.with_raw_response.create(
- session_id="",
- agent_id="agent_id",
- messages=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- )
-
- @parametrize
- def test_method_create_overload_2(self, client: LlamaStackClient) -> None:
- turn_stream = client.alpha.agents.turn.create(
- session_id="session_id",
- agent_id="agent_id",
- messages=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- stream=True,
- )
- turn_stream.response.close()
-
- @parametrize
- def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient) -> None:
- turn_stream = client.alpha.agents.turn.create(
- session_id="session_id",
- agent_id="agent_id",
- messages=[
- {
- "content": "string",
- "role": "user",
- "context": "string",
- }
- ],
- stream=True,
- documents=[
- {
- "content": "string",
- "mime_type": "mime_type",
- }
- ],
- tool_config={
- "system_message_behavior": "append",
- "tool_choice": "auto",
- "tool_prompt_format": "json",
- },
- toolgroups=["string"],
- )
- turn_stream.response.close()
-
- @parametrize
- def test_raw_response_create_overload_2(self, client: LlamaStackClient) -> None:
- response = client.alpha.agents.turn.with_raw_response.create(
- session_id="session_id",
- agent_id="agent_id",
- messages=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- stream=True,
- )
-
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- stream = response.parse()
- stream.close()
-
- @parametrize
- def test_streaming_response_create_overload_2(self, client: LlamaStackClient) -> None:
- with client.alpha.agents.turn.with_streaming_response.create(
- session_id="session_id",
- agent_id="agent_id",
- messages=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- stream=True,
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- stream = response.parse()
- stream.close()
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_create_overload_2(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.alpha.agents.turn.with_raw_response.create(
- session_id="session_id",
- agent_id="",
- messages=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- stream=True,
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.alpha.agents.turn.with_raw_response.create(
- session_id="",
- agent_id="agent_id",
- messages=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- stream=True,
- )
-
- @parametrize
- def test_method_retrieve(self, client: LlamaStackClient) -> None:
- turn = client.alpha.agents.turn.retrieve(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="session_id",
- )
- assert_matches_type(Turn, turn, path=["response"])
-
- @parametrize
- def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
- response = client.alpha.agents.turn.with_raw_response.retrieve(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="session_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- turn = response.parse()
- assert_matches_type(Turn, turn, path=["response"])
-
- @parametrize
- def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
- with client.alpha.agents.turn.with_streaming_response.retrieve(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="session_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- turn = response.parse()
- assert_matches_type(Turn, turn, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.alpha.agents.turn.with_raw_response.retrieve(
- turn_id="turn_id",
- agent_id="",
- session_id="session_id",
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.alpha.agents.turn.with_raw_response.retrieve(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="",
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- client.alpha.agents.turn.with_raw_response.retrieve(
- turn_id="",
- agent_id="agent_id",
- session_id="session_id",
- )
-
- @parametrize
- def test_method_resume_overload_1(self, client: LlamaStackClient) -> None:
- turn = client.alpha.agents.turn.resume(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="session_id",
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- )
- assert_matches_type(Turn, turn, path=["response"])
-
- @parametrize
- def test_method_resume_with_all_params_overload_1(self, client: LlamaStackClient) -> None:
- turn = client.alpha.agents.turn.resume(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="session_id",
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- "metadata": {"foo": True},
- }
- ],
- stream=False,
- )
- assert_matches_type(Turn, turn, path=["response"])
-
- @parametrize
- def test_raw_response_resume_overload_1(self, client: LlamaStackClient) -> None:
- response = client.alpha.agents.turn.with_raw_response.resume(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="session_id",
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- turn = response.parse()
- assert_matches_type(Turn, turn, path=["response"])
-
- @parametrize
- def test_streaming_response_resume_overload_1(self, client: LlamaStackClient) -> None:
- with client.alpha.agents.turn.with_streaming_response.resume(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="session_id",
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- turn = response.parse()
- assert_matches_type(Turn, turn, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_resume_overload_1(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.alpha.agents.turn.with_raw_response.resume(
- turn_id="turn_id",
- agent_id="",
- session_id="session_id",
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.alpha.agents.turn.with_raw_response.resume(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="",
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- client.alpha.agents.turn.with_raw_response.resume(
- turn_id="",
- agent_id="agent_id",
- session_id="session_id",
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- )
-
- @parametrize
- def test_method_resume_overload_2(self, client: LlamaStackClient) -> None:
- turn_stream = client.alpha.agents.turn.resume(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="session_id",
- stream=True,
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- )
- turn_stream.response.close()
-
- @parametrize
- def test_raw_response_resume_overload_2(self, client: LlamaStackClient) -> None:
- response = client.alpha.agents.turn.with_raw_response.resume(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="session_id",
- stream=True,
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- )
-
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- stream = response.parse()
- stream.close()
-
- @parametrize
- def test_streaming_response_resume_overload_2(self, client: LlamaStackClient) -> None:
- with client.alpha.agents.turn.with_streaming_response.resume(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="session_id",
- stream=True,
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- stream = response.parse()
- stream.close()
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_resume_overload_2(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.alpha.agents.turn.with_raw_response.resume(
- turn_id="turn_id",
- agent_id="",
- session_id="session_id",
- stream=True,
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.alpha.agents.turn.with_raw_response.resume(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="",
- stream=True,
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- client.alpha.agents.turn.with_raw_response.resume(
- turn_id="",
- agent_id="agent_id",
- session_id="session_id",
- stream=True,
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- )
-
-
-class TestAsyncTurn:
- parametrize = pytest.mark.parametrize(
- "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
- )
-
- @parametrize
- async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- turn = await async_client.alpha.agents.turn.create(
- session_id="session_id",
- agent_id="agent_id",
- messages=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- )
- assert_matches_type(Turn, turn, path=["response"])
-
- @parametrize
- async def test_method_create_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- turn = await async_client.alpha.agents.turn.create(
- session_id="session_id",
- agent_id="agent_id",
- messages=[
- {
- "content": "string",
- "role": "user",
- "context": "string",
- }
- ],
- documents=[
- {
- "content": "string",
- "mime_type": "mime_type",
- }
- ],
- stream=False,
- tool_config={
- "system_message_behavior": "append",
- "tool_choice": "auto",
- "tool_prompt_format": "json",
- },
- toolgroups=["string"],
- )
- assert_matches_type(Turn, turn, path=["response"])
-
- @parametrize
- async def test_raw_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.agents.turn.with_raw_response.create(
- session_id="session_id",
- agent_id="agent_id",
- messages=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- turn = await response.parse()
- assert_matches_type(Turn, turn, path=["response"])
-
- @parametrize
- async def test_streaming_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.agents.turn.with_streaming_response.create(
- session_id="session_id",
- agent_id="agent_id",
- messages=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- turn = await response.parse()
- assert_matches_type(Turn, turn, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.alpha.agents.turn.with_raw_response.create(
- session_id="session_id",
- agent_id="",
- messages=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.alpha.agents.turn.with_raw_response.create(
- session_id="",
- agent_id="agent_id",
- messages=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- )
-
- @parametrize
- async def test_method_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
- turn_stream = await async_client.alpha.agents.turn.create(
- session_id="session_id",
- agent_id="agent_id",
- messages=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- stream=True,
- )
- await turn_stream.response.aclose()
-
- @parametrize
- async def test_method_create_with_all_params_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
- turn_stream = await async_client.alpha.agents.turn.create(
- session_id="session_id",
- agent_id="agent_id",
- messages=[
- {
- "content": "string",
- "role": "user",
- "context": "string",
- }
- ],
- stream=True,
- documents=[
- {
- "content": "string",
- "mime_type": "mime_type",
- }
- ],
- tool_config={
- "system_message_behavior": "append",
- "tool_choice": "auto",
- "tool_prompt_format": "json",
- },
- toolgroups=["string"],
- )
- await turn_stream.response.aclose()
-
- @parametrize
- async def test_raw_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.agents.turn.with_raw_response.create(
- session_id="session_id",
- agent_id="agent_id",
- messages=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- stream=True,
- )
-
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- stream = await response.parse()
- await stream.close()
-
- @parametrize
- async def test_streaming_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.agents.turn.with_streaming_response.create(
- session_id="session_id",
- agent_id="agent_id",
- messages=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- stream=True,
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- stream = await response.parse()
- await stream.close()
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.alpha.agents.turn.with_raw_response.create(
- session_id="session_id",
- agent_id="",
- messages=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- stream=True,
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.alpha.agents.turn.with_raw_response.create(
- session_id="",
- agent_id="agent_id",
- messages=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- stream=True,
- )
-
- @parametrize
- async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- turn = await async_client.alpha.agents.turn.retrieve(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="session_id",
- )
- assert_matches_type(Turn, turn, path=["response"])
-
- @parametrize
- async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.agents.turn.with_raw_response.retrieve(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="session_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- turn = await response.parse()
- assert_matches_type(Turn, turn, path=["response"])
-
- @parametrize
- async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.agents.turn.with_streaming_response.retrieve(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="session_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- turn = await response.parse()
- assert_matches_type(Turn, turn, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.alpha.agents.turn.with_raw_response.retrieve(
- turn_id="turn_id",
- agent_id="",
- session_id="session_id",
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.alpha.agents.turn.with_raw_response.retrieve(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="",
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- await async_client.alpha.agents.turn.with_raw_response.retrieve(
- turn_id="",
- agent_id="agent_id",
- session_id="session_id",
- )
-
- @parametrize
- async def test_method_resume_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- turn = await async_client.alpha.agents.turn.resume(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="session_id",
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- )
- assert_matches_type(Turn, turn, path=["response"])
-
- @parametrize
- async def test_method_resume_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- turn = await async_client.alpha.agents.turn.resume(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="session_id",
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- "metadata": {"foo": True},
- }
- ],
- stream=False,
- )
- assert_matches_type(Turn, turn, path=["response"])
-
- @parametrize
- async def test_raw_response_resume_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.agents.turn.with_raw_response.resume(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="session_id",
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- turn = await response.parse()
- assert_matches_type(Turn, turn, path=["response"])
-
- @parametrize
- async def test_streaming_response_resume_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.agents.turn.with_streaming_response.resume(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="session_id",
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- turn = await response.parse()
- assert_matches_type(Turn, turn, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_resume_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.alpha.agents.turn.with_raw_response.resume(
- turn_id="turn_id",
- agent_id="",
- session_id="session_id",
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.alpha.agents.turn.with_raw_response.resume(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="",
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- await async_client.alpha.agents.turn.with_raw_response.resume(
- turn_id="",
- agent_id="agent_id",
- session_id="session_id",
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- )
-
- @parametrize
- async def test_method_resume_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
- turn_stream = await async_client.alpha.agents.turn.resume(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="session_id",
- stream=True,
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- )
- await turn_stream.response.aclose()
-
- @parametrize
- async def test_raw_response_resume_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.agents.turn.with_raw_response.resume(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="session_id",
- stream=True,
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- )
-
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- stream = await response.parse()
- await stream.close()
-
- @parametrize
- async def test_streaming_response_resume_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.agents.turn.with_streaming_response.resume(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="session_id",
- stream=True,
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- stream = await response.parse()
- await stream.close()
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_resume_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.alpha.agents.turn.with_raw_response.resume(
- turn_id="turn_id",
- agent_id="",
- session_id="session_id",
- stream=True,
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.alpha.agents.turn.with_raw_response.resume(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="",
- stream=True,
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- await async_client.alpha.agents.turn.with_raw_response.resume(
- turn_id="",
- agent_id="agent_id",
- session_id="session_id",
- stream=True,
- tool_responses=[
- {
- "call_id": "call_id",
- "content": "string",
- "tool_name": "brave_search",
- }
- ],
- )
diff --git a/tests/api_resources/alpha/eval/__init__.py b/tests/api_resources/alpha/eval/__init__.py
deleted file mode 100644
index 6a8e62e9..00000000
--- a/tests/api_resources/alpha/eval/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/alpha/eval/test_jobs.py b/tests/api_resources/alpha/eval/test_jobs.py
deleted file mode 100644
index 42844d80..00000000
--- a/tests/api_resources/alpha/eval/test_jobs.py
+++ /dev/null
@@ -1,318 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import os
-from typing import Any, cast
-
-import pytest
-
-from tests.utils import assert_matches_type
-from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types.alpha import Job, EvaluateResponse
-
-base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
-
-
-class TestJobs:
- parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
-
- @parametrize
- def test_method_retrieve(self, client: LlamaStackClient) -> None:
- job = client.alpha.eval.jobs.retrieve(
- job_id="job_id",
- benchmark_id="benchmark_id",
- )
- assert_matches_type(EvaluateResponse, job, path=["response"])
-
- @parametrize
- def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
- response = client.alpha.eval.jobs.with_raw_response.retrieve(
- job_id="job_id",
- benchmark_id="benchmark_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- job = response.parse()
- assert_matches_type(EvaluateResponse, job, path=["response"])
-
- @parametrize
- def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
- with client.alpha.eval.jobs.with_streaming_response.retrieve(
- job_id="job_id",
- benchmark_id="benchmark_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- job = response.parse()
- assert_matches_type(EvaluateResponse, job, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- client.alpha.eval.jobs.with_raw_response.retrieve(
- job_id="job_id",
- benchmark_id="",
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
- client.alpha.eval.jobs.with_raw_response.retrieve(
- job_id="",
- benchmark_id="benchmark_id",
- )
-
- @parametrize
- def test_method_cancel(self, client: LlamaStackClient) -> None:
- job = client.alpha.eval.jobs.cancel(
- job_id="job_id",
- benchmark_id="benchmark_id",
- )
- assert job is None
-
- @parametrize
- def test_raw_response_cancel(self, client: LlamaStackClient) -> None:
- response = client.alpha.eval.jobs.with_raw_response.cancel(
- job_id="job_id",
- benchmark_id="benchmark_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- job = response.parse()
- assert job is None
-
- @parametrize
- def test_streaming_response_cancel(self, client: LlamaStackClient) -> None:
- with client.alpha.eval.jobs.with_streaming_response.cancel(
- job_id="job_id",
- benchmark_id="benchmark_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- job = response.parse()
- assert job is None
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_cancel(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- client.alpha.eval.jobs.with_raw_response.cancel(
- job_id="job_id",
- benchmark_id="",
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
- client.alpha.eval.jobs.with_raw_response.cancel(
- job_id="",
- benchmark_id="benchmark_id",
- )
-
- @parametrize
- def test_method_status(self, client: LlamaStackClient) -> None:
- job = client.alpha.eval.jobs.status(
- job_id="job_id",
- benchmark_id="benchmark_id",
- )
- assert_matches_type(Job, job, path=["response"])
-
- @parametrize
- def test_raw_response_status(self, client: LlamaStackClient) -> None:
- response = client.alpha.eval.jobs.with_raw_response.status(
- job_id="job_id",
- benchmark_id="benchmark_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- job = response.parse()
- assert_matches_type(Job, job, path=["response"])
-
- @parametrize
- def test_streaming_response_status(self, client: LlamaStackClient) -> None:
- with client.alpha.eval.jobs.with_streaming_response.status(
- job_id="job_id",
- benchmark_id="benchmark_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- job = response.parse()
- assert_matches_type(Job, job, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_status(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- client.alpha.eval.jobs.with_raw_response.status(
- job_id="job_id",
- benchmark_id="",
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
- client.alpha.eval.jobs.with_raw_response.status(
- job_id="",
- benchmark_id="benchmark_id",
- )
-
-
-class TestAsyncJobs:
- parametrize = pytest.mark.parametrize(
- "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
- )
-
- @parametrize
- async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.alpha.eval.jobs.retrieve(
- job_id="job_id",
- benchmark_id="benchmark_id",
- )
- assert_matches_type(EvaluateResponse, job, path=["response"])
-
- @parametrize
- async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.eval.jobs.with_raw_response.retrieve(
- job_id="job_id",
- benchmark_id="benchmark_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- job = await response.parse()
- assert_matches_type(EvaluateResponse, job, path=["response"])
-
- @parametrize
- async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.eval.jobs.with_streaming_response.retrieve(
- job_id="job_id",
- benchmark_id="benchmark_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- job = await response.parse()
- assert_matches_type(EvaluateResponse, job, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- await async_client.alpha.eval.jobs.with_raw_response.retrieve(
- job_id="job_id",
- benchmark_id="",
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
- await async_client.alpha.eval.jobs.with_raw_response.retrieve(
- job_id="",
- benchmark_id="benchmark_id",
- )
-
- @parametrize
- async def test_method_cancel(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.alpha.eval.jobs.cancel(
- job_id="job_id",
- benchmark_id="benchmark_id",
- )
- assert job is None
-
- @parametrize
- async def test_raw_response_cancel(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.eval.jobs.with_raw_response.cancel(
- job_id="job_id",
- benchmark_id="benchmark_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- job = await response.parse()
- assert job is None
-
- @parametrize
- async def test_streaming_response_cancel(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.eval.jobs.with_streaming_response.cancel(
- job_id="job_id",
- benchmark_id="benchmark_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- job = await response.parse()
- assert job is None
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_cancel(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- await async_client.alpha.eval.jobs.with_raw_response.cancel(
- job_id="job_id",
- benchmark_id="",
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
- await async_client.alpha.eval.jobs.with_raw_response.cancel(
- job_id="",
- benchmark_id="benchmark_id",
- )
-
- @parametrize
- async def test_method_status(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.alpha.eval.jobs.status(
- job_id="job_id",
- benchmark_id="benchmark_id",
- )
- assert_matches_type(Job, job, path=["response"])
-
- @parametrize
- async def test_raw_response_status(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.eval.jobs.with_raw_response.status(
- job_id="job_id",
- benchmark_id="benchmark_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- job = await response.parse()
- assert_matches_type(Job, job, path=["response"])
-
- @parametrize
- async def test_streaming_response_status(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.eval.jobs.with_streaming_response.status(
- job_id="job_id",
- benchmark_id="benchmark_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- job = await response.parse()
- assert_matches_type(Job, job, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_status(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- await async_client.alpha.eval.jobs.with_raw_response.status(
- job_id="job_id",
- benchmark_id="",
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
- await async_client.alpha.eval.jobs.with_raw_response.status(
- job_id="",
- benchmark_id="benchmark_id",
- )
diff --git a/tests/api_resources/alpha/post_training/__init__.py b/tests/api_resources/alpha/post_training/__init__.py
deleted file mode 100644
index 6a8e62e9..00000000
--- a/tests/api_resources/alpha/post_training/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/alpha/post_training/test_job.py b/tests/api_resources/alpha/post_training/test_job.py
deleted file mode 100644
index 611bf4b6..00000000
--- a/tests/api_resources/alpha/post_training/test_job.py
+++ /dev/null
@@ -1,270 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import os
-from typing import Any, cast
-
-import pytest
-
-from tests.utils import assert_matches_type
-from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types.alpha.post_training import (
- JobListResponse,
- JobStatusResponse,
- JobArtifactsResponse,
-)
-
-base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
-
-
-class TestJob:
- parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
-
- @parametrize
- def test_method_list(self, client: LlamaStackClient) -> None:
- job = client.alpha.post_training.job.list()
- assert_matches_type(JobListResponse, job, path=["response"])
-
- @parametrize
- def test_raw_response_list(self, client: LlamaStackClient) -> None:
- response = client.alpha.post_training.job.with_raw_response.list()
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- job = response.parse()
- assert_matches_type(JobListResponse, job, path=["response"])
-
- @parametrize
- def test_streaming_response_list(self, client: LlamaStackClient) -> None:
- with client.alpha.post_training.job.with_streaming_response.list() as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- job = response.parse()
- assert_matches_type(JobListResponse, job, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_method_artifacts(self, client: LlamaStackClient) -> None:
- job = client.alpha.post_training.job.artifacts(
- job_uuid="job_uuid",
- )
- assert_matches_type(JobArtifactsResponse, job, path=["response"])
-
- @parametrize
- def test_raw_response_artifacts(self, client: LlamaStackClient) -> None:
- response = client.alpha.post_training.job.with_raw_response.artifacts(
- job_uuid="job_uuid",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- job = response.parse()
- assert_matches_type(JobArtifactsResponse, job, path=["response"])
-
- @parametrize
- def test_streaming_response_artifacts(self, client: LlamaStackClient) -> None:
- with client.alpha.post_training.job.with_streaming_response.artifacts(
- job_uuid="job_uuid",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- job = response.parse()
- assert_matches_type(JobArtifactsResponse, job, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_method_cancel(self, client: LlamaStackClient) -> None:
- job = client.alpha.post_training.job.cancel(
- job_uuid="job_uuid",
- )
- assert job is None
-
- @parametrize
- def test_raw_response_cancel(self, client: LlamaStackClient) -> None:
- response = client.alpha.post_training.job.with_raw_response.cancel(
- job_uuid="job_uuid",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- job = response.parse()
- assert job is None
-
- @parametrize
- def test_streaming_response_cancel(self, client: LlamaStackClient) -> None:
- with client.alpha.post_training.job.with_streaming_response.cancel(
- job_uuid="job_uuid",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- job = response.parse()
- assert job is None
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_method_status(self, client: LlamaStackClient) -> None:
- job = client.alpha.post_training.job.status(
- job_uuid="job_uuid",
- )
- assert_matches_type(JobStatusResponse, job, path=["response"])
-
- @parametrize
- def test_raw_response_status(self, client: LlamaStackClient) -> None:
- response = client.alpha.post_training.job.with_raw_response.status(
- job_uuid="job_uuid",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- job = response.parse()
- assert_matches_type(JobStatusResponse, job, path=["response"])
-
- @parametrize
- def test_streaming_response_status(self, client: LlamaStackClient) -> None:
- with client.alpha.post_training.job.with_streaming_response.status(
- job_uuid="job_uuid",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- job = response.parse()
- assert_matches_type(JobStatusResponse, job, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
-
-class TestAsyncJob:
- parametrize = pytest.mark.parametrize(
- "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
- )
-
- @parametrize
- async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.alpha.post_training.job.list()
- assert_matches_type(JobListResponse, job, path=["response"])
-
- @parametrize
- async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.post_training.job.with_raw_response.list()
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- job = await response.parse()
- assert_matches_type(JobListResponse, job, path=["response"])
-
- @parametrize
- async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.post_training.job.with_streaming_response.list() as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- job = await response.parse()
- assert_matches_type(JobListResponse, job, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_method_artifacts(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.alpha.post_training.job.artifacts(
- job_uuid="job_uuid",
- )
- assert_matches_type(JobArtifactsResponse, job, path=["response"])
-
- @parametrize
- async def test_raw_response_artifacts(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.post_training.job.with_raw_response.artifacts(
- job_uuid="job_uuid",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- job = await response.parse()
- assert_matches_type(JobArtifactsResponse, job, path=["response"])
-
- @parametrize
- async def test_streaming_response_artifacts(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.post_training.job.with_streaming_response.artifacts(
- job_uuid="job_uuid",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- job = await response.parse()
- assert_matches_type(JobArtifactsResponse, job, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_method_cancel(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.alpha.post_training.job.cancel(
- job_uuid="job_uuid",
- )
- assert job is None
-
- @parametrize
- async def test_raw_response_cancel(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.post_training.job.with_raw_response.cancel(
- job_uuid="job_uuid",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- job = await response.parse()
- assert job is None
-
- @parametrize
- async def test_streaming_response_cancel(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.post_training.job.with_streaming_response.cancel(
- job_uuid="job_uuid",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- job = await response.parse()
- assert job is None
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_method_status(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.alpha.post_training.job.status(
- job_uuid="job_uuid",
- )
- assert_matches_type(JobStatusResponse, job, path=["response"])
-
- @parametrize
- async def test_raw_response_status(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.post_training.job.with_raw_response.status(
- job_uuid="job_uuid",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- job = await response.parse()
- assert_matches_type(JobStatusResponse, job, path=["response"])
-
- @parametrize
- async def test_streaming_response_status(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.post_training.job.with_streaming_response.status(
- job_uuid="job_uuid",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- job = await response.parse()
- assert_matches_type(JobStatusResponse, job, path=["response"])
-
- assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/alpha/test_agents.py b/tests/api_resources/alpha/test_agents.py
deleted file mode 100644
index 3324871c..00000000
--- a/tests/api_resources/alpha/test_agents.py
+++ /dev/null
@@ -1,418 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import os
-from typing import Any, cast
-
-import pytest
-
-from tests.utils import assert_matches_type
-from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types.alpha import (
- AgentListResponse,
- AgentCreateResponse,
- AgentRetrieveResponse,
-)
-
-base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
-
-
-class TestAgents:
- parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
-
- @parametrize
- def test_method_create(self, client: LlamaStackClient) -> None:
- agent = client.alpha.agents.create(
- agent_config={
- "instructions": "instructions",
- "model": "model",
- },
- )
- assert_matches_type(AgentCreateResponse, agent, path=["response"])
-
- @parametrize
- def test_method_create_with_all_params(self, client: LlamaStackClient) -> None:
- agent = client.alpha.agents.create(
- agent_config={
- "instructions": "instructions",
- "model": "model",
- "client_tools": [
- {
- "name": "name",
- "description": "description",
- "input_schema": {"foo": True},
- "metadata": {"foo": True},
- "output_schema": {"foo": True},
- "toolgroup_id": "toolgroup_id",
- }
- ],
- "enable_session_persistence": True,
- "input_shields": ["string"],
- "max_infer_iters": 0,
- "name": "name",
- "output_shields": ["string"],
- "response_format": {
- "json_schema": {"foo": True},
- "type": "json_schema",
- },
- "sampling_params": {
- "strategy": {"type": "greedy"},
- "max_tokens": 0,
- "repetition_penalty": 0,
- "stop": ["string"],
- },
- "tool_choice": "auto",
- "tool_config": {
- "system_message_behavior": "append",
- "tool_choice": "auto",
- "tool_prompt_format": "json",
- },
- "tool_prompt_format": "json",
- "toolgroups": ["string"],
- },
- )
- assert_matches_type(AgentCreateResponse, agent, path=["response"])
-
- @parametrize
- def test_raw_response_create(self, client: LlamaStackClient) -> None:
- response = client.alpha.agents.with_raw_response.create(
- agent_config={
- "instructions": "instructions",
- "model": "model",
- },
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- agent = response.parse()
- assert_matches_type(AgentCreateResponse, agent, path=["response"])
-
- @parametrize
- def test_streaming_response_create(self, client: LlamaStackClient) -> None:
- with client.alpha.agents.with_streaming_response.create(
- agent_config={
- "instructions": "instructions",
- "model": "model",
- },
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- agent = response.parse()
- assert_matches_type(AgentCreateResponse, agent, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_method_retrieve(self, client: LlamaStackClient) -> None:
- agent = client.alpha.agents.retrieve(
- "agent_id",
- )
- assert_matches_type(AgentRetrieveResponse, agent, path=["response"])
-
- @parametrize
- def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
- response = client.alpha.agents.with_raw_response.retrieve(
- "agent_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- agent = response.parse()
- assert_matches_type(AgentRetrieveResponse, agent, path=["response"])
-
- @parametrize
- def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
- with client.alpha.agents.with_streaming_response.retrieve(
- "agent_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- agent = response.parse()
- assert_matches_type(AgentRetrieveResponse, agent, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.alpha.agents.with_raw_response.retrieve(
- "",
- )
-
- @parametrize
- def test_method_list(self, client: LlamaStackClient) -> None:
- agent = client.alpha.agents.list()
- assert_matches_type(AgentListResponse, agent, path=["response"])
-
- @parametrize
- def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
- agent = client.alpha.agents.list(
- limit=0,
- start_index=0,
- )
- assert_matches_type(AgentListResponse, agent, path=["response"])
-
- @parametrize
- def test_raw_response_list(self, client: LlamaStackClient) -> None:
- response = client.alpha.agents.with_raw_response.list()
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- agent = response.parse()
- assert_matches_type(AgentListResponse, agent, path=["response"])
-
- @parametrize
- def test_streaming_response_list(self, client: LlamaStackClient) -> None:
- with client.alpha.agents.with_streaming_response.list() as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- agent = response.parse()
- assert_matches_type(AgentListResponse, agent, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_method_delete(self, client: LlamaStackClient) -> None:
- agent = client.alpha.agents.delete(
- "agent_id",
- )
- assert agent is None
-
- @parametrize
- def test_raw_response_delete(self, client: LlamaStackClient) -> None:
- response = client.alpha.agents.with_raw_response.delete(
- "agent_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- agent = response.parse()
- assert agent is None
-
- @parametrize
- def test_streaming_response_delete(self, client: LlamaStackClient) -> None:
- with client.alpha.agents.with_streaming_response.delete(
- "agent_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- agent = response.parse()
- assert agent is None
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_delete(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.alpha.agents.with_raw_response.delete(
- "",
- )
-
-
-class TestAsyncAgents:
- parametrize = pytest.mark.parametrize(
- "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
- )
-
- @parametrize
- async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None:
- agent = await async_client.alpha.agents.create(
- agent_config={
- "instructions": "instructions",
- "model": "model",
- },
- )
- assert_matches_type(AgentCreateResponse, agent, path=["response"])
-
- @parametrize
- async def test_method_create_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- agent = await async_client.alpha.agents.create(
- agent_config={
- "instructions": "instructions",
- "model": "model",
- "client_tools": [
- {
- "name": "name",
- "description": "description",
- "input_schema": {"foo": True},
- "metadata": {"foo": True},
- "output_schema": {"foo": True},
- "toolgroup_id": "toolgroup_id",
- }
- ],
- "enable_session_persistence": True,
- "input_shields": ["string"],
- "max_infer_iters": 0,
- "name": "name",
- "output_shields": ["string"],
- "response_format": {
- "json_schema": {"foo": True},
- "type": "json_schema",
- },
- "sampling_params": {
- "strategy": {"type": "greedy"},
- "max_tokens": 0,
- "repetition_penalty": 0,
- "stop": ["string"],
- },
- "tool_choice": "auto",
- "tool_config": {
- "system_message_behavior": "append",
- "tool_choice": "auto",
- "tool_prompt_format": "json",
- },
- "tool_prompt_format": "json",
- "toolgroups": ["string"],
- },
- )
- assert_matches_type(AgentCreateResponse, agent, path=["response"])
-
- @parametrize
- async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.agents.with_raw_response.create(
- agent_config={
- "instructions": "instructions",
- "model": "model",
- },
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- agent = await response.parse()
- assert_matches_type(AgentCreateResponse, agent, path=["response"])
-
- @parametrize
- async def test_streaming_response_create(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.agents.with_streaming_response.create(
- agent_config={
- "instructions": "instructions",
- "model": "model",
- },
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- agent = await response.parse()
- assert_matches_type(AgentCreateResponse, agent, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- agent = await async_client.alpha.agents.retrieve(
- "agent_id",
- )
- assert_matches_type(AgentRetrieveResponse, agent, path=["response"])
-
- @parametrize
- async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.agents.with_raw_response.retrieve(
- "agent_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- agent = await response.parse()
- assert_matches_type(AgentRetrieveResponse, agent, path=["response"])
-
- @parametrize
- async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.agents.with_streaming_response.retrieve(
- "agent_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- agent = await response.parse()
- assert_matches_type(AgentRetrieveResponse, agent, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.alpha.agents.with_raw_response.retrieve(
- "",
- )
-
- @parametrize
- async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
- agent = await async_client.alpha.agents.list()
- assert_matches_type(AgentListResponse, agent, path=["response"])
-
- @parametrize
- async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- agent = await async_client.alpha.agents.list(
- limit=0,
- start_index=0,
- )
- assert_matches_type(AgentListResponse, agent, path=["response"])
-
- @parametrize
- async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.agents.with_raw_response.list()
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- agent = await response.parse()
- assert_matches_type(AgentListResponse, agent, path=["response"])
-
- @parametrize
- async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.agents.with_streaming_response.list() as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- agent = await response.parse()
- assert_matches_type(AgentListResponse, agent, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_method_delete(self, async_client: AsyncLlamaStackClient) -> None:
- agent = await async_client.alpha.agents.delete(
- "agent_id",
- )
- assert agent is None
-
- @parametrize
- async def test_raw_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.agents.with_raw_response.delete(
- "agent_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- agent = await response.parse()
- assert agent is None
-
- @parametrize
- async def test_streaming_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.agents.with_streaming_response.delete(
- "agent_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- agent = await response.parse()
- assert agent is None
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_delete(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.alpha.agents.with_raw_response.delete(
- "",
- )
diff --git a/tests/api_resources/alpha/test_benchmarks.py b/tests/api_resources/alpha/test_benchmarks.py
deleted file mode 100644
index 71ad6bc2..00000000
--- a/tests/api_resources/alpha/test_benchmarks.py
+++ /dev/null
@@ -1,254 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import os
-from typing import Any, cast
-
-import pytest
-
-from tests.utils import assert_matches_type
-from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types.alpha import Benchmark, BenchmarkListResponse
-
-base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
-
-
-class TestBenchmarks:
- parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
-
- @parametrize
- def test_method_retrieve(self, client: LlamaStackClient) -> None:
- benchmark = client.alpha.benchmarks.retrieve(
- "benchmark_id",
- )
- assert_matches_type(Benchmark, benchmark, path=["response"])
-
- @parametrize
- def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
- response = client.alpha.benchmarks.with_raw_response.retrieve(
- "benchmark_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- benchmark = response.parse()
- assert_matches_type(Benchmark, benchmark, path=["response"])
-
- @parametrize
- def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
- with client.alpha.benchmarks.with_streaming_response.retrieve(
- "benchmark_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- benchmark = response.parse()
- assert_matches_type(Benchmark, benchmark, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- client.alpha.benchmarks.with_raw_response.retrieve(
- "",
- )
-
- @parametrize
- def test_method_list(self, client: LlamaStackClient) -> None:
- benchmark = client.alpha.benchmarks.list()
- assert_matches_type(BenchmarkListResponse, benchmark, path=["response"])
-
- @parametrize
- def test_raw_response_list(self, client: LlamaStackClient) -> None:
- response = client.alpha.benchmarks.with_raw_response.list()
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- benchmark = response.parse()
- assert_matches_type(BenchmarkListResponse, benchmark, path=["response"])
-
- @parametrize
- def test_streaming_response_list(self, client: LlamaStackClient) -> None:
- with client.alpha.benchmarks.with_streaming_response.list() as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- benchmark = response.parse()
- assert_matches_type(BenchmarkListResponse, benchmark, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_method_register(self, client: LlamaStackClient) -> None:
- benchmark = client.alpha.benchmarks.register(
- benchmark_id="benchmark_id",
- dataset_id="dataset_id",
- scoring_functions=["string"],
- )
- assert benchmark is None
-
- @parametrize
- def test_method_register_with_all_params(self, client: LlamaStackClient) -> None:
- benchmark = client.alpha.benchmarks.register(
- benchmark_id="benchmark_id",
- dataset_id="dataset_id",
- scoring_functions=["string"],
- metadata={"foo": True},
- provider_benchmark_id="provider_benchmark_id",
- provider_id="provider_id",
- )
- assert benchmark is None
-
- @parametrize
- def test_raw_response_register(self, client: LlamaStackClient) -> None:
- response = client.alpha.benchmarks.with_raw_response.register(
- benchmark_id="benchmark_id",
- dataset_id="dataset_id",
- scoring_functions=["string"],
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- benchmark = response.parse()
- assert benchmark is None
-
- @parametrize
- def test_streaming_response_register(self, client: LlamaStackClient) -> None:
- with client.alpha.benchmarks.with_streaming_response.register(
- benchmark_id="benchmark_id",
- dataset_id="dataset_id",
- scoring_functions=["string"],
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- benchmark = response.parse()
- assert benchmark is None
-
- assert cast(Any, response.is_closed) is True
-
-
-class TestAsyncBenchmarks:
- parametrize = pytest.mark.parametrize(
- "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
- )
-
- @parametrize
- async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- benchmark = await async_client.alpha.benchmarks.retrieve(
- "benchmark_id",
- )
- assert_matches_type(Benchmark, benchmark, path=["response"])
-
- @parametrize
- async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.benchmarks.with_raw_response.retrieve(
- "benchmark_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- benchmark = await response.parse()
- assert_matches_type(Benchmark, benchmark, path=["response"])
-
- @parametrize
- async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.benchmarks.with_streaming_response.retrieve(
- "benchmark_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- benchmark = await response.parse()
- assert_matches_type(Benchmark, benchmark, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- await async_client.alpha.benchmarks.with_raw_response.retrieve(
- "",
- )
-
- @parametrize
- async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
- benchmark = await async_client.alpha.benchmarks.list()
- assert_matches_type(BenchmarkListResponse, benchmark, path=["response"])
-
- @parametrize
- async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.benchmarks.with_raw_response.list()
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- benchmark = await response.parse()
- assert_matches_type(BenchmarkListResponse, benchmark, path=["response"])
-
- @parametrize
- async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.benchmarks.with_streaming_response.list() as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- benchmark = await response.parse()
- assert_matches_type(BenchmarkListResponse, benchmark, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_method_register(self, async_client: AsyncLlamaStackClient) -> None:
- benchmark = await async_client.alpha.benchmarks.register(
- benchmark_id="benchmark_id",
- dataset_id="dataset_id",
- scoring_functions=["string"],
- )
- assert benchmark is None
-
- @parametrize
- async def test_method_register_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- benchmark = await async_client.alpha.benchmarks.register(
- benchmark_id="benchmark_id",
- dataset_id="dataset_id",
- scoring_functions=["string"],
- metadata={"foo": True},
- provider_benchmark_id="provider_benchmark_id",
- provider_id="provider_id",
- )
- assert benchmark is None
-
- @parametrize
- async def test_raw_response_register(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.benchmarks.with_raw_response.register(
- benchmark_id="benchmark_id",
- dataset_id="dataset_id",
- scoring_functions=["string"],
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- benchmark = await response.parse()
- assert benchmark is None
-
- @parametrize
- async def test_streaming_response_register(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.benchmarks.with_streaming_response.register(
- benchmark_id="benchmark_id",
- dataset_id="dataset_id",
- scoring_functions=["string"],
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- benchmark = await response.parse()
- assert benchmark is None
-
- assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/alpha/test_eval.py b/tests/api_resources/alpha/test_eval.py
deleted file mode 100644
index 1ee20010..00000000
--- a/tests/api_resources/alpha/test_eval.py
+++ /dev/null
@@ -1,1121 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import os
-from typing import Any, cast
-
-import pytest
-
-from tests.utils import assert_matches_type
-from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types.alpha import (
- Job,
- EvaluateResponse,
-)
-
-base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
-
-
-class TestEval:
- parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
-
- @parametrize
- def test_method_evaluate_rows(self, client: LlamaStackClient) -> None:
- eval = client.alpha.eval.evaluate_rows(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- input_rows=[{"foo": True}],
- scoring_functions=["string"],
- )
- assert_matches_type(EvaluateResponse, eval, path=["response"])
-
- @parametrize
- def test_method_evaluate_rows_with_all_params(self, client: LlamaStackClient) -> None:
- eval = client.alpha.eval.evaluate_rows(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {
- "strategy": {"type": "greedy"},
- "max_tokens": 0,
- "repetition_penalty": 0,
- "stop": ["string"],
- },
- "type": "model",
- "system_message": {
- "content": "string",
- "role": "system",
- },
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- "prompt_template": "prompt_template",
- }
- },
- "num_examples": 0,
- },
- input_rows=[{"foo": True}],
- scoring_functions=["string"],
- )
- assert_matches_type(EvaluateResponse, eval, path=["response"])
-
- @parametrize
- def test_raw_response_evaluate_rows(self, client: LlamaStackClient) -> None:
- response = client.alpha.eval.with_raw_response.evaluate_rows(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- input_rows=[{"foo": True}],
- scoring_functions=["string"],
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- eval = response.parse()
- assert_matches_type(EvaluateResponse, eval, path=["response"])
-
- @parametrize
- def test_streaming_response_evaluate_rows(self, client: LlamaStackClient) -> None:
- with client.alpha.eval.with_streaming_response.evaluate_rows(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- input_rows=[{"foo": True}],
- scoring_functions=["string"],
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- eval = response.parse()
- assert_matches_type(EvaluateResponse, eval, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_evaluate_rows(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- client.alpha.eval.with_raw_response.evaluate_rows(
- benchmark_id="",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- input_rows=[{"foo": True}],
- scoring_functions=["string"],
- )
-
- @parametrize
- def test_method_evaluate_rows_alpha(self, client: LlamaStackClient) -> None:
- eval = client.alpha.eval.evaluate_rows_alpha(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- input_rows=[{"foo": True}],
- scoring_functions=["string"],
- )
- assert_matches_type(EvaluateResponse, eval, path=["response"])
-
- @parametrize
- def test_method_evaluate_rows_alpha_with_all_params(self, client: LlamaStackClient) -> None:
- eval = client.alpha.eval.evaluate_rows_alpha(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {
- "strategy": {"type": "greedy"},
- "max_tokens": 0,
- "repetition_penalty": 0,
- "stop": ["string"],
- },
- "type": "model",
- "system_message": {
- "content": "string",
- "role": "system",
- },
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- "prompt_template": "prompt_template",
- }
- },
- "num_examples": 0,
- },
- input_rows=[{"foo": True}],
- scoring_functions=["string"],
- )
- assert_matches_type(EvaluateResponse, eval, path=["response"])
-
- @parametrize
- def test_raw_response_evaluate_rows_alpha(self, client: LlamaStackClient) -> None:
- response = client.alpha.eval.with_raw_response.evaluate_rows_alpha(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- input_rows=[{"foo": True}],
- scoring_functions=["string"],
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- eval = response.parse()
- assert_matches_type(EvaluateResponse, eval, path=["response"])
-
- @parametrize
- def test_streaming_response_evaluate_rows_alpha(self, client: LlamaStackClient) -> None:
- with client.alpha.eval.with_streaming_response.evaluate_rows_alpha(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- input_rows=[{"foo": True}],
- scoring_functions=["string"],
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- eval = response.parse()
- assert_matches_type(EvaluateResponse, eval, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_evaluate_rows_alpha(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- client.alpha.eval.with_raw_response.evaluate_rows_alpha(
- benchmark_id="",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- input_rows=[{"foo": True}],
- scoring_functions=["string"],
- )
-
- @parametrize
- def test_method_run_eval(self, client: LlamaStackClient) -> None:
- eval = client.alpha.eval.run_eval(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- )
- assert_matches_type(Job, eval, path=["response"])
-
- @parametrize
- def test_method_run_eval_with_all_params(self, client: LlamaStackClient) -> None:
- eval = client.alpha.eval.run_eval(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {
- "strategy": {"type": "greedy"},
- "max_tokens": 0,
- "repetition_penalty": 0,
- "stop": ["string"],
- },
- "type": "model",
- "system_message": {
- "content": "string",
- "role": "system",
- },
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- "prompt_template": "prompt_template",
- }
- },
- "num_examples": 0,
- },
- )
- assert_matches_type(Job, eval, path=["response"])
-
- @parametrize
- def test_raw_response_run_eval(self, client: LlamaStackClient) -> None:
- response = client.alpha.eval.with_raw_response.run_eval(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- eval = response.parse()
- assert_matches_type(Job, eval, path=["response"])
-
- @parametrize
- def test_streaming_response_run_eval(self, client: LlamaStackClient) -> None:
- with client.alpha.eval.with_streaming_response.run_eval(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- eval = response.parse()
- assert_matches_type(Job, eval, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_run_eval(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- client.alpha.eval.with_raw_response.run_eval(
- benchmark_id="",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- )
-
- @parametrize
- def test_method_run_eval_alpha(self, client: LlamaStackClient) -> None:
- eval = client.alpha.eval.run_eval_alpha(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- )
- assert_matches_type(Job, eval, path=["response"])
-
- @parametrize
- def test_method_run_eval_alpha_with_all_params(self, client: LlamaStackClient) -> None:
- eval = client.alpha.eval.run_eval_alpha(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {
- "strategy": {"type": "greedy"},
- "max_tokens": 0,
- "repetition_penalty": 0,
- "stop": ["string"],
- },
- "type": "model",
- "system_message": {
- "content": "string",
- "role": "system",
- },
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- "prompt_template": "prompt_template",
- }
- },
- "num_examples": 0,
- },
- )
- assert_matches_type(Job, eval, path=["response"])
-
- @parametrize
- def test_raw_response_run_eval_alpha(self, client: LlamaStackClient) -> None:
- response = client.alpha.eval.with_raw_response.run_eval_alpha(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- eval = response.parse()
- assert_matches_type(Job, eval, path=["response"])
-
- @parametrize
- def test_streaming_response_run_eval_alpha(self, client: LlamaStackClient) -> None:
- with client.alpha.eval.with_streaming_response.run_eval_alpha(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- eval = response.parse()
- assert_matches_type(Job, eval, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_run_eval_alpha(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- client.alpha.eval.with_raw_response.run_eval_alpha(
- benchmark_id="",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- )
-
-
-class TestAsyncEval:
- parametrize = pytest.mark.parametrize(
- "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
- )
-
- @parametrize
- async def test_method_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.alpha.eval.evaluate_rows(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- input_rows=[{"foo": True}],
- scoring_functions=["string"],
- )
- assert_matches_type(EvaluateResponse, eval, path=["response"])
-
- @parametrize
- async def test_method_evaluate_rows_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.alpha.eval.evaluate_rows(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {
- "strategy": {"type": "greedy"},
- "max_tokens": 0,
- "repetition_penalty": 0,
- "stop": ["string"],
- },
- "type": "model",
- "system_message": {
- "content": "string",
- "role": "system",
- },
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- "prompt_template": "prompt_template",
- }
- },
- "num_examples": 0,
- },
- input_rows=[{"foo": True}],
- scoring_functions=["string"],
- )
- assert_matches_type(EvaluateResponse, eval, path=["response"])
-
- @parametrize
- async def test_raw_response_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.eval.with_raw_response.evaluate_rows(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- input_rows=[{"foo": True}],
- scoring_functions=["string"],
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- eval = await response.parse()
- assert_matches_type(EvaluateResponse, eval, path=["response"])
-
- @parametrize
- async def test_streaming_response_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.eval.with_streaming_response.evaluate_rows(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- input_rows=[{"foo": True}],
- scoring_functions=["string"],
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- eval = await response.parse()
- assert_matches_type(EvaluateResponse, eval, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- await async_client.alpha.eval.with_raw_response.evaluate_rows(
- benchmark_id="",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- input_rows=[{"foo": True}],
- scoring_functions=["string"],
- )
-
- @parametrize
- async def test_method_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.alpha.eval.evaluate_rows_alpha(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- input_rows=[{"foo": True}],
- scoring_functions=["string"],
- )
- assert_matches_type(EvaluateResponse, eval, path=["response"])
-
- @parametrize
- async def test_method_evaluate_rows_alpha_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.alpha.eval.evaluate_rows_alpha(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {
- "strategy": {"type": "greedy"},
- "max_tokens": 0,
- "repetition_penalty": 0,
- "stop": ["string"],
- },
- "type": "model",
- "system_message": {
- "content": "string",
- "role": "system",
- },
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- "prompt_template": "prompt_template",
- }
- },
- "num_examples": 0,
- },
- input_rows=[{"foo": True}],
- scoring_functions=["string"],
- )
- assert_matches_type(EvaluateResponse, eval, path=["response"])
-
- @parametrize
- async def test_raw_response_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.eval.with_raw_response.evaluate_rows_alpha(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- input_rows=[{"foo": True}],
- scoring_functions=["string"],
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- eval = await response.parse()
- assert_matches_type(EvaluateResponse, eval, path=["response"])
-
- @parametrize
- async def test_streaming_response_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.eval.with_streaming_response.evaluate_rows_alpha(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- input_rows=[{"foo": True}],
- scoring_functions=["string"],
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- eval = await response.parse()
- assert_matches_type(EvaluateResponse, eval, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- await async_client.alpha.eval.with_raw_response.evaluate_rows_alpha(
- benchmark_id="",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- input_rows=[{"foo": True}],
- scoring_functions=["string"],
- )
-
- @parametrize
- async def test_method_run_eval(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.alpha.eval.run_eval(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- )
- assert_matches_type(Job, eval, path=["response"])
-
- @parametrize
- async def test_method_run_eval_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.alpha.eval.run_eval(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {
- "strategy": {"type": "greedy"},
- "max_tokens": 0,
- "repetition_penalty": 0,
- "stop": ["string"],
- },
- "type": "model",
- "system_message": {
- "content": "string",
- "role": "system",
- },
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- "prompt_template": "prompt_template",
- }
- },
- "num_examples": 0,
- },
- )
- assert_matches_type(Job, eval, path=["response"])
-
- @parametrize
- async def test_raw_response_run_eval(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.eval.with_raw_response.run_eval(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- eval = await response.parse()
- assert_matches_type(Job, eval, path=["response"])
-
- @parametrize
- async def test_streaming_response_run_eval(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.eval.with_streaming_response.run_eval(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- eval = await response.parse()
- assert_matches_type(Job, eval, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_run_eval(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- await async_client.alpha.eval.with_raw_response.run_eval(
- benchmark_id="",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- )
-
- @parametrize
- async def test_method_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.alpha.eval.run_eval_alpha(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- )
- assert_matches_type(Job, eval, path=["response"])
-
- @parametrize
- async def test_method_run_eval_alpha_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.alpha.eval.run_eval_alpha(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {
- "strategy": {"type": "greedy"},
- "max_tokens": 0,
- "repetition_penalty": 0,
- "stop": ["string"],
- },
- "type": "model",
- "system_message": {
- "content": "string",
- "role": "system",
- },
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- "prompt_template": "prompt_template",
- }
- },
- "num_examples": 0,
- },
- )
- assert_matches_type(Job, eval, path=["response"])
-
- @parametrize
- async def test_raw_response_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.eval.with_raw_response.run_eval_alpha(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- eval = await response.parse()
- assert_matches_type(Job, eval, path=["response"])
-
- @parametrize
- async def test_streaming_response_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.eval.with_streaming_response.run_eval_alpha(
- benchmark_id="benchmark_id",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- eval = await response.parse()
- assert_matches_type(Job, eval, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- await async_client.alpha.eval.with_raw_response.run_eval_alpha(
- benchmark_id="",
- benchmark_config={
- "eval_candidate": {
- "model": "model",
- "sampling_params": {"strategy": {"type": "greedy"}},
- "type": "model",
- },
- "scoring_params": {
- "foo": {
- "aggregation_functions": ["average"],
- "judge_model": "judge_model",
- "judge_score_regexes": ["string"],
- "type": "llm_as_judge",
- }
- },
- },
- )
diff --git a/tests/api_resources/alpha/test_inference.py b/tests/api_resources/alpha/test_inference.py
deleted file mode 100644
index d1308222..00000000
--- a/tests/api_resources/alpha/test_inference.py
+++ /dev/null
@@ -1,124 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import os
-from typing import Any, cast
-
-import pytest
-
-from tests.utils import assert_matches_type
-from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types.alpha import InferenceRerankResponse
-
-base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
-
-
-class TestInference:
- parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
-
- @parametrize
- def test_method_rerank(self, client: LlamaStackClient) -> None:
- inference = client.alpha.inference.rerank(
- items=["string"],
- model="model",
- query="string",
- )
- assert_matches_type(InferenceRerankResponse, inference, path=["response"])
-
- @parametrize
- def test_method_rerank_with_all_params(self, client: LlamaStackClient) -> None:
- inference = client.alpha.inference.rerank(
- items=["string"],
- model="model",
- query="string",
- max_num_results=0,
- )
- assert_matches_type(InferenceRerankResponse, inference, path=["response"])
-
- @parametrize
- def test_raw_response_rerank(self, client: LlamaStackClient) -> None:
- response = client.alpha.inference.with_raw_response.rerank(
- items=["string"],
- model="model",
- query="string",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- inference = response.parse()
- assert_matches_type(InferenceRerankResponse, inference, path=["response"])
-
- @parametrize
- def test_streaming_response_rerank(self, client: LlamaStackClient) -> None:
- with client.alpha.inference.with_streaming_response.rerank(
- items=["string"],
- model="model",
- query="string",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- inference = response.parse()
- assert_matches_type(InferenceRerankResponse, inference, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
-
-class TestAsyncInference:
- parametrize = pytest.mark.parametrize(
- "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
- )
-
- @parametrize
- async def test_method_rerank(self, async_client: AsyncLlamaStackClient) -> None:
- inference = await async_client.alpha.inference.rerank(
- items=["string"],
- model="model",
- query="string",
- )
- assert_matches_type(InferenceRerankResponse, inference, path=["response"])
-
- @parametrize
- async def test_method_rerank_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- inference = await async_client.alpha.inference.rerank(
- items=["string"],
- model="model",
- query="string",
- max_num_results=0,
- )
- assert_matches_type(InferenceRerankResponse, inference, path=["response"])
-
- @parametrize
- async def test_raw_response_rerank(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.inference.with_raw_response.rerank(
- items=["string"],
- model="model",
- query="string",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- inference = await response.parse()
- assert_matches_type(InferenceRerankResponse, inference, path=["response"])
-
- @parametrize
- async def test_streaming_response_rerank(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.inference.with_streaming_response.rerank(
- items=["string"],
- model="model",
- query="string",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- inference = await response.parse()
- assert_matches_type(InferenceRerankResponse, inference, path=["response"])
-
- assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/alpha/test_post_training.py b/tests/api_resources/alpha/test_post_training.py
deleted file mode 100644
index 92f45593..00000000
--- a/tests/api_resources/alpha/test_post_training.py
+++ /dev/null
@@ -1,452 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import os
-from typing import Any, cast
-
-import pytest
-
-from tests.utils import assert_matches_type
-from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types.alpha import (
- PostTrainingJob,
-)
-
-base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
-
-
-class TestPostTraining:
- parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
-
- @parametrize
- def test_method_preference_optimize(self, client: LlamaStackClient) -> None:
- post_training = client.alpha.post_training.preference_optimize(
- algorithm_config={
- "beta": 0,
- "loss_type": "sigmoid",
- },
- finetuned_model="finetuned_model",
- hyperparam_search_config={"foo": True},
- job_uuid="job_uuid",
- logger_config={"foo": True},
- training_config={
- "gradient_accumulation_steps": 0,
- "max_steps_per_epoch": 0,
- "n_epochs": 0,
- },
- )
- assert_matches_type(PostTrainingJob, post_training, path=["response"])
-
- @parametrize
- def test_method_preference_optimize_with_all_params(self, client: LlamaStackClient) -> None:
- post_training = client.alpha.post_training.preference_optimize(
- algorithm_config={
- "beta": 0,
- "loss_type": "sigmoid",
- },
- finetuned_model="finetuned_model",
- hyperparam_search_config={"foo": True},
- job_uuid="job_uuid",
- logger_config={"foo": True},
- training_config={
- "gradient_accumulation_steps": 0,
- "max_steps_per_epoch": 0,
- "n_epochs": 0,
- "data_config": {
- "batch_size": 0,
- "data_format": "instruct",
- "dataset_id": "dataset_id",
- "shuffle": True,
- "packed": True,
- "train_on_input": True,
- "validation_dataset_id": "validation_dataset_id",
- },
- "dtype": "dtype",
- "efficiency_config": {
- "enable_activation_checkpointing": True,
- "enable_activation_offloading": True,
- "fsdp_cpu_offload": True,
- "memory_efficient_fsdp_wrap": True,
- },
- "max_validation_steps": 0,
- "optimizer_config": {
- "lr": 0,
- "num_warmup_steps": 0,
- "optimizer_type": "adam",
- "weight_decay": 0,
- },
- },
- )
- assert_matches_type(PostTrainingJob, post_training, path=["response"])
-
- @parametrize
- def test_raw_response_preference_optimize(self, client: LlamaStackClient) -> None:
- response = client.alpha.post_training.with_raw_response.preference_optimize(
- algorithm_config={
- "beta": 0,
- "loss_type": "sigmoid",
- },
- finetuned_model="finetuned_model",
- hyperparam_search_config={"foo": True},
- job_uuid="job_uuid",
- logger_config={"foo": True},
- training_config={
- "gradient_accumulation_steps": 0,
- "max_steps_per_epoch": 0,
- "n_epochs": 0,
- },
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- post_training = response.parse()
- assert_matches_type(PostTrainingJob, post_training, path=["response"])
-
- @parametrize
- def test_streaming_response_preference_optimize(self, client: LlamaStackClient) -> None:
- with client.alpha.post_training.with_streaming_response.preference_optimize(
- algorithm_config={
- "beta": 0,
- "loss_type": "sigmoid",
- },
- finetuned_model="finetuned_model",
- hyperparam_search_config={"foo": True},
- job_uuid="job_uuid",
- logger_config={"foo": True},
- training_config={
- "gradient_accumulation_steps": 0,
- "max_steps_per_epoch": 0,
- "n_epochs": 0,
- },
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- post_training = response.parse()
- assert_matches_type(PostTrainingJob, post_training, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_method_supervised_fine_tune(self, client: LlamaStackClient) -> None:
- post_training = client.alpha.post_training.supervised_fine_tune(
- hyperparam_search_config={"foo": True},
- job_uuid="job_uuid",
- logger_config={"foo": True},
- training_config={
- "gradient_accumulation_steps": 0,
- "max_steps_per_epoch": 0,
- "n_epochs": 0,
- },
- )
- assert_matches_type(PostTrainingJob, post_training, path=["response"])
-
- @parametrize
- def test_method_supervised_fine_tune_with_all_params(self, client: LlamaStackClient) -> None:
- post_training = client.alpha.post_training.supervised_fine_tune(
- hyperparam_search_config={"foo": True},
- job_uuid="job_uuid",
- logger_config={"foo": True},
- training_config={
- "gradient_accumulation_steps": 0,
- "max_steps_per_epoch": 0,
- "n_epochs": 0,
- "data_config": {
- "batch_size": 0,
- "data_format": "instruct",
- "dataset_id": "dataset_id",
- "shuffle": True,
- "packed": True,
- "train_on_input": True,
- "validation_dataset_id": "validation_dataset_id",
- },
- "dtype": "dtype",
- "efficiency_config": {
- "enable_activation_checkpointing": True,
- "enable_activation_offloading": True,
- "fsdp_cpu_offload": True,
- "memory_efficient_fsdp_wrap": True,
- },
- "max_validation_steps": 0,
- "optimizer_config": {
- "lr": 0,
- "num_warmup_steps": 0,
- "optimizer_type": "adam",
- "weight_decay": 0,
- },
- },
- algorithm_config={
- "alpha": 0,
- "apply_lora_to_mlp": True,
- "apply_lora_to_output": True,
- "lora_attn_modules": ["string"],
- "rank": 0,
- "type": "LoRA",
- "quantize_base": True,
- "use_dora": True,
- },
- checkpoint_dir="checkpoint_dir",
- model="model",
- )
- assert_matches_type(PostTrainingJob, post_training, path=["response"])
-
- @parametrize
- def test_raw_response_supervised_fine_tune(self, client: LlamaStackClient) -> None:
- response = client.alpha.post_training.with_raw_response.supervised_fine_tune(
- hyperparam_search_config={"foo": True},
- job_uuid="job_uuid",
- logger_config={"foo": True},
- training_config={
- "gradient_accumulation_steps": 0,
- "max_steps_per_epoch": 0,
- "n_epochs": 0,
- },
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- post_training = response.parse()
- assert_matches_type(PostTrainingJob, post_training, path=["response"])
-
- @parametrize
- def test_streaming_response_supervised_fine_tune(self, client: LlamaStackClient) -> None:
- with client.alpha.post_training.with_streaming_response.supervised_fine_tune(
- hyperparam_search_config={"foo": True},
- job_uuid="job_uuid",
- logger_config={"foo": True},
- training_config={
- "gradient_accumulation_steps": 0,
- "max_steps_per_epoch": 0,
- "n_epochs": 0,
- },
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- post_training = response.parse()
- assert_matches_type(PostTrainingJob, post_training, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
-
-class TestAsyncPostTraining:
- parametrize = pytest.mark.parametrize(
- "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
- )
-
- @parametrize
- async def test_method_preference_optimize(self, async_client: AsyncLlamaStackClient) -> None:
- post_training = await async_client.alpha.post_training.preference_optimize(
- algorithm_config={
- "beta": 0,
- "loss_type": "sigmoid",
- },
- finetuned_model="finetuned_model",
- hyperparam_search_config={"foo": True},
- job_uuid="job_uuid",
- logger_config={"foo": True},
- training_config={
- "gradient_accumulation_steps": 0,
- "max_steps_per_epoch": 0,
- "n_epochs": 0,
- },
- )
- assert_matches_type(PostTrainingJob, post_training, path=["response"])
-
- @parametrize
- async def test_method_preference_optimize_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- post_training = await async_client.alpha.post_training.preference_optimize(
- algorithm_config={
- "beta": 0,
- "loss_type": "sigmoid",
- },
- finetuned_model="finetuned_model",
- hyperparam_search_config={"foo": True},
- job_uuid="job_uuid",
- logger_config={"foo": True},
- training_config={
- "gradient_accumulation_steps": 0,
- "max_steps_per_epoch": 0,
- "n_epochs": 0,
- "data_config": {
- "batch_size": 0,
- "data_format": "instruct",
- "dataset_id": "dataset_id",
- "shuffle": True,
- "packed": True,
- "train_on_input": True,
- "validation_dataset_id": "validation_dataset_id",
- },
- "dtype": "dtype",
- "efficiency_config": {
- "enable_activation_checkpointing": True,
- "enable_activation_offloading": True,
- "fsdp_cpu_offload": True,
- "memory_efficient_fsdp_wrap": True,
- },
- "max_validation_steps": 0,
- "optimizer_config": {
- "lr": 0,
- "num_warmup_steps": 0,
- "optimizer_type": "adam",
- "weight_decay": 0,
- },
- },
- )
- assert_matches_type(PostTrainingJob, post_training, path=["response"])
-
- @parametrize
- async def test_raw_response_preference_optimize(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.post_training.with_raw_response.preference_optimize(
- algorithm_config={
- "beta": 0,
- "loss_type": "sigmoid",
- },
- finetuned_model="finetuned_model",
- hyperparam_search_config={"foo": True},
- job_uuid="job_uuid",
- logger_config={"foo": True},
- training_config={
- "gradient_accumulation_steps": 0,
- "max_steps_per_epoch": 0,
- "n_epochs": 0,
- },
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- post_training = await response.parse()
- assert_matches_type(PostTrainingJob, post_training, path=["response"])
-
- @parametrize
- async def test_streaming_response_preference_optimize(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.post_training.with_streaming_response.preference_optimize(
- algorithm_config={
- "beta": 0,
- "loss_type": "sigmoid",
- },
- finetuned_model="finetuned_model",
- hyperparam_search_config={"foo": True},
- job_uuid="job_uuid",
- logger_config={"foo": True},
- training_config={
- "gradient_accumulation_steps": 0,
- "max_steps_per_epoch": 0,
- "n_epochs": 0,
- },
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- post_training = await response.parse()
- assert_matches_type(PostTrainingJob, post_training, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_method_supervised_fine_tune(self, async_client: AsyncLlamaStackClient) -> None:
- post_training = await async_client.alpha.post_training.supervised_fine_tune(
- hyperparam_search_config={"foo": True},
- job_uuid="job_uuid",
- logger_config={"foo": True},
- training_config={
- "gradient_accumulation_steps": 0,
- "max_steps_per_epoch": 0,
- "n_epochs": 0,
- },
- )
- assert_matches_type(PostTrainingJob, post_training, path=["response"])
-
- @parametrize
- async def test_method_supervised_fine_tune_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- post_training = await async_client.alpha.post_training.supervised_fine_tune(
- hyperparam_search_config={"foo": True},
- job_uuid="job_uuid",
- logger_config={"foo": True},
- training_config={
- "gradient_accumulation_steps": 0,
- "max_steps_per_epoch": 0,
- "n_epochs": 0,
- "data_config": {
- "batch_size": 0,
- "data_format": "instruct",
- "dataset_id": "dataset_id",
- "shuffle": True,
- "packed": True,
- "train_on_input": True,
- "validation_dataset_id": "validation_dataset_id",
- },
- "dtype": "dtype",
- "efficiency_config": {
- "enable_activation_checkpointing": True,
- "enable_activation_offloading": True,
- "fsdp_cpu_offload": True,
- "memory_efficient_fsdp_wrap": True,
- },
- "max_validation_steps": 0,
- "optimizer_config": {
- "lr": 0,
- "num_warmup_steps": 0,
- "optimizer_type": "adam",
- "weight_decay": 0,
- },
- },
- algorithm_config={
- "alpha": 0,
- "apply_lora_to_mlp": True,
- "apply_lora_to_output": True,
- "lora_attn_modules": ["string"],
- "rank": 0,
- "type": "LoRA",
- "quantize_base": True,
- "use_dora": True,
- },
- checkpoint_dir="checkpoint_dir",
- model="model",
- )
- assert_matches_type(PostTrainingJob, post_training, path=["response"])
-
- @parametrize
- async def test_raw_response_supervised_fine_tune(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.alpha.post_training.with_raw_response.supervised_fine_tune(
- hyperparam_search_config={"foo": True},
- job_uuid="job_uuid",
- logger_config={"foo": True},
- training_config={
- "gradient_accumulation_steps": 0,
- "max_steps_per_epoch": 0,
- "n_epochs": 0,
- },
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- post_training = await response.parse()
- assert_matches_type(PostTrainingJob, post_training, path=["response"])
-
- @parametrize
- async def test_streaming_response_supervised_fine_tune(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.alpha.post_training.with_streaming_response.supervised_fine_tune(
- hyperparam_search_config={"foo": True},
- job_uuid="job_uuid",
- logger_config={"foo": True},
- training_config={
- "gradient_accumulation_steps": 0,
- "max_steps_per_epoch": 0,
- "n_epochs": 0,
- },
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- post_training = await response.parse()
- assert_matches_type(PostTrainingJob, post_training, path=["response"])
-
- assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/beta/__init__.py b/tests/api_resources/beta/__init__.py
deleted file mode 100644
index fd8019a9..00000000
--- a/tests/api_resources/beta/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/beta/test_datasets.py b/tests/api_resources/beta/test_datasets.py
deleted file mode 100644
index 7a6fc7c9..00000000
--- a/tests/api_resources/beta/test_datasets.py
+++ /dev/null
@@ -1,527 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import os
-from typing import Any, cast
-
-import pytest
-
-from tests.utils import assert_matches_type
-from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types.beta import (
- DatasetListResponse,
- DatasetIterrowsResponse,
- DatasetRegisterResponse,
- DatasetRetrieveResponse,
-)
-
-base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
-
-
-class TestDatasets:
- parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
-
- @parametrize
- def test_method_retrieve(self, client: LlamaStackClient) -> None:
- dataset = client.beta.datasets.retrieve(
- "dataset_id",
- )
- assert_matches_type(DatasetRetrieveResponse, dataset, path=["response"])
-
- @parametrize
- def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
- response = client.beta.datasets.with_raw_response.retrieve(
- "dataset_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- dataset = response.parse()
- assert_matches_type(DatasetRetrieveResponse, dataset, path=["response"])
-
- @parametrize
- def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
- with client.beta.datasets.with_streaming_response.retrieve(
- "dataset_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- dataset = response.parse()
- assert_matches_type(DatasetRetrieveResponse, dataset, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"):
- client.beta.datasets.with_raw_response.retrieve(
- "",
- )
-
- @parametrize
- def test_method_list(self, client: LlamaStackClient) -> None:
- dataset = client.beta.datasets.list()
- assert_matches_type(DatasetListResponse, dataset, path=["response"])
-
- @parametrize
- def test_raw_response_list(self, client: LlamaStackClient) -> None:
- response = client.beta.datasets.with_raw_response.list()
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- dataset = response.parse()
- assert_matches_type(DatasetListResponse, dataset, path=["response"])
-
- @parametrize
- def test_streaming_response_list(self, client: LlamaStackClient) -> None:
- with client.beta.datasets.with_streaming_response.list() as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- dataset = response.parse()
- assert_matches_type(DatasetListResponse, dataset, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_method_appendrows(self, client: LlamaStackClient) -> None:
- dataset = client.beta.datasets.appendrows(
- dataset_id="dataset_id",
- rows=[{"foo": True}],
- )
- assert dataset is None
-
- @parametrize
- def test_raw_response_appendrows(self, client: LlamaStackClient) -> None:
- response = client.beta.datasets.with_raw_response.appendrows(
- dataset_id="dataset_id",
- rows=[{"foo": True}],
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- dataset = response.parse()
- assert dataset is None
-
- @parametrize
- def test_streaming_response_appendrows(self, client: LlamaStackClient) -> None:
- with client.beta.datasets.with_streaming_response.appendrows(
- dataset_id="dataset_id",
- rows=[{"foo": True}],
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- dataset = response.parse()
- assert dataset is None
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_appendrows(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"):
- client.beta.datasets.with_raw_response.appendrows(
- dataset_id="",
- rows=[{"foo": True}],
- )
-
- @parametrize
- def test_method_iterrows(self, client: LlamaStackClient) -> None:
- dataset = client.beta.datasets.iterrows(
- dataset_id="dataset_id",
- )
- assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"])
-
- @parametrize
- def test_method_iterrows_with_all_params(self, client: LlamaStackClient) -> None:
- dataset = client.beta.datasets.iterrows(
- dataset_id="dataset_id",
- limit=0,
- start_index=0,
- )
- assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"])
-
- @parametrize
- def test_raw_response_iterrows(self, client: LlamaStackClient) -> None:
- response = client.beta.datasets.with_raw_response.iterrows(
- dataset_id="dataset_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- dataset = response.parse()
- assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"])
-
- @parametrize
- def test_streaming_response_iterrows(self, client: LlamaStackClient) -> None:
- with client.beta.datasets.with_streaming_response.iterrows(
- dataset_id="dataset_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- dataset = response.parse()
- assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_iterrows(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"):
- client.beta.datasets.with_raw_response.iterrows(
- dataset_id="",
- )
-
- @parametrize
- def test_method_register(self, client: LlamaStackClient) -> None:
- dataset = client.beta.datasets.register(
- purpose="post-training/messages",
- source={
- "type": "uri",
- "uri": "uri",
- },
- )
- assert_matches_type(DatasetRegisterResponse, dataset, path=["response"])
-
- @parametrize
- def test_method_register_with_all_params(self, client: LlamaStackClient) -> None:
- dataset = client.beta.datasets.register(
- purpose="post-training/messages",
- source={
- "type": "uri",
- "uri": "uri",
- },
- dataset_id="dataset_id",
- metadata={"foo": True},
- )
- assert_matches_type(DatasetRegisterResponse, dataset, path=["response"])
-
- @parametrize
- def test_raw_response_register(self, client: LlamaStackClient) -> None:
- response = client.beta.datasets.with_raw_response.register(
- purpose="post-training/messages",
- source={
- "type": "uri",
- "uri": "uri",
- },
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- dataset = response.parse()
- assert_matches_type(DatasetRegisterResponse, dataset, path=["response"])
-
- @parametrize
- def test_streaming_response_register(self, client: LlamaStackClient) -> None:
- with client.beta.datasets.with_streaming_response.register(
- purpose="post-training/messages",
- source={
- "type": "uri",
- "uri": "uri",
- },
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- dataset = response.parse()
- assert_matches_type(DatasetRegisterResponse, dataset, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_method_unregister(self, client: LlamaStackClient) -> None:
- dataset = client.beta.datasets.unregister(
- "dataset_id",
- )
- assert dataset is None
-
- @parametrize
- def test_raw_response_unregister(self, client: LlamaStackClient) -> None:
- response = client.beta.datasets.with_raw_response.unregister(
- "dataset_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- dataset = response.parse()
- assert dataset is None
-
- @parametrize
- def test_streaming_response_unregister(self, client: LlamaStackClient) -> None:
- with client.beta.datasets.with_streaming_response.unregister(
- "dataset_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- dataset = response.parse()
- assert dataset is None
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_unregister(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"):
- client.beta.datasets.with_raw_response.unregister(
- "",
- )
-
-
-class TestAsyncDatasets:
- parametrize = pytest.mark.parametrize(
- "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
- )
-
- @parametrize
- async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- dataset = await async_client.beta.datasets.retrieve(
- "dataset_id",
- )
- assert_matches_type(DatasetRetrieveResponse, dataset, path=["response"])
-
- @parametrize
- async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.beta.datasets.with_raw_response.retrieve(
- "dataset_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- dataset = await response.parse()
- assert_matches_type(DatasetRetrieveResponse, dataset, path=["response"])
-
- @parametrize
- async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.beta.datasets.with_streaming_response.retrieve(
- "dataset_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- dataset = await response.parse()
- assert_matches_type(DatasetRetrieveResponse, dataset, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"):
- await async_client.beta.datasets.with_raw_response.retrieve(
- "",
- )
-
- @parametrize
- async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
- dataset = await async_client.beta.datasets.list()
- assert_matches_type(DatasetListResponse, dataset, path=["response"])
-
- @parametrize
- async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.beta.datasets.with_raw_response.list()
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- dataset = await response.parse()
- assert_matches_type(DatasetListResponse, dataset, path=["response"])
-
- @parametrize
- async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.beta.datasets.with_streaming_response.list() as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- dataset = await response.parse()
- assert_matches_type(DatasetListResponse, dataset, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_method_appendrows(self, async_client: AsyncLlamaStackClient) -> None:
- dataset = await async_client.beta.datasets.appendrows(
- dataset_id="dataset_id",
- rows=[{"foo": True}],
- )
- assert dataset is None
-
- @parametrize
- async def test_raw_response_appendrows(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.beta.datasets.with_raw_response.appendrows(
- dataset_id="dataset_id",
- rows=[{"foo": True}],
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- dataset = await response.parse()
- assert dataset is None
-
- @parametrize
- async def test_streaming_response_appendrows(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.beta.datasets.with_streaming_response.appendrows(
- dataset_id="dataset_id",
- rows=[{"foo": True}],
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- dataset = await response.parse()
- assert dataset is None
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_appendrows(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"):
- await async_client.beta.datasets.with_raw_response.appendrows(
- dataset_id="",
- rows=[{"foo": True}],
- )
-
- @parametrize
- async def test_method_iterrows(self, async_client: AsyncLlamaStackClient) -> None:
- dataset = await async_client.beta.datasets.iterrows(
- dataset_id="dataset_id",
- )
- assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"])
-
- @parametrize
- async def test_method_iterrows_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- dataset = await async_client.beta.datasets.iterrows(
- dataset_id="dataset_id",
- limit=0,
- start_index=0,
- )
- assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"])
-
- @parametrize
- async def test_raw_response_iterrows(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.beta.datasets.with_raw_response.iterrows(
- dataset_id="dataset_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- dataset = await response.parse()
- assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"])
-
- @parametrize
- async def test_streaming_response_iterrows(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.beta.datasets.with_streaming_response.iterrows(
- dataset_id="dataset_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- dataset = await response.parse()
- assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_iterrows(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"):
- await async_client.beta.datasets.with_raw_response.iterrows(
- dataset_id="",
- )
-
- @parametrize
- async def test_method_register(self, async_client: AsyncLlamaStackClient) -> None:
- dataset = await async_client.beta.datasets.register(
- purpose="post-training/messages",
- source={
- "type": "uri",
- "uri": "uri",
- },
- )
- assert_matches_type(DatasetRegisterResponse, dataset, path=["response"])
-
- @parametrize
- async def test_method_register_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- dataset = await async_client.beta.datasets.register(
- purpose="post-training/messages",
- source={
- "type": "uri",
- "uri": "uri",
- },
- dataset_id="dataset_id",
- metadata={"foo": True},
- )
- assert_matches_type(DatasetRegisterResponse, dataset, path=["response"])
-
- @parametrize
- async def test_raw_response_register(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.beta.datasets.with_raw_response.register(
- purpose="post-training/messages",
- source={
- "type": "uri",
- "uri": "uri",
- },
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- dataset = await response.parse()
- assert_matches_type(DatasetRegisterResponse, dataset, path=["response"])
-
- @parametrize
- async def test_streaming_response_register(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.beta.datasets.with_streaming_response.register(
- purpose="post-training/messages",
- source={
- "type": "uri",
- "uri": "uri",
- },
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- dataset = await response.parse()
- assert_matches_type(DatasetRegisterResponse, dataset, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_method_unregister(self, async_client: AsyncLlamaStackClient) -> None:
- dataset = await async_client.beta.datasets.unregister(
- "dataset_id",
- )
- assert dataset is None
-
- @parametrize
- async def test_raw_response_unregister(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.beta.datasets.with_raw_response.unregister(
- "dataset_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- dataset = await response.parse()
- assert dataset is None
-
- @parametrize
- async def test_streaming_response_unregister(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.beta.datasets.with_streaming_response.unregister(
- "dataset_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- dataset = await response.parse()
- assert dataset is None
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_unregister(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"):
- await async_client.beta.datasets.with_raw_response.unregister(
- "",
- )
diff --git a/tests/api_resources/test_responses.py b/tests/api_resources/test_responses.py
index 5ef731fd..3bdafe3c 100644
--- a/tests/api_resources/test_responses.py
+++ b/tests/api_resources/test_responses.py
@@ -46,6 +46,16 @@ def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient
instructions="instructions",
max_infer_iters=0,
previous_response_id="previous_response_id",
+ prompt={
+ "id": "id",
+ "variables": {
+ "foo": {
+ "text": "text",
+ "type": "input_text",
+ }
+ },
+ "version": "version",
+ },
store=True,
stream=False,
temperature=0,
@@ -113,6 +123,16 @@ def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient
instructions="instructions",
max_infer_iters=0,
previous_response_id="previous_response_id",
+ prompt={
+ "id": "id",
+ "variables": {
+ "foo": {
+ "text": "text",
+ "type": "input_text",
+ }
+ },
+ "version": "version",
+ },
store=True,
temperature=0,
text={
@@ -295,6 +315,16 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
instructions="instructions",
max_infer_iters=0,
previous_response_id="previous_response_id",
+ prompt={
+ "id": "id",
+ "variables": {
+ "foo": {
+ "text": "text",
+ "type": "input_text",
+ }
+ },
+ "version": "version",
+ },
store=True,
stream=False,
temperature=0,
@@ -362,6 +392,16 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
instructions="instructions",
max_infer_iters=0,
previous_response_id="previous_response_id",
+ prompt={
+ "id": "id",
+ "variables": {
+ "foo": {
+ "text": "text",
+ "type": "input_text",
+ }
+ },
+ "version": "version",
+ },
store=True,
temperature=0,
text={
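Note for reviewers: the hunks above add a `prompt` argument to the `responses.create` tests. A minimal, hedged sketch of the new call shape, taken from the test payloads in this diff; the `model` and `input` arguments are assumptions here and are not part of this hunk:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://127.0.0.1:4010")  # assumed local test server

# Hypothetical call: only the `prompt` structure below is taken from this diff;
# `model` and `input` are assumed required arguments of responses.create.
response = client.responses.create(
    model="model",
    input="input",
    prompt={
        "id": "id",  # prompt template identifier
        "variables": {
            "foo": {"text": "text", "type": "input_text"},
        },
        "version": "version",
    },
)
```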
diff --git a/tests/api_resources/test_routes.py b/tests/api_resources/test_routes.py
index 9c863f26..58ab8ad9 100644
--- a/tests/api_resources/test_routes.py
+++ b/tests/api_resources/test_routes.py
@@ -28,6 +28,13 @@ def test_method_list(self, client: LlamaStackClient) -> None:
route = client.routes.list()
assert_matches_type(RouteListResponse, route, path=["response"])
+ @parametrize
+ def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
+ route = client.routes.list(
+ api_filter="v1",
+ )
+ assert_matches_type(RouteListResponse, route, path=["response"])
+
@parametrize
def test_raw_response_list(self, client: LlamaStackClient) -> None:
response = client.routes.with_raw_response.list()
@@ -59,6 +66,13 @@ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
route = await async_client.routes.list()
assert_matches_type(RouteListResponse, route, path=["response"])
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ route = await async_client.routes.list(
+ api_filter="v1",
+ )
+ assert_matches_type(RouteListResponse, route, path=["response"])
+
@parametrize
async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.routes.with_raw_response.list()
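Note for reviewers: the new `api_filter` parameter exercised above corresponds to this patch's breaking change (`/v1/inspect` lists only v1 APIs by default). A short sketch of the two call shapes shown in the new tests; no filter values beyond `"v1"` are assumed:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://127.0.0.1:4010")  # assumed local test server

# Default behaviour after this change: only v1 routes are returned.
routes = client.routes.list()

# Explicitly request the v1 listing (matches the default), as in the new test.
routes_v1 = client.routes.list(api_filter="v1")
```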
From 209de45599de19183a1cd14bc3567e34d2374184 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Wed, 29 Oct 2025 17:10:47 +0000
Subject: [PATCH 2/9] chore(api)!: /v1/inspect only lists v1 apis by default
https://github.com/llamastack/llama-stack/pull/3948
---
.stats.yml | 6 +-
api.md | 188 +++
src/llama_stack_client/_client.py | 76 ++
src/llama_stack_client/resources/__init__.py | 28 +
.../resources/alpha/__init__.py | 89 ++
.../resources/alpha/agents/__init__.py | 61 +
.../resources/alpha/agents/agents.py | 528 ++++++++
.../resources/alpha/agents/session.py | 471 +++++++
.../resources/alpha/agents/steps.py | 181 +++
.../resources/alpha/agents/turn.py | 875 +++++++++++++
.../resources/alpha/alpha.py | 230 ++++
.../resources/alpha/benchmarks.py | 359 ++++++
.../resources/alpha/eval/__init__.py | 33 +
.../resources/alpha/eval/eval.py | 530 ++++++++
.../resources/alpha/eval/jobs.py | 340 +++++
.../resources/alpha/inference.py | 218 ++++
.../resources/alpha/post_training/__init__.py | 33 +
.../resources/alpha/post_training/job.py | 404 ++++++
.../alpha/post_training/post_training.py | 393 ++++++
.../resources/beta/__init__.py | 33 +
src/llama_stack_client/resources/beta/beta.py | 102 ++
.../resources/beta/datasets.py | 676 ++++++++++
src/llama_stack_client/types/__init__.py | 4 +
.../types/alpha/__init__.py | 34 +
.../types/alpha/agent_create_params.py | 14 +
.../types/alpha/agent_create_response.py | 10 +
.../types/alpha/agent_list_params.py | 15 +
.../types/alpha/agent_list_response.py | 18 +
.../types/alpha/agent_retrieve_response.py | 19 +
.../types/alpha/agents/__init__.py | 13 +
.../agent_turn_response_stream_chunk.py | 11 +
.../types/alpha/agents/session.py | 23 +
.../alpha/agents/session_create_params.py | 12 +
.../alpha/agents/session_create_response.py | 10 +
.../types/alpha/agents/session_list_params.py | 15 +
.../alpha/agents/session_list_response.py | 18 +
.../alpha/agents/session_retrieve_params.py | 16 +
.../alpha/agents/step_retrieve_response.py | 23 +
.../types/alpha/agents/turn.py | 116 ++
.../types/alpha/agents/turn_create_params.py | 164 +++
.../types/alpha/agents/turn_response_event.py | 160 +++
.../types/alpha/agents/turn_resume_params.py | 32 +
.../types/alpha/algorithm_config_param.py | 50 +
.../types/alpha/benchmark.py | 28 +
.../types/alpha/benchmark_config_param.py | 53 +
.../types/alpha/benchmark_list_response.py | 10 +
.../types/alpha/benchmark_register_params.py | 30 +
.../alpha/eval_evaluate_rows_alpha_params.py | 22 +
.../types/alpha/eval_evaluate_rows_params.py | 22 +
.../types/alpha/eval_run_eval_alpha_params.py | 14 +
.../types/alpha/eval_run_eval_params.py | 14 +
.../types/alpha/evaluate_response.py | 16 +
.../types/alpha/inference_rerank_params.py | 106 ++
.../types/alpha/inference_rerank_response.py | 23 +
.../types/alpha/inference_step.py | 32 +
src/llama_stack_client/types/alpha/job.py | 15 +
.../types/alpha/list_benchmarks_response.py | 10 +
.../alpha/list_post_training_jobs_response.py | 10 +
.../types/alpha/memory_retrieval_step.py | 33 +
.../types/alpha/post_training/__init__.py | 7 +
.../post_training/job_artifacts_params.py | 12 +
.../post_training/job_artifacts_response.py | 50 +
.../alpha/post_training/job_cancel_params.py | 12 +
.../alpha/post_training/job_list_response.py | 15 +
.../alpha/post_training/job_status_params.py | 12 +
.../post_training/job_status_response.py | 66 +
.../types/alpha/post_training_job.py | 9 +
...ost_training_preference_optimize_params.py | 123 ++
...st_training_supervised_fine_tune_params.py | 119 ++
.../types/alpha/shield_call_step.py | 30 +
.../types/alpha/tool_execution_step.py | 34 +
.../types/alpha/tool_response.py | 23 +
.../types/alpha/tool_response_param.py | 24 +
src/llama_stack_client/types/beta/__init__.py | 9 +
.../types/beta/dataset_appendrows_params.py | 13 +
.../types/beta/dataset_iterrows_params.py | 15 +
.../types/beta/dataset_iterrows_response.py | 18 +
.../types/beta/dataset_list_response.py | 66 +
.../types/beta/dataset_register_params.py | 69 +
.../types/beta/dataset_register_response.py | 54 +
.../types/beta/dataset_retrieve_response.py | 54 +
.../types/beta/list_datasets_response.py | 11 +
.../types/shared/__init__.py | 3 +
.../types/shared/agent_config.py | 92 ++
.../types/shared/response_format.py | 33 +
.../types/shared/sampling_params.py | 70 ++
.../types/shared_params/__init__.py | 3 +
.../types/shared_params/agent_config.py | 94 ++
.../types/shared_params/response_format.py | 30 +
.../types/shared_params/sampling_params.py | 68 +
.../types/tool_def_param.py | 28 +
tests/api_resources/alpha/__init__.py | 1 +
tests/api_resources/alpha/agents/__init__.py | 1 +
.../alpha/agents/test_session.py | 416 ++++++
.../api_resources/alpha/agents/test_steps.py | 172 +++
tests/api_resources/alpha/agents/test_turn.py | 1030 +++++++++++++++
tests/api_resources/alpha/eval/__init__.py | 1 +
tests/api_resources/alpha/eval/test_jobs.py | 312 +++++
.../alpha/post_training/__init__.py | 1 +
.../alpha/post_training/test_job.py | 264 ++++
tests/api_resources/alpha/test_agents.py | 412 ++++++
tests/api_resources/alpha/test_benchmarks.py | 248 ++++
tests/api_resources/alpha/test_eval.py | 1115 +++++++++++++++++
tests/api_resources/alpha/test_inference.py | 118 ++
.../api_resources/alpha/test_post_training.py | 446 +++++++
tests/api_resources/beta/__init__.py | 1 +
tests/api_resources/beta/test_datasets.py | 521 ++++++++
107 files changed, 13391 insertions(+), 3 deletions(-)
create mode 100644 src/llama_stack_client/resources/alpha/__init__.py
create mode 100644 src/llama_stack_client/resources/alpha/agents/__init__.py
create mode 100644 src/llama_stack_client/resources/alpha/agents/agents.py
create mode 100644 src/llama_stack_client/resources/alpha/agents/session.py
create mode 100644 src/llama_stack_client/resources/alpha/agents/steps.py
create mode 100644 src/llama_stack_client/resources/alpha/agents/turn.py
create mode 100644 src/llama_stack_client/resources/alpha/alpha.py
create mode 100644 src/llama_stack_client/resources/alpha/benchmarks.py
create mode 100644 src/llama_stack_client/resources/alpha/eval/__init__.py
create mode 100644 src/llama_stack_client/resources/alpha/eval/eval.py
create mode 100644 src/llama_stack_client/resources/alpha/eval/jobs.py
create mode 100644 src/llama_stack_client/resources/alpha/inference.py
create mode 100644 src/llama_stack_client/resources/alpha/post_training/__init__.py
create mode 100644 src/llama_stack_client/resources/alpha/post_training/job.py
create mode 100644 src/llama_stack_client/resources/alpha/post_training/post_training.py
create mode 100644 src/llama_stack_client/resources/beta/__init__.py
create mode 100644 src/llama_stack_client/resources/beta/beta.py
create mode 100644 src/llama_stack_client/resources/beta/datasets.py
create mode 100644 src/llama_stack_client/types/alpha/agent_create_params.py
create mode 100644 src/llama_stack_client/types/alpha/agent_create_response.py
create mode 100644 src/llama_stack_client/types/alpha/agent_list_params.py
create mode 100644 src/llama_stack_client/types/alpha/agent_list_response.py
create mode 100644 src/llama_stack_client/types/alpha/agent_retrieve_response.py
create mode 100644 src/llama_stack_client/types/alpha/agents/agent_turn_response_stream_chunk.py
create mode 100644 src/llama_stack_client/types/alpha/agents/session.py
create mode 100644 src/llama_stack_client/types/alpha/agents/session_create_params.py
create mode 100644 src/llama_stack_client/types/alpha/agents/session_create_response.py
create mode 100644 src/llama_stack_client/types/alpha/agents/session_list_params.py
create mode 100644 src/llama_stack_client/types/alpha/agents/session_list_response.py
create mode 100644 src/llama_stack_client/types/alpha/agents/session_retrieve_params.py
create mode 100644 src/llama_stack_client/types/alpha/agents/step_retrieve_response.py
create mode 100644 src/llama_stack_client/types/alpha/agents/turn.py
create mode 100644 src/llama_stack_client/types/alpha/agents/turn_create_params.py
create mode 100644 src/llama_stack_client/types/alpha/agents/turn_response_event.py
create mode 100644 src/llama_stack_client/types/alpha/agents/turn_resume_params.py
create mode 100644 src/llama_stack_client/types/alpha/algorithm_config_param.py
create mode 100644 src/llama_stack_client/types/alpha/benchmark.py
create mode 100644 src/llama_stack_client/types/alpha/benchmark_config_param.py
create mode 100644 src/llama_stack_client/types/alpha/benchmark_list_response.py
create mode 100644 src/llama_stack_client/types/alpha/benchmark_register_params.py
create mode 100644 src/llama_stack_client/types/alpha/eval_evaluate_rows_alpha_params.py
create mode 100644 src/llama_stack_client/types/alpha/eval_evaluate_rows_params.py
create mode 100644 src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py
create mode 100644 src/llama_stack_client/types/alpha/eval_run_eval_params.py
create mode 100644 src/llama_stack_client/types/alpha/evaluate_response.py
create mode 100644 src/llama_stack_client/types/alpha/inference_rerank_params.py
create mode 100644 src/llama_stack_client/types/alpha/inference_rerank_response.py
create mode 100644 src/llama_stack_client/types/alpha/inference_step.py
create mode 100644 src/llama_stack_client/types/alpha/job.py
create mode 100644 src/llama_stack_client/types/alpha/list_benchmarks_response.py
create mode 100644 src/llama_stack_client/types/alpha/list_post_training_jobs_response.py
create mode 100644 src/llama_stack_client/types/alpha/memory_retrieval_step.py
create mode 100644 src/llama_stack_client/types/alpha/post_training/job_artifacts_params.py
create mode 100644 src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py
create mode 100644 src/llama_stack_client/types/alpha/post_training/job_cancel_params.py
create mode 100644 src/llama_stack_client/types/alpha/post_training/job_list_response.py
create mode 100644 src/llama_stack_client/types/alpha/post_training/job_status_params.py
create mode 100644 src/llama_stack_client/types/alpha/post_training/job_status_response.py
create mode 100644 src/llama_stack_client/types/alpha/post_training_job.py
create mode 100644 src/llama_stack_client/types/alpha/post_training_preference_optimize_params.py
create mode 100644 src/llama_stack_client/types/alpha/post_training_supervised_fine_tune_params.py
create mode 100644 src/llama_stack_client/types/alpha/shield_call_step.py
create mode 100644 src/llama_stack_client/types/alpha/tool_execution_step.py
create mode 100644 src/llama_stack_client/types/alpha/tool_response.py
create mode 100644 src/llama_stack_client/types/alpha/tool_response_param.py
create mode 100644 src/llama_stack_client/types/beta/dataset_appendrows_params.py
create mode 100644 src/llama_stack_client/types/beta/dataset_iterrows_params.py
create mode 100644 src/llama_stack_client/types/beta/dataset_iterrows_response.py
create mode 100644 src/llama_stack_client/types/beta/dataset_list_response.py
create mode 100644 src/llama_stack_client/types/beta/dataset_register_params.py
create mode 100644 src/llama_stack_client/types/beta/dataset_register_response.py
create mode 100644 src/llama_stack_client/types/beta/dataset_retrieve_response.py
create mode 100644 src/llama_stack_client/types/beta/list_datasets_response.py
create mode 100644 src/llama_stack_client/types/shared/agent_config.py
create mode 100644 src/llama_stack_client/types/shared/response_format.py
create mode 100644 src/llama_stack_client/types/shared/sampling_params.py
create mode 100644 src/llama_stack_client/types/shared_params/agent_config.py
create mode 100644 src/llama_stack_client/types/shared_params/response_format.py
create mode 100644 src/llama_stack_client/types/shared_params/sampling_params.py
create mode 100644 src/llama_stack_client/types/tool_def_param.py
create mode 100644 tests/api_resources/alpha/__init__.py
create mode 100644 tests/api_resources/alpha/agents/__init__.py
create mode 100644 tests/api_resources/alpha/agents/test_session.py
create mode 100644 tests/api_resources/alpha/agents/test_steps.py
create mode 100644 tests/api_resources/alpha/agents/test_turn.py
create mode 100644 tests/api_resources/alpha/eval/__init__.py
create mode 100644 tests/api_resources/alpha/eval/test_jobs.py
create mode 100644 tests/api_resources/alpha/post_training/__init__.py
create mode 100644 tests/api_resources/alpha/post_training/test_job.py
create mode 100644 tests/api_resources/alpha/test_agents.py
create mode 100644 tests/api_resources/alpha/test_benchmarks.py
create mode 100644 tests/api_resources/alpha/test_eval.py
create mode 100644 tests/api_resources/alpha/test_inference.py
create mode 100644 tests/api_resources/alpha/test_post_training.py
create mode 100644 tests/api_resources/beta/__init__.py
create mode 100644 tests/api_resources/beta/test_datasets.py
diff --git a/.stats.yml b/.stats.yml
index 7196faba..12443710 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 71
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-96255baaaf07826c5292cbb73073ab40aa7073c53996c3be49441a8ecf95c8ee.yml
-openapi_spec_hash: fae0303cbf75bd79be4ae084db015401
+configured_endpoints: 104
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-ab75f403b95703f8fe6c284da9efc1cc09d91cb27a4aa4da8660c825b56ddd02.yml
+openapi_spec_hash: 10f4950f76234968692b748956c83d52
config_hash: a3829dbdaa491194d01f399784d532cd
diff --git a/api.md b/api.md
index 50e43a41..9dbfca84 100644
--- a/api.md
+++ b/api.md
@@ -2,6 +2,7 @@
```python
from llama_stack_client.types import (
+ AgentConfig,
CompletionMessage,
Document,
InterleavedContent,
@@ -10,7 +11,9 @@ from llama_stack_client.types import (
ParamType,
QueryConfig,
QueryResult,
+ ResponseFormat,
SafetyViolation,
+ SamplingParams,
ScoringResult,
SystemMessage,
ToolCall,
@@ -413,3 +416,188 @@ Methods:
- client.files.list(\*\*params) -> SyncOpenAICursorPage[File]
- client.files.delete(file_id) -> DeleteFileResponse
- client.files.content(file_id) -> object
+
+# Alpha
+
+## Inference
+
+Types:
+
+```python
+from llama_stack_client.types.alpha import InferenceRerankResponse
+```
+
+Methods:
+
+- client.alpha.inference.rerank(\*\*params) -> InferenceRerankResponse
+
+## PostTraining
+
+Types:
+
+```python
+from llama_stack_client.types.alpha import (
+ AlgorithmConfig,
+ ListPostTrainingJobsResponse,
+ PostTrainingJob,
+)
+```
+
+Methods:
+
+- client.alpha.post_training.preference_optimize(\*\*params) -> PostTrainingJob
+- client.alpha.post_training.supervised_fine_tune(\*\*params) -> PostTrainingJob
+
+### Job
+
+Types:
+
+```python
+from llama_stack_client.types.alpha.post_training import (
+ JobListResponse,
+ JobArtifactsResponse,
+ JobStatusResponse,
+)
+```
+
+Methods:
+
+- client.alpha.post_training.job.list() -> JobListResponse
+- client.alpha.post_training.job.artifacts(\*\*params) -> JobArtifactsResponse
+- client.alpha.post_training.job.cancel(\*\*params) -> None
+- client.alpha.post_training.job.status(\*\*params) -> JobStatusResponse
+
+## Benchmarks
+
+Types:
+
+```python
+from llama_stack_client.types.alpha import Benchmark, ListBenchmarksResponse, BenchmarkListResponse
+```
+
+Methods:
+
+- client.alpha.benchmarks.retrieve(benchmark_id) -> Benchmark
+- client.alpha.benchmarks.list() -> BenchmarkListResponse
+- client.alpha.benchmarks.register(\*\*params) -> None
+
+## Eval
+
+Types:
+
+```python
+from llama_stack_client.types.alpha import BenchmarkConfig, EvaluateResponse, Job
+```
+
+Methods:
+
+- client.alpha.eval.evaluate_rows(benchmark_id, \*\*params) -> EvaluateResponse
+- client.alpha.eval.evaluate_rows_alpha(benchmark_id, \*\*params) -> EvaluateResponse
+- client.alpha.eval.run_eval(benchmark_id, \*\*params) -> Job
+- client.alpha.eval.run_eval_alpha(benchmark_id, \*\*params) -> Job
+
+### Jobs
+
+Methods:
+
+- client.alpha.eval.jobs.retrieve(job_id, \*, benchmark_id) -> EvaluateResponse
+- client.alpha.eval.jobs.cancel(job_id, \*, benchmark_id) -> None
+- client.alpha.eval.jobs.status(job_id, \*, benchmark_id) -> Job
+
+## Agents
+
+Types:
+
+```python
+from llama_stack_client.types.alpha import (
+ InferenceStep,
+ MemoryRetrievalStep,
+ ShieldCallStep,
+ ToolExecutionStep,
+ ToolResponse,
+ AgentCreateResponse,
+ AgentRetrieveResponse,
+ AgentListResponse,
+)
+```
+
+Methods:
+
+- client.alpha.agents.create(\*\*params) -> AgentCreateResponse
+- client.alpha.agents.retrieve(agent_id) -> AgentRetrieveResponse
+- client.alpha.agents.list(\*\*params) -> AgentListResponse
+- client.alpha.agents.delete(agent_id) -> None
+
+### Session
+
+Types:
+
+```python
+from llama_stack_client.types.alpha.agents import (
+ Session,
+ SessionCreateResponse,
+ SessionListResponse,
+)
+```
+
+Methods:
+
+- client.alpha.agents.session.create(agent_id, \*\*params) -> SessionCreateResponse
+- client.alpha.agents.session.retrieve(session_id, \*, agent_id, \*\*params) -> Session
+- client.alpha.agents.session.list(agent_id, \*\*params) -> SessionListResponse
+- client.alpha.agents.session.delete(session_id, \*, agent_id) -> None
+
+### Steps
+
+Types:
+
+```python
+from llama_stack_client.types.alpha.agents import StepRetrieveResponse
+```
+
+Methods:
+
+- client.alpha.agents.steps.retrieve(step_id, \*, agent_id, session_id, turn_id) -> StepRetrieveResponse
+
+### Turn
+
+Types:
+
+```python
+from llama_stack_client.types.alpha.agents import (
+ AgentTurnResponseStreamChunk,
+ Turn,
+ TurnResponseEvent,
+)
+```
+
+Methods:
+
+- client.alpha.agents.turn.create(session_id, \*, agent_id, \*\*params) -> Turn
+- client.alpha.agents.turn.retrieve(turn_id, \*, agent_id, session_id) -> Turn
+- client.alpha.agents.turn.resume(turn_id, \*, agent_id, session_id, \*\*params) -> Turn
+
+# Beta
+
+## Datasets
+
+Types:
+
+```python
+from llama_stack_client.types.beta import (
+ ListDatasetsResponse,
+ DatasetRetrieveResponse,
+ DatasetListResponse,
+ DatasetIterrowsResponse,
+ DatasetRegisterResponse,
+)
+```
+
+Methods:
+
+- client.beta.datasets.retrieve(dataset_id) -> DatasetRetrieveResponse
+- client.beta.datasets.list() -> DatasetListResponse
+- client.beta.datasets.appendrows(dataset_id, \*\*params) -> None
+- client.beta.datasets.iterrows(dataset_id, \*\*params) -> DatasetIterrowsResponse
+- client.beta.datasets.register(\*\*params) -> DatasetRegisterResponse
+- client.beta.datasets.unregister(dataset_id) -> None
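Note for reviewers: the listings above re-document the `alpha` and `beta` namespaces. A hedged sketch tying a few of them together, using only argument shapes that appear in the dataset and post-training tests earlier in this patch series; availability depends on the server's enabled providers, and the `job.status` parameter name is an assumption based on the `job_status_params` type listed in the diffstat:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://127.0.0.1:4010")  # assumed local server

# Beta: register a dataset for post-training (shape from the dataset tests).
dataset = client.beta.datasets.register(
    purpose="post-training/messages",
    source={"type": "uri", "uri": "uri"},
)

# Alpha: start a supervised fine-tune job (shape from the post-training tests).
job = client.alpha.post_training.supervised_fine_tune(
    job_uuid="job_uuid",
    hyperparam_search_config={"foo": True},
    logger_config={"foo": True},
    training_config={
        "gradient_accumulation_steps": 0,
        "max_steps_per_epoch": 0,
        "n_epochs": 0,
    },
)

# Poll the job (parameter name assumed; see JobStatusResponse above).
status = client.alpha.post_training.job.status(job_uuid="job_uuid")
```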
diff --git a/src/llama_stack_client/_client.py b/src/llama_stack_client/_client.py
index 96289edd..34de181a 100644
--- a/src/llama_stack_client/_client.py
+++ b/src/llama_stack_client/_client.py
@@ -39,7 +39,9 @@
if TYPE_CHECKING:
from .resources import (
+ beta,
chat,
+ alpha,
files,
tools,
models,
@@ -68,11 +70,13 @@
from .resources.inspect import InspectResource, AsyncInspectResource
from .resources.scoring import ScoringResource, AsyncScoringResource
from .resources.shields import ShieldsResource, AsyncShieldsResource
+ from .resources.beta.beta import BetaResource, AsyncBetaResource
from .resources.chat.chat import ChatResource, AsyncChatResource
from .resources.providers import ProvidersResource, AsyncProvidersResource
from .resources.vector_io import VectorIoResource, AsyncVectorIoResource
from .resources.embeddings import EmbeddingsResource, AsyncEmbeddingsResource
from .resources.toolgroups import ToolgroupsResource, AsyncToolgroupsResource
+ from .resources.alpha.alpha import AlphaResource, AsyncAlphaResource
from .resources.completions import CompletionsResource, AsyncCompletionsResource
from .resources.moderations import ModerationsResource, AsyncModerationsResource
from .resources.models.models import ModelsResource, AsyncModelsResource
@@ -281,6 +285,18 @@ def files(self) -> FilesResource:
return FilesResource(self)
+ @cached_property
+ def alpha(self) -> AlphaResource:
+ from .resources.alpha import AlphaResource
+
+ return AlphaResource(self)
+
+ @cached_property
+ def beta(self) -> BetaResource:
+ from .resources.beta import BetaResource
+
+ return BetaResource(self)
+
@cached_property
def with_raw_response(self) -> LlamaStackClientWithRawResponse:
return LlamaStackClientWithRawResponse(self)
@@ -579,6 +595,18 @@ def files(self) -> AsyncFilesResource:
return AsyncFilesResource(self)
+ @cached_property
+ def alpha(self) -> AsyncAlphaResource:
+ from .resources.alpha import AsyncAlphaResource
+
+ return AsyncAlphaResource(self)
+
+ @cached_property
+ def beta(self) -> AsyncBetaResource:
+ from .resources.beta import AsyncBetaResource
+
+ return AsyncBetaResource(self)
+
@cached_property
def with_raw_response(self) -> AsyncLlamaStackClientWithRawResponse:
return AsyncLlamaStackClientWithRawResponse(self)
@@ -826,6 +854,18 @@ def files(self) -> files.FilesResourceWithRawResponse:
return FilesResourceWithRawResponse(self._client.files)
+ @cached_property
+ def alpha(self) -> alpha.AlphaResourceWithRawResponse:
+ from .resources.alpha import AlphaResourceWithRawResponse
+
+ return AlphaResourceWithRawResponse(self._client.alpha)
+
+ @cached_property
+ def beta(self) -> beta.BetaResourceWithRawResponse:
+ from .resources.beta import BetaResourceWithRawResponse
+
+ return BetaResourceWithRawResponse(self._client.beta)
+
class AsyncLlamaStackClientWithRawResponse:
_client: AsyncLlamaStackClient
@@ -961,6 +1001,18 @@ def files(self) -> files.AsyncFilesResourceWithRawResponse:
return AsyncFilesResourceWithRawResponse(self._client.files)
+ @cached_property
+ def alpha(self) -> alpha.AsyncAlphaResourceWithRawResponse:
+ from .resources.alpha import AsyncAlphaResourceWithRawResponse
+
+ return AsyncAlphaResourceWithRawResponse(self._client.alpha)
+
+ @cached_property
+ def beta(self) -> beta.AsyncBetaResourceWithRawResponse:
+ from .resources.beta import AsyncBetaResourceWithRawResponse
+
+ return AsyncBetaResourceWithRawResponse(self._client.beta)
+
class LlamaStackClientWithStreamedResponse:
_client: LlamaStackClient
@@ -1096,6 +1148,18 @@ def files(self) -> files.FilesResourceWithStreamingResponse:
return FilesResourceWithStreamingResponse(self._client.files)
+ @cached_property
+ def alpha(self) -> alpha.AlphaResourceWithStreamingResponse:
+ from .resources.alpha import AlphaResourceWithStreamingResponse
+
+ return AlphaResourceWithStreamingResponse(self._client.alpha)
+
+ @cached_property
+ def beta(self) -> beta.BetaResourceWithStreamingResponse:
+ from .resources.beta import BetaResourceWithStreamingResponse
+
+ return BetaResourceWithStreamingResponse(self._client.beta)
+
class AsyncLlamaStackClientWithStreamedResponse:
_client: AsyncLlamaStackClient
@@ -1231,6 +1295,18 @@ def files(self) -> files.AsyncFilesResourceWithStreamingResponse:
return AsyncFilesResourceWithStreamingResponse(self._client.files)
+ @cached_property
+ def alpha(self) -> alpha.AsyncAlphaResourceWithStreamingResponse:
+ from .resources.alpha import AsyncAlphaResourceWithStreamingResponse
+
+ return AsyncAlphaResourceWithStreamingResponse(self._client.alpha)
+
+ @cached_property
+ def beta(self) -> beta.AsyncBetaResourceWithStreamingResponse:
+ from .resources.beta import AsyncBetaResourceWithStreamingResponse
+
+ return AsyncBetaResourceWithStreamingResponse(self._client.beta)
+
Client = LlamaStackClient
diff --git a/src/llama_stack_client/resources/__init__.py b/src/llama_stack_client/resources/__init__.py
index 3ca8c1c8..60b18979 100644
--- a/src/llama_stack_client/resources/__init__.py
+++ b/src/llama_stack_client/resources/__init__.py
@@ -6,6 +6,14 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from .beta import (
+ BetaResource,
+ AsyncBetaResource,
+ BetaResourceWithRawResponse,
+ AsyncBetaResourceWithRawResponse,
+ BetaResourceWithStreamingResponse,
+ AsyncBetaResourceWithStreamingResponse,
+)
from .chat import (
ChatResource,
AsyncChatResource,
@@ -14,6 +22,14 @@
ChatResourceWithStreamingResponse,
AsyncChatResourceWithStreamingResponse,
)
+from .alpha import (
+ AlphaResource,
+ AsyncAlphaResource,
+ AlphaResourceWithRawResponse,
+ AsyncAlphaResourceWithRawResponse,
+ AlphaResourceWithStreamingResponse,
+ AsyncAlphaResourceWithStreamingResponse,
+)
from .files import (
FilesResource,
AsyncFilesResource,
@@ -302,4 +318,16 @@
"AsyncFilesResourceWithRawResponse",
"FilesResourceWithStreamingResponse",
"AsyncFilesResourceWithStreamingResponse",
+ "AlphaResource",
+ "AsyncAlphaResource",
+ "AlphaResourceWithRawResponse",
+ "AsyncAlphaResourceWithRawResponse",
+ "AlphaResourceWithStreamingResponse",
+ "AsyncAlphaResourceWithStreamingResponse",
+ "BetaResource",
+ "AsyncBetaResource",
+ "BetaResourceWithRawResponse",
+ "AsyncBetaResourceWithRawResponse",
+ "BetaResourceWithStreamingResponse",
+ "AsyncBetaResourceWithStreamingResponse",
]
diff --git a/src/llama_stack_client/resources/alpha/__init__.py b/src/llama_stack_client/resources/alpha/__init__.py
new file mode 100644
index 00000000..84d9534a
--- /dev/null
+++ b/src/llama_stack_client/resources/alpha/__init__.py
@@ -0,0 +1,89 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .eval import (
+ EvalResource,
+ AsyncEvalResource,
+ EvalResourceWithRawResponse,
+ AsyncEvalResourceWithRawResponse,
+ EvalResourceWithStreamingResponse,
+ AsyncEvalResourceWithStreamingResponse,
+)
+from .alpha import (
+ AlphaResource,
+ AsyncAlphaResource,
+ AlphaResourceWithRawResponse,
+ AsyncAlphaResourceWithRawResponse,
+ AlphaResourceWithStreamingResponse,
+ AsyncAlphaResourceWithStreamingResponse,
+)
+from .agents import (
+ AgentsResource,
+ AsyncAgentsResource,
+ AgentsResourceWithRawResponse,
+ AsyncAgentsResourceWithRawResponse,
+ AgentsResourceWithStreamingResponse,
+ AsyncAgentsResourceWithStreamingResponse,
+)
+from .inference import (
+ InferenceResource,
+ AsyncInferenceResource,
+ InferenceResourceWithRawResponse,
+ AsyncInferenceResourceWithRawResponse,
+ InferenceResourceWithStreamingResponse,
+ AsyncInferenceResourceWithStreamingResponse,
+)
+from .benchmarks import (
+ BenchmarksResource,
+ AsyncBenchmarksResource,
+ BenchmarksResourceWithRawResponse,
+ AsyncBenchmarksResourceWithRawResponse,
+ BenchmarksResourceWithStreamingResponse,
+ AsyncBenchmarksResourceWithStreamingResponse,
+)
+from .post_training import (
+ PostTrainingResource,
+ AsyncPostTrainingResource,
+ PostTrainingResourceWithRawResponse,
+ AsyncPostTrainingResourceWithRawResponse,
+ PostTrainingResourceWithStreamingResponse,
+ AsyncPostTrainingResourceWithStreamingResponse,
+)
+
+__all__ = [
+ "InferenceResource",
+ "AsyncInferenceResource",
+ "InferenceResourceWithRawResponse",
+ "AsyncInferenceResourceWithRawResponse",
+ "InferenceResourceWithStreamingResponse",
+ "AsyncInferenceResourceWithStreamingResponse",
+ "PostTrainingResource",
+ "AsyncPostTrainingResource",
+ "PostTrainingResourceWithRawResponse",
+ "AsyncPostTrainingResourceWithRawResponse",
+ "PostTrainingResourceWithStreamingResponse",
+ "AsyncPostTrainingResourceWithStreamingResponse",
+ "BenchmarksResource",
+ "AsyncBenchmarksResource",
+ "BenchmarksResourceWithRawResponse",
+ "AsyncBenchmarksResourceWithRawResponse",
+ "BenchmarksResourceWithStreamingResponse",
+ "AsyncBenchmarksResourceWithStreamingResponse",
+ "EvalResource",
+ "AsyncEvalResource",
+ "EvalResourceWithRawResponse",
+ "AsyncEvalResourceWithRawResponse",
+ "EvalResourceWithStreamingResponse",
+ "AsyncEvalResourceWithStreamingResponse",
+ "AgentsResource",
+ "AsyncAgentsResource",
+ "AgentsResourceWithRawResponse",
+ "AsyncAgentsResourceWithRawResponse",
+ "AgentsResourceWithStreamingResponse",
+ "AsyncAgentsResourceWithStreamingResponse",
+ "AlphaResource",
+ "AsyncAlphaResource",
+ "AlphaResourceWithRawResponse",
+ "AsyncAlphaResourceWithRawResponse",
+ "AlphaResourceWithStreamingResponse",
+ "AsyncAlphaResourceWithStreamingResponse",
+]
diff --git a/src/llama_stack_client/resources/alpha/agents/__init__.py b/src/llama_stack_client/resources/alpha/agents/__init__.py
new file mode 100644
index 00000000..17f0098f
--- /dev/null
+++ b/src/llama_stack_client/resources/alpha/agents/__init__.py
@@ -0,0 +1,61 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .turn import (
+ TurnResource,
+ AsyncTurnResource,
+ TurnResourceWithRawResponse,
+ AsyncTurnResourceWithRawResponse,
+ TurnResourceWithStreamingResponse,
+ AsyncTurnResourceWithStreamingResponse,
+)
+from .steps import (
+ StepsResource,
+ AsyncStepsResource,
+ StepsResourceWithRawResponse,
+ AsyncStepsResourceWithRawResponse,
+ StepsResourceWithStreamingResponse,
+ AsyncStepsResourceWithStreamingResponse,
+)
+from .agents import (
+ AgentsResource,
+ AsyncAgentsResource,
+ AgentsResourceWithRawResponse,
+ AsyncAgentsResourceWithRawResponse,
+ AgentsResourceWithStreamingResponse,
+ AsyncAgentsResourceWithStreamingResponse,
+)
+from .session import (
+ SessionResource,
+ AsyncSessionResource,
+ SessionResourceWithRawResponse,
+ AsyncSessionResourceWithRawResponse,
+ SessionResourceWithStreamingResponse,
+ AsyncSessionResourceWithStreamingResponse,
+)
+
+__all__ = [
+ "SessionResource",
+ "AsyncSessionResource",
+ "SessionResourceWithRawResponse",
+ "AsyncSessionResourceWithRawResponse",
+ "SessionResourceWithStreamingResponse",
+ "AsyncSessionResourceWithStreamingResponse",
+ "StepsResource",
+ "AsyncStepsResource",
+ "StepsResourceWithRawResponse",
+ "AsyncStepsResourceWithRawResponse",
+ "StepsResourceWithStreamingResponse",
+ "AsyncStepsResourceWithStreamingResponse",
+ "TurnResource",
+ "AsyncTurnResource",
+ "TurnResourceWithRawResponse",
+ "AsyncTurnResourceWithRawResponse",
+ "TurnResourceWithStreamingResponse",
+ "AsyncTurnResourceWithStreamingResponse",
+ "AgentsResource",
+ "AsyncAgentsResource",
+ "AgentsResourceWithRawResponse",
+ "AsyncAgentsResourceWithRawResponse",
+ "AgentsResourceWithStreamingResponse",
+ "AsyncAgentsResourceWithStreamingResponse",
+]
diff --git a/src/llama_stack_client/resources/alpha/agents/agents.py b/src/llama_stack_client/resources/alpha/agents/agents.py
new file mode 100644
index 00000000..0e81cce7
--- /dev/null
+++ b/src/llama_stack_client/resources/alpha/agents/agents.py
@@ -0,0 +1,528 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from .turn import (
+ TurnResource,
+ AsyncTurnResource,
+ TurnResourceWithRawResponse,
+ AsyncTurnResourceWithRawResponse,
+ TurnResourceWithStreamingResponse,
+ AsyncTurnResourceWithStreamingResponse,
+)
+from .steps import (
+ StepsResource,
+ AsyncStepsResource,
+ StepsResourceWithRawResponse,
+ AsyncStepsResourceWithRawResponse,
+ StepsResourceWithStreamingResponse,
+ AsyncStepsResourceWithStreamingResponse,
+)
+from .session import (
+ SessionResource,
+ AsyncSessionResource,
+ SessionResourceWithRawResponse,
+ AsyncSessionResourceWithRawResponse,
+ SessionResourceWithStreamingResponse,
+ AsyncSessionResourceWithStreamingResponse,
+)
+from ...._types import Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given
+from ...._utils import maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ....types.alpha import agent_list_params, agent_create_params
+from ...._base_client import make_request_options
+from ....types.alpha.agent_list_response import AgentListResponse
+from ....types.shared_params.agent_config import AgentConfig
+from ....types.alpha.agent_create_response import AgentCreateResponse
+from ....types.alpha.agent_retrieve_response import AgentRetrieveResponse
+
+__all__ = ["AgentsResource", "AsyncAgentsResource"]
+
+
+class AgentsResource(SyncAPIResource):
+ @cached_property
+ def session(self) -> SessionResource:
+ return SessionResource(self._client)
+
+ @cached_property
+ def steps(self) -> StepsResource:
+ return StepsResource(self._client)
+
+ @cached_property
+ def turn(self) -> TurnResource:
+ return TurnResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AgentsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return AgentsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AgentsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return AgentsResourceWithStreamingResponse(self)
+
+ def create(
+ self,
+ *,
+ agent_config: AgentConfig,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> AgentCreateResponse:
+ """
+ Create an agent with the given configuration.
+
+ Args:
+ agent_config: The configuration for the agent.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/v1alpha/agents",
+ body=maybe_transform({"agent_config": agent_config}, agent_create_params.AgentCreateParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=AgentCreateResponse,
+ )
+
+ def retrieve(
+ self,
+ agent_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> AgentRetrieveResponse:
+ """
+ Describe an agent by its ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not agent_id:
+ raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
+ return self._get(
+ f"/v1alpha/agents/{agent_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=AgentRetrieveResponse,
+ )
+
+ def list(
+ self,
+ *,
+ limit: int | Omit = omit,
+ start_index: int | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> AgentListResponse:
+ """
+ List all agents.
+
+ Args:
+ limit: The number of agents to return.
+
+ start_index: The index to start the pagination from.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._get(
+ "/v1alpha/agents",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "limit": limit,
+ "start_index": start_index,
+ },
+ agent_list_params.AgentListParams,
+ ),
+ ),
+ cast_to=AgentListResponse,
+ )
+
+ def delete(
+ self,
+ agent_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> None:
+ """
+        Delete an agent by its ID, along with its associated sessions and turns.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not agent_id:
+ raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+ return self._delete(
+ f"/v1alpha/agents/{agent_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=NoneType,
+ )
+
+
+class AsyncAgentsResource(AsyncAPIResource):
+ @cached_property
+ def session(self) -> AsyncSessionResource:
+ return AsyncSessionResource(self._client)
+
+ @cached_property
+ def steps(self) -> AsyncStepsResource:
+ return AsyncStepsResource(self._client)
+
+ @cached_property
+ def turn(self) -> AsyncTurnResource:
+ return AsyncTurnResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncAgentsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncAgentsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncAgentsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return AsyncAgentsResourceWithStreamingResponse(self)
+
+ async def create(
+ self,
+ *,
+ agent_config: AgentConfig,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> AgentCreateResponse:
+ """
+ Create an agent with the given configuration.
+
+ Args:
+ agent_config: The configuration for the agent.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/v1alpha/agents",
+ body=await async_maybe_transform({"agent_config": agent_config}, agent_create_params.AgentCreateParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=AgentCreateResponse,
+ )
+
+ async def retrieve(
+ self,
+ agent_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> AgentRetrieveResponse:
+ """
+ Describe an agent by its ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not agent_id:
+ raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
+ return await self._get(
+ f"/v1alpha/agents/{agent_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=AgentRetrieveResponse,
+ )
+
+ async def list(
+ self,
+ *,
+ limit: int | Omit = omit,
+ start_index: int | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> AgentListResponse:
+ """
+ List all agents.
+
+ Args:
+ limit: The number of agents to return.
+
+ start_index: The index to start the pagination from.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._get(
+ "/v1alpha/agents",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=await async_maybe_transform(
+ {
+ "limit": limit,
+ "start_index": start_index,
+ },
+ agent_list_params.AgentListParams,
+ ),
+ ),
+ cast_to=AgentListResponse,
+ )
+
+ async def delete(
+ self,
+ agent_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> None:
+ """
+        Delete an agent by its ID, along with its associated sessions and turns.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not agent_id:
+ raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+ return await self._delete(
+ f"/v1alpha/agents/{agent_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=NoneType,
+ )
+
+
+class AgentsResourceWithRawResponse:
+ def __init__(self, agents: AgentsResource) -> None:
+ self._agents = agents
+
+ self.create = to_raw_response_wrapper(
+ agents.create,
+ )
+ self.retrieve = to_raw_response_wrapper(
+ agents.retrieve,
+ )
+ self.list = to_raw_response_wrapper(
+ agents.list,
+ )
+ self.delete = to_raw_response_wrapper(
+ agents.delete,
+ )
+
+ @cached_property
+ def session(self) -> SessionResourceWithRawResponse:
+ return SessionResourceWithRawResponse(self._agents.session)
+
+ @cached_property
+ def steps(self) -> StepsResourceWithRawResponse:
+ return StepsResourceWithRawResponse(self._agents.steps)
+
+ @cached_property
+ def turn(self) -> TurnResourceWithRawResponse:
+ return TurnResourceWithRawResponse(self._agents.turn)
+
+
+class AsyncAgentsResourceWithRawResponse:
+ def __init__(self, agents: AsyncAgentsResource) -> None:
+ self._agents = agents
+
+ self.create = async_to_raw_response_wrapper(
+ agents.create,
+ )
+ self.retrieve = async_to_raw_response_wrapper(
+ agents.retrieve,
+ )
+ self.list = async_to_raw_response_wrapper(
+ agents.list,
+ )
+ self.delete = async_to_raw_response_wrapper(
+ agents.delete,
+ )
+
+ @cached_property
+ def session(self) -> AsyncSessionResourceWithRawResponse:
+ return AsyncSessionResourceWithRawResponse(self._agents.session)
+
+ @cached_property
+ def steps(self) -> AsyncStepsResourceWithRawResponse:
+ return AsyncStepsResourceWithRawResponse(self._agents.steps)
+
+ @cached_property
+ def turn(self) -> AsyncTurnResourceWithRawResponse:
+ return AsyncTurnResourceWithRawResponse(self._agents.turn)
+
+
+class AgentsResourceWithStreamingResponse:
+ def __init__(self, agents: AgentsResource) -> None:
+ self._agents = agents
+
+ self.create = to_streamed_response_wrapper(
+ agents.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ agents.retrieve,
+ )
+ self.list = to_streamed_response_wrapper(
+ agents.list,
+ )
+ self.delete = to_streamed_response_wrapper(
+ agents.delete,
+ )
+
+ @cached_property
+ def session(self) -> SessionResourceWithStreamingResponse:
+ return SessionResourceWithStreamingResponse(self._agents.session)
+
+ @cached_property
+ def steps(self) -> StepsResourceWithStreamingResponse:
+ return StepsResourceWithStreamingResponse(self._agents.steps)
+
+ @cached_property
+ def turn(self) -> TurnResourceWithStreamingResponse:
+ return TurnResourceWithStreamingResponse(self._agents.turn)
+
+
+class AsyncAgentsResourceWithStreamingResponse:
+ def __init__(self, agents: AsyncAgentsResource) -> None:
+ self._agents = agents
+
+ self.create = async_to_streamed_response_wrapper(
+ agents.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ agents.retrieve,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ agents.list,
+ )
+ self.delete = async_to_streamed_response_wrapper(
+ agents.delete,
+ )
+
+ @cached_property
+ def session(self) -> AsyncSessionResourceWithStreamingResponse:
+ return AsyncSessionResourceWithStreamingResponse(self._agents.session)
+
+ @cached_property
+ def steps(self) -> AsyncStepsResourceWithStreamingResponse:
+ return AsyncStepsResourceWithStreamingResponse(self._agents.steps)
+
+ @cached_property
+ def turn(self) -> AsyncTurnResourceWithStreamingResponse:
+ return AsyncTurnResourceWithStreamingResponse(self._agents.turn)
diff --git a/src/llama_stack_client/resources/alpha/agents/session.py b/src/llama_stack_client/resources/alpha/agents/session.py
new file mode 100644
index 00000000..2e980add
--- /dev/null
+++ b/src/llama_stack_client/resources/alpha/agents/session.py
@@ -0,0 +1,471 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from ...._types import Body, Omit, Query, Headers, NoneType, NotGiven, SequenceNotStr, omit, not_given
+from ...._utils import maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ...._base_client import make_request_options
+from ....types.alpha.agents import session_list_params, session_create_params, session_retrieve_params
+from ....types.alpha.agents.session import Session
+from ....types.alpha.agents.session_list_response import SessionListResponse
+from ....types.alpha.agents.session_create_response import SessionCreateResponse
+
+__all__ = ["SessionResource", "AsyncSessionResource"]
+
+
+class SessionResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> SessionResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return SessionResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> SessionResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return SessionResourceWithStreamingResponse(self)
+
+ def create(
+ self,
+ agent_id: str,
+ *,
+ session_name: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> SessionCreateResponse:
+ """
+ Create a new session for an agent.
+
+ Args:
+ session_name: The name of the session to create.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not agent_id:
+ raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
+ return self._post(
+ f"/v1alpha/agents/{agent_id}/session",
+ body=maybe_transform({"session_name": session_name}, session_create_params.SessionCreateParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=SessionCreateResponse,
+ )
+
+ def retrieve(
+ self,
+ session_id: str,
+ *,
+ agent_id: str,
+ turn_ids: SequenceNotStr[str] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Session:
+ """
+ Retrieve an agent session by its ID.
+
+ Args:
+ turn_ids: (Optional) List of turn IDs to filter the session by.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not agent_id:
+ raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
+ if not session_id:
+ raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
+ return self._get(
+ f"/v1alpha/agents/{agent_id}/session/{session_id}",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform({"turn_ids": turn_ids}, session_retrieve_params.SessionRetrieveParams),
+ ),
+ cast_to=Session,
+ )
+
+ def list(
+ self,
+ agent_id: str,
+ *,
+ limit: int | Omit = omit,
+ start_index: int | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> SessionListResponse:
+ """
+        List all sessions of a given agent.
+
+ Args:
+ limit: The number of sessions to return.
+
+ start_index: The index to start the pagination from.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not agent_id:
+ raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
+ return self._get(
+ f"/v1alpha/agents/{agent_id}/sessions",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "limit": limit,
+ "start_index": start_index,
+ },
+ session_list_params.SessionListParams,
+ ),
+ ),
+ cast_to=SessionListResponse,
+ )
+
+ def delete(
+ self,
+ session_id: str,
+ *,
+ agent_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> None:
+ """
+        Delete an agent session by its ID, along with its associated turns.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not agent_id:
+ raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
+ if not session_id:
+ raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+ return self._delete(
+ f"/v1alpha/agents/{agent_id}/session/{session_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=NoneType,
+ )
+
+
+class AsyncSessionResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncSessionResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncSessionResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncSessionResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return AsyncSessionResourceWithStreamingResponse(self)
+
+ async def create(
+ self,
+ agent_id: str,
+ *,
+ session_name: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> SessionCreateResponse:
+ """
+ Create a new session for an agent.
+
+ Args:
+ session_name: The name of the session to create.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not agent_id:
+ raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
+ return await self._post(
+ f"/v1alpha/agents/{agent_id}/session",
+ body=await async_maybe_transform({"session_name": session_name}, session_create_params.SessionCreateParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=SessionCreateResponse,
+ )
+
+ async def retrieve(
+ self,
+ session_id: str,
+ *,
+ agent_id: str,
+ turn_ids: SequenceNotStr[str] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Session:
+ """
+ Retrieve an agent session by its ID.
+
+ Args:
+ turn_ids: (Optional) List of turn IDs to filter the session by.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not agent_id:
+ raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
+ if not session_id:
+ raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
+ return await self._get(
+ f"/v1alpha/agents/{agent_id}/session/{session_id}",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=await async_maybe_transform(
+ {"turn_ids": turn_ids}, session_retrieve_params.SessionRetrieveParams
+ ),
+ ),
+ cast_to=Session,
+ )
+
+ async def list(
+ self,
+ agent_id: str,
+ *,
+ limit: int | Omit = omit,
+ start_index: int | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> SessionListResponse:
+ """
+        List all sessions of a given agent.
+
+ Args:
+ limit: The number of sessions to return.
+
+ start_index: The index to start the pagination from.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not agent_id:
+ raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
+ return await self._get(
+ f"/v1alpha/agents/{agent_id}/sessions",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=await async_maybe_transform(
+ {
+ "limit": limit,
+ "start_index": start_index,
+ },
+ session_list_params.SessionListParams,
+ ),
+ ),
+ cast_to=SessionListResponse,
+ )
+
+ async def delete(
+ self,
+ session_id: str,
+ *,
+ agent_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> None:
+ """
+        Delete an agent session by its ID, along with its associated turns.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not agent_id:
+ raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
+ if not session_id:
+ raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+ return await self._delete(
+ f"/v1alpha/agents/{agent_id}/session/{session_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=NoneType,
+ )
+
+
+class SessionResourceWithRawResponse:
+ def __init__(self, session: SessionResource) -> None:
+ self._session = session
+
+ self.create = to_raw_response_wrapper(
+ session.create,
+ )
+ self.retrieve = to_raw_response_wrapper(
+ session.retrieve,
+ )
+ self.list = to_raw_response_wrapper(
+ session.list,
+ )
+ self.delete = to_raw_response_wrapper(
+ session.delete,
+ )
+
+
+class AsyncSessionResourceWithRawResponse:
+ def __init__(self, session: AsyncSessionResource) -> None:
+ self._session = session
+
+ self.create = async_to_raw_response_wrapper(
+ session.create,
+ )
+ self.retrieve = async_to_raw_response_wrapper(
+ session.retrieve,
+ )
+ self.list = async_to_raw_response_wrapper(
+ session.list,
+ )
+ self.delete = async_to_raw_response_wrapper(
+ session.delete,
+ )
+
+
+class SessionResourceWithStreamingResponse:
+ def __init__(self, session: SessionResource) -> None:
+ self._session = session
+
+ self.create = to_streamed_response_wrapper(
+ session.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ session.retrieve,
+ )
+ self.list = to_streamed_response_wrapper(
+ session.list,
+ )
+ self.delete = to_streamed_response_wrapper(
+ session.delete,
+ )
+
+
+class AsyncSessionResourceWithStreamingResponse:
+ def __init__(self, session: AsyncSessionResource) -> None:
+ self._session = session
+
+ self.create = async_to_streamed_response_wrapper(
+ session.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ session.retrieve,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ session.list,
+ )
+ self.delete = async_to_streamed_response_wrapper(
+ session.delete,
+ )
diff --git a/src/llama_stack_client/resources/alpha/agents/steps.py b/src/llama_stack_client/resources/alpha/agents/steps.py
new file mode 100644
index 00000000..838822d0
--- /dev/null
+++ b/src/llama_stack_client/resources/alpha/agents/steps.py
@@ -0,0 +1,181 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from ...._types import Body, Query, Headers, NotGiven, not_given
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ...._base_client import make_request_options
+from ....types.alpha.agents.step_retrieve_response import StepRetrieveResponse
+
+__all__ = ["StepsResource", "AsyncStepsResource"]
+
+
+class StepsResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> StepsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return StepsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> StepsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return StepsResourceWithStreamingResponse(self)
+
+ def retrieve(
+ self,
+ step_id: str,
+ *,
+ agent_id: str,
+ session_id: str,
+ turn_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> StepRetrieveResponse:
+ """
+ Retrieve an agent step by its ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not agent_id:
+ raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
+ if not session_id:
+ raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
+ if not turn_id:
+ raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
+ if not step_id:
+ raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}")
+ return self._get(
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=StepRetrieveResponse,
+ )
+
+
+class AsyncStepsResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncStepsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncStepsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncStepsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return AsyncStepsResourceWithStreamingResponse(self)
+
+ async def retrieve(
+ self,
+ step_id: str,
+ *,
+ agent_id: str,
+ session_id: str,
+ turn_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> StepRetrieveResponse:
+ """
+ Retrieve an agent step by its ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not agent_id:
+ raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
+ if not session_id:
+ raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
+ if not turn_id:
+ raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
+ if not step_id:
+ raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}")
+ return await self._get(
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=StepRetrieveResponse,
+ )
+
+
+class StepsResourceWithRawResponse:
+ def __init__(self, steps: StepsResource) -> None:
+ self._steps = steps
+
+ self.retrieve = to_raw_response_wrapper(
+ steps.retrieve,
+ )
+
+
+class AsyncStepsResourceWithRawResponse:
+ def __init__(self, steps: AsyncStepsResource) -> None:
+ self._steps = steps
+
+ self.retrieve = async_to_raw_response_wrapper(
+ steps.retrieve,
+ )
+
+
+class StepsResourceWithStreamingResponse:
+ def __init__(self, steps: StepsResource) -> None:
+ self._steps = steps
+
+ self.retrieve = to_streamed_response_wrapper(
+ steps.retrieve,
+ )
+
+
+class AsyncStepsResourceWithStreamingResponse:
+ def __init__(self, steps: AsyncStepsResource) -> None:
+ self._steps = steps
+
+ self.retrieve = async_to_streamed_response_wrapper(
+ steps.retrieve,
+ )
diff --git a/src/llama_stack_client/resources/alpha/agents/turn.py b/src/llama_stack_client/resources/alpha/agents/turn.py
new file mode 100644
index 00000000..ffe766b6
--- /dev/null
+++ b/src/llama_stack_client/resources/alpha/agents/turn.py
@@ -0,0 +1,875 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Iterable
+from typing_extensions import Literal, overload
+
+import httpx
+
+from ...._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
+from ...._utils import required_args, maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ...._streaming import Stream, AsyncStream
+from ...._base_client import make_request_options
+from ....types.alpha.agents import turn_create_params, turn_resume_params
+from ....types.alpha.agents.turn import Turn
+from ....types.alpha.tool_response_param import ToolResponseParam
+from ....types.alpha.agents.agent_turn_response_stream_chunk import AgentTurnResponseStreamChunk
+
+__all__ = ["TurnResource", "AsyncTurnResource"]
+
+
+class TurnResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> TurnResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return TurnResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> TurnResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return TurnResourceWithStreamingResponse(self)
+
+ @overload
+ def create(
+ self,
+ session_id: str,
+ *,
+ agent_id: str,
+ messages: Iterable[turn_create_params.Message],
+ documents: Iterable[turn_create_params.Document] | Omit = omit,
+ stream: Literal[False] | Omit = omit,
+ tool_config: turn_create_params.ToolConfig | Omit = omit,
+ toolgroups: SequenceNotStr[turn_create_params.Toolgroup] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Turn:
+ """
+ Create a new turn for an agent.
+
+ Args:
+ messages: List of messages to start the turn with.
+
+ documents: (Optional) List of documents to create the turn with.
+
+ stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
+ False.
+
+          tool_config: (Optional) The tool configuration to create the turn with; it will be used to
+              override the agent's tool_config.
+
+          toolgroups: (Optional) List of toolgroups to create the turn with; they will be used in
+              addition to the agent's config toolgroups for the request.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def create(
+ self,
+ session_id: str,
+ *,
+ agent_id: str,
+ messages: Iterable[turn_create_params.Message],
+ stream: Literal[True],
+ documents: Iterable[turn_create_params.Document] | Omit = omit,
+ tool_config: turn_create_params.ToolConfig | Omit = omit,
+ toolgroups: SequenceNotStr[turn_create_params.Toolgroup] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Stream[AgentTurnResponseStreamChunk]:
+ """
+ Create a new turn for an agent.
+
+ Args:
+ messages: List of messages to start the turn with.
+
+ stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
+ False.
+
+ documents: (Optional) List of documents to create the turn with.
+
+          tool_config: (Optional) The tool configuration to create the turn with; it will be used to
+              override the agent's tool_config.
+
+          toolgroups: (Optional) List of toolgroups to create the turn with; they will be used in
+              addition to the agent's config toolgroups for the request.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def create(
+ self,
+ session_id: str,
+ *,
+ agent_id: str,
+ messages: Iterable[turn_create_params.Message],
+ stream: bool,
+ documents: Iterable[turn_create_params.Document] | Omit = omit,
+ tool_config: turn_create_params.ToolConfig | Omit = omit,
+ toolgroups: SequenceNotStr[turn_create_params.Toolgroup] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Turn | Stream[AgentTurnResponseStreamChunk]:
+ """
+ Create a new turn for an agent.
+
+ Args:
+ messages: List of messages to start the turn with.
+
+ stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
+ False.
+
+ documents: (Optional) List of documents to create the turn with.
+
+          tool_config: (Optional) The tool configuration to create the turn with; it will be used to
+              override the agent's tool_config.
+
+          toolgroups: (Optional) List of toolgroups to create the turn with; they will be used in
+              addition to the agent's config toolgroups for the request.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @required_args(["agent_id", "messages"], ["agent_id", "messages", "stream"])
+ def create(
+ self,
+ session_id: str,
+ *,
+ agent_id: str,
+ messages: Iterable[turn_create_params.Message],
+ documents: Iterable[turn_create_params.Document] | Omit = omit,
+ stream: Literal[False] | Literal[True] | Omit = omit,
+ tool_config: turn_create_params.ToolConfig | Omit = omit,
+ toolgroups: SequenceNotStr[turn_create_params.Toolgroup] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Turn | Stream[AgentTurnResponseStreamChunk]:
+ if not agent_id:
+ raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
+ if not session_id:
+ raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
+ return self._post(
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn",
+ body=maybe_transform(
+ {
+ "messages": messages,
+ "documents": documents,
+ "stream": stream,
+ "tool_config": tool_config,
+ "toolgroups": toolgroups,
+ },
+ turn_create_params.TurnCreateParamsStreaming
+ if stream
+ else turn_create_params.TurnCreateParamsNonStreaming,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Turn,
+ stream=stream or False,
+ stream_cls=Stream[AgentTurnResponseStreamChunk],
+ )
+
+ def retrieve(
+ self,
+ turn_id: str,
+ *,
+ agent_id: str,
+ session_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Turn:
+ """
+ Retrieve an agent turn by its ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not agent_id:
+ raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
+ if not session_id:
+ raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
+ if not turn_id:
+ raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
+ return self._get(
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Turn,
+ )
+
+ @overload
+ def resume(
+ self,
+ turn_id: str,
+ *,
+ agent_id: str,
+ session_id: str,
+ tool_responses: Iterable[ToolResponseParam],
+ stream: Literal[False] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Turn:
+ """Resume an agent turn with executed tool call responses.
+
+ When a Turn has the
+        status `awaiting_input` due to pending input from client-side tool calls, this
+ endpoint can be used to submit the outputs from the tool calls once they are
+ ready.
+
+ Args:
+ tool_responses: The tool call responses to resume the turn with.
+
+ stream: Whether to stream the response.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def resume(
+ self,
+ turn_id: str,
+ *,
+ agent_id: str,
+ session_id: str,
+ stream: Literal[True],
+ tool_responses: Iterable[ToolResponseParam],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Stream[AgentTurnResponseStreamChunk]:
+ """Resume an agent turn with executed tool call responses.
+
+ When a Turn has the
+        status `awaiting_input` due to pending input from client-side tool calls, this
+ endpoint can be used to submit the outputs from the tool calls once they are
+ ready.
+
+ Args:
+ stream: Whether to stream the response.
+
+ tool_responses: The tool call responses to resume the turn with.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ def resume(
+ self,
+ turn_id: str,
+ *,
+ agent_id: str,
+ session_id: str,
+ stream: bool,
+ tool_responses: Iterable[ToolResponseParam],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Turn | Stream[AgentTurnResponseStreamChunk]:
+ """Resume an agent turn with executed tool call responses.
+
+ When a Turn has the
+        status `awaiting_input` due to pending input from client-side tool calls, this
+ endpoint can be used to submit the outputs from the tool calls once they are
+ ready.
+
+ Args:
+ stream: Whether to stream the response.
+
+ tool_responses: The tool call responses to resume the turn with.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @required_args(["agent_id", "session_id", "tool_responses"], ["agent_id", "session_id", "stream", "tool_responses"])
+ def resume(
+ self,
+ turn_id: str,
+ *,
+ agent_id: str,
+ session_id: str,
+ tool_responses: Iterable[ToolResponseParam],
+ stream: Literal[False] | Literal[True] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Turn | Stream[AgentTurnResponseStreamChunk]:
+ if not agent_id:
+ raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
+ if not session_id:
+ raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
+ if not turn_id:
+ raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
+ return self._post(
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
+ body=maybe_transform(
+ {
+ "tool_responses": tool_responses,
+ "stream": stream,
+ },
+ turn_resume_params.TurnResumeParamsStreaming
+ if stream
+ else turn_resume_params.TurnResumeParamsNonStreaming,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Turn,
+ stream=stream or False,
+ stream_cls=Stream[AgentTurnResponseStreamChunk],
+ )
+
+
+class AsyncTurnResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncTurnResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncTurnResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncTurnResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return AsyncTurnResourceWithStreamingResponse(self)
+
+ @overload
+ async def create(
+ self,
+ session_id: str,
+ *,
+ agent_id: str,
+ messages: Iterable[turn_create_params.Message],
+ documents: Iterable[turn_create_params.Document] | Omit = omit,
+ stream: Literal[False] | Omit = omit,
+ tool_config: turn_create_params.ToolConfig | Omit = omit,
+ toolgroups: SequenceNotStr[turn_create_params.Toolgroup] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Turn:
+ """
+ Create a new turn for an agent.
+
+ Args:
+ messages: List of messages to start the turn with.
+
+ documents: (Optional) List of documents to create the turn with.
+
+ stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
+ False.
+
+          tool_config: (Optional) The tool configuration to create the turn with; it will be used
+              to override the agent's tool_config.
+
+          toolgroups: (Optional) List of toolgroups to create the turn with; these will be used in
+              addition to the agent's config toolgroups for the request.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ async def create(
+ self,
+ session_id: str,
+ *,
+ agent_id: str,
+ messages: Iterable[turn_create_params.Message],
+ stream: Literal[True],
+ documents: Iterable[turn_create_params.Document] | Omit = omit,
+ tool_config: turn_create_params.ToolConfig | Omit = omit,
+ toolgroups: SequenceNotStr[turn_create_params.Toolgroup] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> AsyncStream[AgentTurnResponseStreamChunk]:
+ """
+ Create a new turn for an agent.
+
+ Args:
+ messages: List of messages to start the turn with.
+
+ stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
+ False.
+
+ documents: (Optional) List of documents to create the turn with.
+
+          tool_config: (Optional) The tool configuration to create the turn with; it will be used
+              to override the agent's tool_config.
+
+          toolgroups: (Optional) List of toolgroups to create the turn with; these will be used in
+              addition to the agent's config toolgroups for the request.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ async def create(
+ self,
+ session_id: str,
+ *,
+ agent_id: str,
+ messages: Iterable[turn_create_params.Message],
+ stream: bool,
+ documents: Iterable[turn_create_params.Document] | Omit = omit,
+ tool_config: turn_create_params.ToolConfig | Omit = omit,
+ toolgroups: SequenceNotStr[turn_create_params.Toolgroup] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]:
+ """
+ Create a new turn for an agent.
+
+ Args:
+ messages: List of messages to start the turn with.
+
+ stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
+ False.
+
+ documents: (Optional) List of documents to create the turn with.
+
+          tool_config: (Optional) The tool configuration to create the turn with; it will be used
+              to override the agent's tool_config.
+
+          toolgroups: (Optional) List of toolgroups to create the turn with; these will be used in
+              addition to the agent's config toolgroups for the request.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @required_args(["agent_id", "messages"], ["agent_id", "messages", "stream"])
+ async def create(
+ self,
+ session_id: str,
+ *,
+ agent_id: str,
+ messages: Iterable[turn_create_params.Message],
+ documents: Iterable[turn_create_params.Document] | Omit = omit,
+ stream: Literal[False] | Literal[True] | Omit = omit,
+ tool_config: turn_create_params.ToolConfig | Omit = omit,
+ toolgroups: SequenceNotStr[turn_create_params.Toolgroup] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]:
+ if not agent_id:
+ raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
+ if not session_id:
+ raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
+ return await self._post(
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn",
+ body=await async_maybe_transform(
+ {
+ "messages": messages,
+ "documents": documents,
+ "stream": stream,
+ "tool_config": tool_config,
+ "toolgroups": toolgroups,
+ },
+ turn_create_params.TurnCreateParamsStreaming
+ if stream
+ else turn_create_params.TurnCreateParamsNonStreaming,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Turn,
+ stream=stream or False,
+ stream_cls=AsyncStream[AgentTurnResponseStreamChunk],
+ )
+
+ async def retrieve(
+ self,
+ turn_id: str,
+ *,
+ agent_id: str,
+ session_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Turn:
+ """
+ Retrieve an agent turn by its ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not agent_id:
+ raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
+ if not session_id:
+ raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
+ if not turn_id:
+ raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
+ return await self._get(
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Turn,
+ )
+
+ @overload
+ async def resume(
+ self,
+ turn_id: str,
+ *,
+ agent_id: str,
+ session_id: str,
+ tool_responses: Iterable[ToolResponseParam],
+ stream: Literal[False] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Turn:
+ """Resume an agent turn with executed tool call responses.
+
+        When a Turn has the status `awaiting_input` due to pending input from
+        client-side tool calls, this endpoint can be used to submit the outputs from
+        the tool calls once they are ready.
+
+ Args:
+ tool_responses: The tool call responses to resume the turn with.
+
+ stream: Whether to stream the response.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ async def resume(
+ self,
+ turn_id: str,
+ *,
+ agent_id: str,
+ session_id: str,
+ stream: Literal[True],
+ tool_responses: Iterable[ToolResponseParam],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> AsyncStream[AgentTurnResponseStreamChunk]:
+ """Resume an agent turn with executed tool call responses.
+
+        When a Turn has the status `awaiting_input` due to pending input from
+        client-side tool calls, this endpoint can be used to submit the outputs from
+        the tool calls once they are ready.
+
+ Args:
+ stream: Whether to stream the response.
+
+ tool_responses: The tool call responses to resume the turn with.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @overload
+ async def resume(
+ self,
+ turn_id: str,
+ *,
+ agent_id: str,
+ session_id: str,
+ stream: bool,
+ tool_responses: Iterable[ToolResponseParam],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]:
+ """Resume an agent turn with executed tool call responses.
+
+        When a Turn has the status `awaiting_input` due to pending input from
+        client-side tool calls, this endpoint can be used to submit the outputs from
+        the tool calls once they are ready.
+
+ Args:
+ stream: Whether to stream the response.
+
+ tool_responses: The tool call responses to resume the turn with.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ ...
+
+ @required_args(["agent_id", "session_id", "tool_responses"], ["agent_id", "session_id", "stream", "tool_responses"])
+ async def resume(
+ self,
+ turn_id: str,
+ *,
+ agent_id: str,
+ session_id: str,
+ tool_responses: Iterable[ToolResponseParam],
+ stream: Literal[False] | Literal[True] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]:
+ if not agent_id:
+ raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
+ if not session_id:
+ raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
+ if not turn_id:
+ raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
+ return await self._post(
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
+ body=await async_maybe_transform(
+ {
+ "tool_responses": tool_responses,
+ "stream": stream,
+ },
+ turn_resume_params.TurnResumeParamsStreaming
+ if stream
+ else turn_resume_params.TurnResumeParamsNonStreaming,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Turn,
+ stream=stream or False,
+ stream_cls=AsyncStream[AgentTurnResponseStreamChunk],
+ )
+
+
+class TurnResourceWithRawResponse:
+ def __init__(self, turn: TurnResource) -> None:
+ self._turn = turn
+
+ self.create = to_raw_response_wrapper(
+ turn.create,
+ )
+ self.retrieve = to_raw_response_wrapper(
+ turn.retrieve,
+ )
+ self.resume = to_raw_response_wrapper(
+ turn.resume,
+ )
+
+
+class AsyncTurnResourceWithRawResponse:
+ def __init__(self, turn: AsyncTurnResource) -> None:
+ self._turn = turn
+
+ self.create = async_to_raw_response_wrapper(
+ turn.create,
+ )
+ self.retrieve = async_to_raw_response_wrapper(
+ turn.retrieve,
+ )
+ self.resume = async_to_raw_response_wrapper(
+ turn.resume,
+ )
+
+
+class TurnResourceWithStreamingResponse:
+ def __init__(self, turn: TurnResource) -> None:
+ self._turn = turn
+
+ self.create = to_streamed_response_wrapper(
+ turn.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ turn.retrieve,
+ )
+ self.resume = to_streamed_response_wrapper(
+ turn.resume,
+ )
+
+
+class AsyncTurnResourceWithStreamingResponse:
+ def __init__(self, turn: AsyncTurnResource) -> None:
+ self._turn = turn
+
+ self.create = async_to_streamed_response_wrapper(
+ turn.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ turn.retrieve,
+ )
+ self.resume = async_to_streamed_response_wrapper(
+ turn.resume,
+ )
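Usage note for the turn resource above: a minimal sketch of creating a turn and then resuming it with client-side tool results. The `client.alpha.agents.turn` path is inferred from this patch's resource layout; the base URL, IDs, message/tool-response shapes, and the `turn_id`/`output_message` attributes are assumptions for illustration, not verified API.

```python
# Sketch only: identifiers, payload shapes, and the accessor path below are
# assumptions based on this patch, not a definitive example.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

# Create a non-streaming turn in an existing session (IDs are placeholders).
turn = client.alpha.agents.turn.create(
    "session-123",  # session_id
    agent_id="agent-abc",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
)

# Once the turn reports `awaiting_input`, submit the executed client-side tool
# call results to resume it (tool_responses shape is illustrative).
resumed = client.alpha.agents.turn.resume(
    turn.turn_id,  # assumed attribute on the Turn model
    agent_id="agent-abc",
    session_id="session-123",
    tool_responses=[{"call_id": "call-1", "tool_name": "get_weather", "content": "22 C"}],
)
print(resumed.output_message)  # assumed attribute on the Turn model
```
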
diff --git a/src/llama_stack_client/resources/alpha/alpha.py b/src/llama_stack_client/resources/alpha/alpha.py
new file mode 100644
index 00000000..63ae7e3c
--- /dev/null
+++ b/src/llama_stack_client/resources/alpha/alpha.py
@@ -0,0 +1,230 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from ..._compat import cached_property
+from .eval.eval import (
+ EvalResource,
+ AsyncEvalResource,
+ EvalResourceWithRawResponse,
+ AsyncEvalResourceWithRawResponse,
+ EvalResourceWithStreamingResponse,
+ AsyncEvalResourceWithStreamingResponse,
+)
+from .inference import (
+ InferenceResource,
+ AsyncInferenceResource,
+ InferenceResourceWithRawResponse,
+ AsyncInferenceResourceWithRawResponse,
+ InferenceResourceWithStreamingResponse,
+ AsyncInferenceResourceWithStreamingResponse,
+)
+from .benchmarks import (
+ BenchmarksResource,
+ AsyncBenchmarksResource,
+ BenchmarksResourceWithRawResponse,
+ AsyncBenchmarksResourceWithRawResponse,
+ BenchmarksResourceWithStreamingResponse,
+ AsyncBenchmarksResourceWithStreamingResponse,
+)
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from .agents.agents import (
+ AgentsResource,
+ AsyncAgentsResource,
+ AgentsResourceWithRawResponse,
+ AsyncAgentsResourceWithRawResponse,
+ AgentsResourceWithStreamingResponse,
+ AsyncAgentsResourceWithStreamingResponse,
+)
+from .post_training.post_training import (
+ PostTrainingResource,
+ AsyncPostTrainingResource,
+ PostTrainingResourceWithRawResponse,
+ AsyncPostTrainingResourceWithRawResponse,
+ PostTrainingResourceWithStreamingResponse,
+ AsyncPostTrainingResourceWithStreamingResponse,
+)
+
+__all__ = ["AlphaResource", "AsyncAlphaResource"]
+
+
+class AlphaResource(SyncAPIResource):
+ @cached_property
+ def inference(self) -> InferenceResource:
+ return InferenceResource(self._client)
+
+ @cached_property
+ def post_training(self) -> PostTrainingResource:
+ return PostTrainingResource(self._client)
+
+ @cached_property
+ def benchmarks(self) -> BenchmarksResource:
+ return BenchmarksResource(self._client)
+
+ @cached_property
+ def eval(self) -> EvalResource:
+ return EvalResource(self._client)
+
+ @cached_property
+ def agents(self) -> AgentsResource:
+ return AgentsResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AlphaResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return AlphaResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AlphaResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return AlphaResourceWithStreamingResponse(self)
+
+
+class AsyncAlphaResource(AsyncAPIResource):
+ @cached_property
+ def inference(self) -> AsyncInferenceResource:
+ return AsyncInferenceResource(self._client)
+
+ @cached_property
+ def post_training(self) -> AsyncPostTrainingResource:
+ return AsyncPostTrainingResource(self._client)
+
+ @cached_property
+ def benchmarks(self) -> AsyncBenchmarksResource:
+ return AsyncBenchmarksResource(self._client)
+
+ @cached_property
+ def eval(self) -> AsyncEvalResource:
+ return AsyncEvalResource(self._client)
+
+ @cached_property
+ def agents(self) -> AsyncAgentsResource:
+ return AsyncAgentsResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncAlphaResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncAlphaResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncAlphaResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return AsyncAlphaResourceWithStreamingResponse(self)
+
+
+class AlphaResourceWithRawResponse:
+ def __init__(self, alpha: AlphaResource) -> None:
+ self._alpha = alpha
+
+ @cached_property
+ def inference(self) -> InferenceResourceWithRawResponse:
+ return InferenceResourceWithRawResponse(self._alpha.inference)
+
+ @cached_property
+ def post_training(self) -> PostTrainingResourceWithRawResponse:
+ return PostTrainingResourceWithRawResponse(self._alpha.post_training)
+
+ @cached_property
+ def benchmarks(self) -> BenchmarksResourceWithRawResponse:
+ return BenchmarksResourceWithRawResponse(self._alpha.benchmarks)
+
+ @cached_property
+ def eval(self) -> EvalResourceWithRawResponse:
+ return EvalResourceWithRawResponse(self._alpha.eval)
+
+ @cached_property
+ def agents(self) -> AgentsResourceWithRawResponse:
+ return AgentsResourceWithRawResponse(self._alpha.agents)
+
+
+class AsyncAlphaResourceWithRawResponse:
+ def __init__(self, alpha: AsyncAlphaResource) -> None:
+ self._alpha = alpha
+
+ @cached_property
+ def inference(self) -> AsyncInferenceResourceWithRawResponse:
+ return AsyncInferenceResourceWithRawResponse(self._alpha.inference)
+
+ @cached_property
+ def post_training(self) -> AsyncPostTrainingResourceWithRawResponse:
+ return AsyncPostTrainingResourceWithRawResponse(self._alpha.post_training)
+
+ @cached_property
+ def benchmarks(self) -> AsyncBenchmarksResourceWithRawResponse:
+ return AsyncBenchmarksResourceWithRawResponse(self._alpha.benchmarks)
+
+ @cached_property
+ def eval(self) -> AsyncEvalResourceWithRawResponse:
+ return AsyncEvalResourceWithRawResponse(self._alpha.eval)
+
+ @cached_property
+ def agents(self) -> AsyncAgentsResourceWithRawResponse:
+ return AsyncAgentsResourceWithRawResponse(self._alpha.agents)
+
+
+class AlphaResourceWithStreamingResponse:
+ def __init__(self, alpha: AlphaResource) -> None:
+ self._alpha = alpha
+
+ @cached_property
+ def inference(self) -> InferenceResourceWithStreamingResponse:
+ return InferenceResourceWithStreamingResponse(self._alpha.inference)
+
+ @cached_property
+ def post_training(self) -> PostTrainingResourceWithStreamingResponse:
+ return PostTrainingResourceWithStreamingResponse(self._alpha.post_training)
+
+ @cached_property
+ def benchmarks(self) -> BenchmarksResourceWithStreamingResponse:
+ return BenchmarksResourceWithStreamingResponse(self._alpha.benchmarks)
+
+ @cached_property
+ def eval(self) -> EvalResourceWithStreamingResponse:
+ return EvalResourceWithStreamingResponse(self._alpha.eval)
+
+ @cached_property
+ def agents(self) -> AgentsResourceWithStreamingResponse:
+ return AgentsResourceWithStreamingResponse(self._alpha.agents)
+
+
+class AsyncAlphaResourceWithStreamingResponse:
+ def __init__(self, alpha: AsyncAlphaResource) -> None:
+ self._alpha = alpha
+
+ @cached_property
+ def inference(self) -> AsyncInferenceResourceWithStreamingResponse:
+ return AsyncInferenceResourceWithStreamingResponse(self._alpha.inference)
+
+ @cached_property
+ def post_training(self) -> AsyncPostTrainingResourceWithStreamingResponse:
+ return AsyncPostTrainingResourceWithStreamingResponse(self._alpha.post_training)
+
+ @cached_property
+ def benchmarks(self) -> AsyncBenchmarksResourceWithStreamingResponse:
+ return AsyncBenchmarksResourceWithStreamingResponse(self._alpha.benchmarks)
+
+ @cached_property
+ def eval(self) -> AsyncEvalResourceWithStreamingResponse:
+ return AsyncEvalResourceWithStreamingResponse(self._alpha.eval)
+
+ @cached_property
+ def agents(self) -> AsyncAgentsResourceWithStreamingResponse:
+ return AsyncAgentsResourceWithStreamingResponse(self._alpha.agents)
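Usage note for `AlphaResource`: a sketch of parsed, raw, and streaming access through the alpha namespace, assuming the standard `LlamaStackClient` entry point, a placeholder base URL, and the usual Stainless raw/streaming response helpers (`.parse()`, context-manager usage).

```python
# Sketch: parsed vs. raw vs. streaming access via the alpha namespace (paths
# assumed from the resource layout in this patch; base URL is a placeholder).
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

benchmarks = client.alpha.benchmarks.list()  # parsed BenchmarkListResponse

raw = client.alpha.benchmarks.with_raw_response.list()
print(raw.headers)  # raw httpx headers
print(raw.parse())  # parse into the same BenchmarkListResponse

# Streaming-response variant: the body is not eagerly read.
with client.alpha.benchmarks.with_streaming_response.list() as response:
    print(response.headers)
```
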
diff --git a/src/llama_stack_client/resources/alpha/benchmarks.py b/src/llama_stack_client/resources/alpha/benchmarks.py
new file mode 100644
index 00000000..333b9578
--- /dev/null
+++ b/src/llama_stack_client/resources/alpha/benchmarks.py
@@ -0,0 +1,359 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Type, Union, Iterable, cast
+
+import httpx
+
+from ..._types import Body, Omit, Query, Headers, NoneType, NotGiven, SequenceNotStr, omit, not_given
+from ..._utils import maybe_transform, async_maybe_transform
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ..._wrappers import DataWrapper
+from ...types.alpha import benchmark_register_params
+from ..._base_client import make_request_options
+from ...types.alpha.benchmark import Benchmark
+from ...types.alpha.benchmark_list_response import BenchmarkListResponse
+
+__all__ = ["BenchmarksResource", "AsyncBenchmarksResource"]
+
+
+class BenchmarksResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> BenchmarksResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return BenchmarksResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> BenchmarksResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return BenchmarksResourceWithStreamingResponse(self)
+
+ def retrieve(
+ self,
+ benchmark_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Benchmark:
+ """
+ Get a benchmark by its ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not benchmark_id:
+ raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
+ return self._get(
+ f"/v1alpha/eval/benchmarks/{benchmark_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Benchmark,
+ )
+
+ def list(
+ self,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> BenchmarkListResponse:
+ """List all benchmarks."""
+ return self._get(
+ "/v1alpha/eval/benchmarks",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ post_parser=DataWrapper[BenchmarkListResponse]._unwrapper,
+ ),
+ cast_to=cast(Type[BenchmarkListResponse], DataWrapper[BenchmarkListResponse]),
+ )
+
+ def register(
+ self,
+ *,
+ benchmark_id: str,
+ dataset_id: str,
+ scoring_functions: SequenceNotStr[str],
+ metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | Omit = omit,
+ provider_benchmark_id: str | Omit = omit,
+ provider_id: str | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> None:
+ """
+ Register a benchmark.
+
+ Args:
+ benchmark_id: The ID of the benchmark to register.
+
+ dataset_id: The ID of the dataset to use for the benchmark.
+
+ scoring_functions: The scoring functions to use for the benchmark.
+
+ metadata: The metadata to use for the benchmark.
+
+ provider_benchmark_id: The ID of the provider benchmark to use for the benchmark.
+
+ provider_id: The ID of the provider to use for the benchmark.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+ return self._post(
+ "/v1alpha/eval/benchmarks",
+ body=maybe_transform(
+ {
+ "benchmark_id": benchmark_id,
+ "dataset_id": dataset_id,
+ "scoring_functions": scoring_functions,
+ "metadata": metadata,
+ "provider_benchmark_id": provider_benchmark_id,
+ "provider_id": provider_id,
+ },
+ benchmark_register_params.BenchmarkRegisterParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=NoneType,
+ )
+
+
+class AsyncBenchmarksResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncBenchmarksResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncBenchmarksResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncBenchmarksResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return AsyncBenchmarksResourceWithStreamingResponse(self)
+
+ async def retrieve(
+ self,
+ benchmark_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Benchmark:
+ """
+ Get a benchmark by its ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not benchmark_id:
+ raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
+ return await self._get(
+ f"/v1alpha/eval/benchmarks/{benchmark_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Benchmark,
+ )
+
+ async def list(
+ self,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> BenchmarkListResponse:
+ """List all benchmarks."""
+ return await self._get(
+ "/v1alpha/eval/benchmarks",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ post_parser=DataWrapper[BenchmarkListResponse]._unwrapper,
+ ),
+ cast_to=cast(Type[BenchmarkListResponse], DataWrapper[BenchmarkListResponse]),
+ )
+
+ async def register(
+ self,
+ *,
+ benchmark_id: str,
+ dataset_id: str,
+ scoring_functions: SequenceNotStr[str],
+ metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | Omit = omit,
+ provider_benchmark_id: str | Omit = omit,
+ provider_id: str | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> None:
+ """
+ Register a benchmark.
+
+ Args:
+ benchmark_id: The ID of the benchmark to register.
+
+ dataset_id: The ID of the dataset to use for the benchmark.
+
+ scoring_functions: The scoring functions to use for the benchmark.
+
+ metadata: The metadata to use for the benchmark.
+
+ provider_benchmark_id: The ID of the provider benchmark to use for the benchmark.
+
+ provider_id: The ID of the provider to use for the benchmark.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+ return await self._post(
+ "/v1alpha/eval/benchmarks",
+ body=await async_maybe_transform(
+ {
+ "benchmark_id": benchmark_id,
+ "dataset_id": dataset_id,
+ "scoring_functions": scoring_functions,
+ "metadata": metadata,
+ "provider_benchmark_id": provider_benchmark_id,
+ "provider_id": provider_id,
+ },
+ benchmark_register_params.BenchmarkRegisterParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=NoneType,
+ )
+
+
+class BenchmarksResourceWithRawResponse:
+ def __init__(self, benchmarks: BenchmarksResource) -> None:
+ self._benchmarks = benchmarks
+
+ self.retrieve = to_raw_response_wrapper(
+ benchmarks.retrieve,
+ )
+ self.list = to_raw_response_wrapper(
+ benchmarks.list,
+ )
+ self.register = to_raw_response_wrapper(
+ benchmarks.register,
+ )
+
+
+class AsyncBenchmarksResourceWithRawResponse:
+ def __init__(self, benchmarks: AsyncBenchmarksResource) -> None:
+ self._benchmarks = benchmarks
+
+ self.retrieve = async_to_raw_response_wrapper(
+ benchmarks.retrieve,
+ )
+ self.list = async_to_raw_response_wrapper(
+ benchmarks.list,
+ )
+ self.register = async_to_raw_response_wrapper(
+ benchmarks.register,
+ )
+
+
+class BenchmarksResourceWithStreamingResponse:
+ def __init__(self, benchmarks: BenchmarksResource) -> None:
+ self._benchmarks = benchmarks
+
+ self.retrieve = to_streamed_response_wrapper(
+ benchmarks.retrieve,
+ )
+ self.list = to_streamed_response_wrapper(
+ benchmarks.list,
+ )
+ self.register = to_streamed_response_wrapper(
+ benchmarks.register,
+ )
+
+
+class AsyncBenchmarksResourceWithStreamingResponse:
+ def __init__(self, benchmarks: AsyncBenchmarksResource) -> None:
+ self._benchmarks = benchmarks
+
+ self.retrieve = async_to_streamed_response_wrapper(
+ benchmarks.retrieve,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ benchmarks.list,
+ )
+ self.register = async_to_streamed_response_wrapper(
+ benchmarks.register,
+ )
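Usage note for the benchmarks resource: a sketch of registering and then retrieving a benchmark. The accessor path is inferred from this patch; the IDs and scoring-function name are placeholders.

```python
# Sketch: register a benchmark against an existing dataset, then fetch it back.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

client.alpha.benchmarks.register(
    benchmark_id="my-benchmark",          # placeholder
    dataset_id="my-dataset",              # placeholder
    scoring_functions=["basic::equality"],  # placeholder scoring function id
)

benchmark = client.alpha.benchmarks.retrieve("my-benchmark")
print(benchmark)
```
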
diff --git a/src/llama_stack_client/resources/alpha/eval/__init__.py b/src/llama_stack_client/resources/alpha/eval/__init__.py
new file mode 100644
index 00000000..f6473395
--- /dev/null
+++ b/src/llama_stack_client/resources/alpha/eval/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .eval import (
+ EvalResource,
+ AsyncEvalResource,
+ EvalResourceWithRawResponse,
+ AsyncEvalResourceWithRawResponse,
+ EvalResourceWithStreamingResponse,
+ AsyncEvalResourceWithStreamingResponse,
+)
+from .jobs import (
+ JobsResource,
+ AsyncJobsResource,
+ JobsResourceWithRawResponse,
+ AsyncJobsResourceWithRawResponse,
+ JobsResourceWithStreamingResponse,
+ AsyncJobsResourceWithStreamingResponse,
+)
+
+__all__ = [
+ "JobsResource",
+ "AsyncJobsResource",
+ "JobsResourceWithRawResponse",
+ "AsyncJobsResourceWithRawResponse",
+ "JobsResourceWithStreamingResponse",
+ "AsyncJobsResourceWithStreamingResponse",
+ "EvalResource",
+ "AsyncEvalResource",
+ "EvalResourceWithRawResponse",
+ "AsyncEvalResourceWithRawResponse",
+ "EvalResourceWithStreamingResponse",
+ "AsyncEvalResourceWithStreamingResponse",
+]
diff --git a/src/llama_stack_client/resources/alpha/eval/eval.py b/src/llama_stack_client/resources/alpha/eval/eval.py
new file mode 100644
index 00000000..b5347c0b
--- /dev/null
+++ b/src/llama_stack_client/resources/alpha/eval/eval.py
@@ -0,0 +1,530 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable
+
+import httpx
+
+from .jobs import (
+ JobsResource,
+ AsyncJobsResource,
+ JobsResourceWithRawResponse,
+ AsyncJobsResourceWithRawResponse,
+ JobsResourceWithStreamingResponse,
+ AsyncJobsResourceWithStreamingResponse,
+)
+from ...._types import Body, Query, Headers, NotGiven, SequenceNotStr, not_given
+from ...._utils import maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ....types.alpha import (
+ eval_run_eval_params,
+ eval_evaluate_rows_params,
+ eval_run_eval_alpha_params,
+ eval_evaluate_rows_alpha_params,
+)
+from ...._base_client import make_request_options
+from ....types.alpha.job import Job
+from ....types.alpha.evaluate_response import EvaluateResponse
+from ....types.alpha.benchmark_config_param import BenchmarkConfigParam
+
+__all__ = ["EvalResource", "AsyncEvalResource"]
+
+
+class EvalResource(SyncAPIResource):
+ @cached_property
+ def jobs(self) -> JobsResource:
+ return JobsResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> EvalResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return EvalResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> EvalResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return EvalResourceWithStreamingResponse(self)
+
+ def evaluate_rows(
+ self,
+ benchmark_id: str,
+ *,
+ benchmark_config: BenchmarkConfigParam,
+ input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]],
+ scoring_functions: SequenceNotStr[str],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> EvaluateResponse:
+ """
+ Evaluate a list of rows on a benchmark.
+
+ Args:
+ benchmark_config: The configuration for the benchmark.
+
+ input_rows: The rows to evaluate.
+
+ scoring_functions: The scoring functions to use for the evaluation.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not benchmark_id:
+ raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
+ return self._post(
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/evaluations",
+ body=maybe_transform(
+ {
+ "benchmark_config": benchmark_config,
+ "input_rows": input_rows,
+ "scoring_functions": scoring_functions,
+ },
+ eval_evaluate_rows_params.EvalEvaluateRowsParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EvaluateResponse,
+ )
+
+ def evaluate_rows_alpha(
+ self,
+ benchmark_id: str,
+ *,
+ benchmark_config: BenchmarkConfigParam,
+ input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]],
+ scoring_functions: SequenceNotStr[str],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> EvaluateResponse:
+ """
+ Evaluate a list of rows on a benchmark.
+
+ Args:
+ benchmark_config: The configuration for the benchmark.
+
+ input_rows: The rows to evaluate.
+
+ scoring_functions: The scoring functions to use for the evaluation.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not benchmark_id:
+ raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
+ return self._post(
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/evaluations",
+ body=maybe_transform(
+ {
+ "benchmark_config": benchmark_config,
+ "input_rows": input_rows,
+ "scoring_functions": scoring_functions,
+ },
+ eval_evaluate_rows_alpha_params.EvalEvaluateRowsAlphaParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EvaluateResponse,
+ )
+
+ def run_eval(
+ self,
+ benchmark_id: str,
+ *,
+ benchmark_config: BenchmarkConfigParam,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Job:
+ """
+ Run an evaluation on a benchmark.
+
+ Args:
+ benchmark_config: The configuration for the benchmark.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not benchmark_id:
+ raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
+ return self._post(
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs",
+ body=maybe_transform({"benchmark_config": benchmark_config}, eval_run_eval_params.EvalRunEvalParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Job,
+ )
+
+ def run_eval_alpha(
+ self,
+ benchmark_id: str,
+ *,
+ benchmark_config: BenchmarkConfigParam,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Job:
+ """
+ Run an evaluation on a benchmark.
+
+ Args:
+ benchmark_config: The configuration for the benchmark.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not benchmark_id:
+ raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
+ return self._post(
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs",
+ body=maybe_transform(
+ {"benchmark_config": benchmark_config}, eval_run_eval_alpha_params.EvalRunEvalAlphaParams
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Job,
+ )
+
+
+class AsyncEvalResource(AsyncAPIResource):
+ @cached_property
+ def jobs(self) -> AsyncJobsResource:
+ return AsyncJobsResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncEvalResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncEvalResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncEvalResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return AsyncEvalResourceWithStreamingResponse(self)
+
+ async def evaluate_rows(
+ self,
+ benchmark_id: str,
+ *,
+ benchmark_config: BenchmarkConfigParam,
+ input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]],
+ scoring_functions: SequenceNotStr[str],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> EvaluateResponse:
+ """
+ Evaluate a list of rows on a benchmark.
+
+ Args:
+ benchmark_config: The configuration for the benchmark.
+
+ input_rows: The rows to evaluate.
+
+ scoring_functions: The scoring functions to use for the evaluation.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not benchmark_id:
+ raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
+ return await self._post(
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/evaluations",
+ body=await async_maybe_transform(
+ {
+ "benchmark_config": benchmark_config,
+ "input_rows": input_rows,
+ "scoring_functions": scoring_functions,
+ },
+ eval_evaluate_rows_params.EvalEvaluateRowsParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EvaluateResponse,
+ )
+
+ async def evaluate_rows_alpha(
+ self,
+ benchmark_id: str,
+ *,
+ benchmark_config: BenchmarkConfigParam,
+ input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]],
+ scoring_functions: SequenceNotStr[str],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> EvaluateResponse:
+ """
+ Evaluate a list of rows on a benchmark.
+
+ Args:
+ benchmark_config: The configuration for the benchmark.
+
+ input_rows: The rows to evaluate.
+
+ scoring_functions: The scoring functions to use for the evaluation.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not benchmark_id:
+ raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
+ return await self._post(
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/evaluations",
+ body=await async_maybe_transform(
+ {
+ "benchmark_config": benchmark_config,
+ "input_rows": input_rows,
+ "scoring_functions": scoring_functions,
+ },
+ eval_evaluate_rows_alpha_params.EvalEvaluateRowsAlphaParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EvaluateResponse,
+ )
+
+ async def run_eval(
+ self,
+ benchmark_id: str,
+ *,
+ benchmark_config: BenchmarkConfigParam,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Job:
+ """
+ Run an evaluation on a benchmark.
+
+ Args:
+ benchmark_config: The configuration for the benchmark.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not benchmark_id:
+ raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
+ return await self._post(
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs",
+ body=await async_maybe_transform(
+ {"benchmark_config": benchmark_config}, eval_run_eval_params.EvalRunEvalParams
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Job,
+ )
+
+ async def run_eval_alpha(
+ self,
+ benchmark_id: str,
+ *,
+ benchmark_config: BenchmarkConfigParam,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Job:
+ """
+ Run an evaluation on a benchmark.
+
+ Args:
+ benchmark_config: The configuration for the benchmark.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not benchmark_id:
+ raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
+ return await self._post(
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs",
+ body=await async_maybe_transform(
+ {"benchmark_config": benchmark_config}, eval_run_eval_alpha_params.EvalRunEvalAlphaParams
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Job,
+ )
+
+
+class EvalResourceWithRawResponse:
+ def __init__(self, eval: EvalResource) -> None:
+ self._eval = eval
+
+ self.evaluate_rows = to_raw_response_wrapper(
+ eval.evaluate_rows,
+ )
+ self.evaluate_rows_alpha = to_raw_response_wrapper(
+ eval.evaluate_rows_alpha,
+ )
+ self.run_eval = to_raw_response_wrapper(
+ eval.run_eval,
+ )
+ self.run_eval_alpha = to_raw_response_wrapper(
+ eval.run_eval_alpha,
+ )
+
+ @cached_property
+ def jobs(self) -> JobsResourceWithRawResponse:
+ return JobsResourceWithRawResponse(self._eval.jobs)
+
+
+class AsyncEvalResourceWithRawResponse:
+ def __init__(self, eval: AsyncEvalResource) -> None:
+ self._eval = eval
+
+ self.evaluate_rows = async_to_raw_response_wrapper(
+ eval.evaluate_rows,
+ )
+ self.evaluate_rows_alpha = async_to_raw_response_wrapper(
+ eval.evaluate_rows_alpha,
+ )
+ self.run_eval = async_to_raw_response_wrapper(
+ eval.run_eval,
+ )
+ self.run_eval_alpha = async_to_raw_response_wrapper(
+ eval.run_eval_alpha,
+ )
+
+ @cached_property
+ def jobs(self) -> AsyncJobsResourceWithRawResponse:
+ return AsyncJobsResourceWithRawResponse(self._eval.jobs)
+
+
+class EvalResourceWithStreamingResponse:
+ def __init__(self, eval: EvalResource) -> None:
+ self._eval = eval
+
+ self.evaluate_rows = to_streamed_response_wrapper(
+ eval.evaluate_rows,
+ )
+ self.evaluate_rows_alpha = to_streamed_response_wrapper(
+ eval.evaluate_rows_alpha,
+ )
+ self.run_eval = to_streamed_response_wrapper(
+ eval.run_eval,
+ )
+ self.run_eval_alpha = to_streamed_response_wrapper(
+ eval.run_eval_alpha,
+ )
+
+ @cached_property
+ def jobs(self) -> JobsResourceWithStreamingResponse:
+ return JobsResourceWithStreamingResponse(self._eval.jobs)
+
+
+class AsyncEvalResourceWithStreamingResponse:
+ def __init__(self, eval: AsyncEvalResource) -> None:
+ self._eval = eval
+
+ self.evaluate_rows = async_to_streamed_response_wrapper(
+ eval.evaluate_rows,
+ )
+ self.evaluate_rows_alpha = async_to_streamed_response_wrapper(
+ eval.evaluate_rows_alpha,
+ )
+ self.run_eval = async_to_streamed_response_wrapper(
+ eval.run_eval,
+ )
+ self.run_eval_alpha = async_to_streamed_response_wrapper(
+ eval.run_eval_alpha,
+ )
+
+ @cached_property
+ def jobs(self) -> AsyncJobsResourceWithStreamingResponse:
+ return AsyncJobsResourceWithStreamingResponse(self._eval.jobs)
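Usage note for the eval resource: a sketch of starting an eval run and polling the resulting job. The `benchmark_config` fields, the job's `job_id`/`status` attributes, and the `"completed"` status value are assumptions for illustration; only the method names and parameters come from this patch.

```python
# Sketch: kick off an eval run, poll its job, then fetch the result.
import time

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

benchmark_config = {
    "eval_candidate": {  # illustrative fields; see BenchmarkConfigParam for the real schema
        "type": "model",
        "model": "llama3.2-3b",
        "sampling_params": {"strategy": {"type": "greedy"}},
    },
}

job = client.alpha.eval.run_eval("my-benchmark", benchmark_config=benchmark_config)

# Poll the job until it finishes (status values are assumed), then fetch the result.
while client.alpha.eval.jobs.status(job.job_id, benchmark_id="my-benchmark").status != "completed":
    time.sleep(5)

result = client.alpha.eval.jobs.retrieve(job.job_id, benchmark_id="my-benchmark")
print(result)
```
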
diff --git a/src/llama_stack_client/resources/alpha/eval/jobs.py b/src/llama_stack_client/resources/alpha/eval/jobs.py
new file mode 100644
index 00000000..8f0fa026
--- /dev/null
+++ b/src/llama_stack_client/resources/alpha/eval/jobs.py
@@ -0,0 +1,340 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from ...._types import Body, Query, Headers, NoneType, NotGiven, not_given
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ...._base_client import make_request_options
+from ....types.alpha.job import Job
+from ....types.alpha.evaluate_response import EvaluateResponse
+
+__all__ = ["JobsResource", "AsyncJobsResource"]
+
+
+class JobsResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> JobsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return JobsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> JobsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return JobsResourceWithStreamingResponse(self)
+
+ def retrieve(
+ self,
+ job_id: str,
+ *,
+ benchmark_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> EvaluateResponse:
+ """
+ Get the result of a job.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not benchmark_id:
+ raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
+ if not job_id:
+ raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
+ return self._get(
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EvaluateResponse,
+ )
+
+ def cancel(
+ self,
+ job_id: str,
+ *,
+ benchmark_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> None:
+ """
+ Cancel a job.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not benchmark_id:
+ raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
+ if not job_id:
+ raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+ return self._delete(
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=NoneType,
+ )
+
+ def status(
+ self,
+ job_id: str,
+ *,
+ benchmark_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Job:
+ """
+ Get the status of a job.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not benchmark_id:
+ raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
+ if not job_id:
+ raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
+ return self._get(
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Job,
+ )
+
+
+class AsyncJobsResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncJobsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncJobsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncJobsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return AsyncJobsResourceWithStreamingResponse(self)
+
+ async def retrieve(
+ self,
+ job_id: str,
+ *,
+ benchmark_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> EvaluateResponse:
+ """
+ Get the result of a job.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not benchmark_id:
+ raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
+ if not job_id:
+ raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
+ return await self._get(
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EvaluateResponse,
+ )
+
+ async def cancel(
+ self,
+ job_id: str,
+ *,
+ benchmark_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> None:
+ """
+ Cancel a job.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not benchmark_id:
+ raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
+ if not job_id:
+ raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+ return await self._delete(
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=NoneType,
+ )
+
+ async def status(
+ self,
+ job_id: str,
+ *,
+ benchmark_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Job:
+ """
+ Get the status of a job.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not benchmark_id:
+ raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
+ if not job_id:
+ raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
+ return await self._get(
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Job,
+ )
+
+
+class JobsResourceWithRawResponse:
+ def __init__(self, jobs: JobsResource) -> None:
+ self._jobs = jobs
+
+ self.retrieve = to_raw_response_wrapper(
+ jobs.retrieve,
+ )
+ self.cancel = to_raw_response_wrapper(
+ jobs.cancel,
+ )
+ self.status = to_raw_response_wrapper(
+ jobs.status,
+ )
+
+
+class AsyncJobsResourceWithRawResponse:
+ def __init__(self, jobs: AsyncJobsResource) -> None:
+ self._jobs = jobs
+
+ self.retrieve = async_to_raw_response_wrapper(
+ jobs.retrieve,
+ )
+ self.cancel = async_to_raw_response_wrapper(
+ jobs.cancel,
+ )
+ self.status = async_to_raw_response_wrapper(
+ jobs.status,
+ )
+
+
+class JobsResourceWithStreamingResponse:
+ def __init__(self, jobs: JobsResource) -> None:
+ self._jobs = jobs
+
+ self.retrieve = to_streamed_response_wrapper(
+ jobs.retrieve,
+ )
+ self.cancel = to_streamed_response_wrapper(
+ jobs.cancel,
+ )
+ self.status = to_streamed_response_wrapper(
+ jobs.status,
+ )
+
+
+class AsyncJobsResourceWithStreamingResponse:
+ def __init__(self, jobs: AsyncJobsResource) -> None:
+ self._jobs = jobs
+
+ self.retrieve = async_to_streamed_response_wrapper(
+ jobs.retrieve,
+ )
+ self.cancel = async_to_streamed_response_wrapper(
+ jobs.cancel,
+ )
+ self.status = async_to_streamed_response_wrapper(
+ jobs.status,
+ )
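
For orientation, here is a minimal usage sketch of the eval jobs surface added above. It assumes a reachable Llama Stack server and the package's top-level `LlamaStackClient`; the `client.alpha.eval.jobs` attribute path is inferred from the resource layout in this patch rather than stated by it, and the identifiers are placeholders.

# Hedged sketch: poll, fetch, and cancel an eval job via the JobsResource defined above.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server address

benchmark_id = "my-benchmark"   # placeholder
job_id = "eval-job-123"         # placeholder

# GET /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
job = client.alpha.eval.jobs.status(job_id, benchmark_id=benchmark_id)
print(job)

# GET /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result
result = client.alpha.eval.jobs.retrieve(job_id, benchmark_id=benchmark_id)
print(result)

# DELETE /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}; returns None on success.
client.alpha.eval.jobs.cancel(job_id, benchmark_id=benchmark_id)

# The generated with_raw_response / with_streaming_response wrappers expose the same
# methods but hand back the HTTP response object instead of the parsed model, e.g.:
raw = client.alpha.eval.jobs.with_raw_response.status(job_id, benchmark_id=benchmark_id)
print(raw.headers)  # illustrative; the raw-response attribute names are an assumption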
diff --git a/src/llama_stack_client/resources/alpha/inference.py b/src/llama_stack_client/resources/alpha/inference.py
new file mode 100644
index 00000000..ca259357
--- /dev/null
+++ b/src/llama_stack_client/resources/alpha/inference.py
@@ -0,0 +1,218 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Type, cast
+
+import httpx
+
+from ..._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
+from ..._utils import maybe_transform, async_maybe_transform
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ..._wrappers import DataWrapper
+from ...types.alpha import inference_rerank_params
+from ..._base_client import make_request_options
+from ...types.alpha.inference_rerank_response import InferenceRerankResponse
+
+__all__ = ["InferenceResource", "AsyncInferenceResource"]
+
+
+class InferenceResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> InferenceResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return InferenceResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> InferenceResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return InferenceResourceWithStreamingResponse(self)
+
+ def rerank(
+ self,
+ *,
+ items: SequenceNotStr[inference_rerank_params.Item],
+ model: str,
+ query: inference_rerank_params.Query,
+ max_num_results: int | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> InferenceRerankResponse:
+ """
+ Rerank a list of documents based on their relevance to a query.
+
+ Args:
+ items: List of items to rerank. Each item can be a string, text content part, or image
+ content part. Each input must not exceed the model's max input token length.
+
+ model: The identifier of the reranking model to use.
+
+ query: The search query to rank items against. Can be a string, text content part, or
+ image content part. The input must not exceed the model's max input token
+ length.
+
+ max_num_results: (Optional) Maximum number of results to return. Default: returns all.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/v1alpha/inference/rerank",
+ body=maybe_transform(
+ {
+ "items": items,
+ "model": model,
+ "query": query,
+ "max_num_results": max_num_results,
+ },
+ inference_rerank_params.InferenceRerankParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ post_parser=DataWrapper[InferenceRerankResponse]._unwrapper,
+ ),
+ cast_to=cast(Type[InferenceRerankResponse], DataWrapper[InferenceRerankResponse]),
+ )
+
+
+class AsyncInferenceResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncInferenceResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncInferenceResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncInferenceResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return AsyncInferenceResourceWithStreamingResponse(self)
+
+ async def rerank(
+ self,
+ *,
+ items: SequenceNotStr[inference_rerank_params.Item],
+ model: str,
+ query: inference_rerank_params.Query,
+ max_num_results: int | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> InferenceRerankResponse:
+ """
+ Rerank a list of documents based on their relevance to a query.
+
+ Args:
+ items: List of items to rerank. Each item can be a string, text content part, or image
+ content part. Each input must not exceed the model's max input token length.
+
+ model: The identifier of the reranking model to use.
+
+ query: The search query to rank items against. Can be a string, text content part, or
+ image content part. The input must not exceed the model's max input token
+ length.
+
+ max_num_results: (Optional) Maximum number of results to return. Default: returns all.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/v1alpha/inference/rerank",
+ body=await async_maybe_transform(
+ {
+ "items": items,
+ "model": model,
+ "query": query,
+ "max_num_results": max_num_results,
+ },
+ inference_rerank_params.InferenceRerankParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ post_parser=DataWrapper[InferenceRerankResponse]._unwrapper,
+ ),
+ cast_to=cast(Type[InferenceRerankResponse], DataWrapper[InferenceRerankResponse]),
+ )
+
+
+class InferenceResourceWithRawResponse:
+ def __init__(self, inference: InferenceResource) -> None:
+ self._inference = inference
+
+ self.rerank = to_raw_response_wrapper(
+ inference.rerank,
+ )
+
+
+class AsyncInferenceResourceWithRawResponse:
+ def __init__(self, inference: AsyncInferenceResource) -> None:
+ self._inference = inference
+
+ self.rerank = async_to_raw_response_wrapper(
+ inference.rerank,
+ )
+
+
+class InferenceResourceWithStreamingResponse:
+ def __init__(self, inference: InferenceResource) -> None:
+ self._inference = inference
+
+ self.rerank = to_streamed_response_wrapper(
+ inference.rerank,
+ )
+
+
+class AsyncInferenceResourceWithStreamingResponse:
+ def __init__(self, inference: AsyncInferenceResource) -> None:
+ self._inference = inference
+
+ self.rerank = async_to_streamed_response_wrapper(
+ inference.rerank,
+ )
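
The rerank endpoint above is a single POST with three required arguments; a hedged sketch follows. The `client.alpha.inference` attribute path and treating the unwrapped response as an iterable list are assumptions based on the resource layout and the DataWrapper post-parser in this patch; the model id is a placeholder.

# Hedged sketch: rerank a few candidate strings against a query
# (POST /v1alpha/inference/rerank).
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server address

ranked = client.alpha.inference.rerank(
    model="my-reranker-model",  # placeholder reranking model id
    query="What is the capital of France?",
    items=[
        "Paris is the capital of France.",
        "Berlin is the capital of Germany.",
        "The Eiffel Tower is in Paris.",
    ],
    max_num_results=2,  # optional; omit to return all items
)
for item in ranked:  # assumption: the unwrapped response behaves like a list of results
    print(item)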
diff --git a/src/llama_stack_client/resources/alpha/post_training/__init__.py b/src/llama_stack_client/resources/alpha/post_training/__init__.py
new file mode 100644
index 00000000..e1fa2361
--- /dev/null
+++ b/src/llama_stack_client/resources/alpha/post_training/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .job import (
+ JobResource,
+ AsyncJobResource,
+ JobResourceWithRawResponse,
+ AsyncJobResourceWithRawResponse,
+ JobResourceWithStreamingResponse,
+ AsyncJobResourceWithStreamingResponse,
+)
+from .post_training import (
+ PostTrainingResource,
+ AsyncPostTrainingResource,
+ PostTrainingResourceWithRawResponse,
+ AsyncPostTrainingResourceWithRawResponse,
+ PostTrainingResourceWithStreamingResponse,
+ AsyncPostTrainingResourceWithStreamingResponse,
+)
+
+__all__ = [
+ "JobResource",
+ "AsyncJobResource",
+ "JobResourceWithRawResponse",
+ "AsyncJobResourceWithRawResponse",
+ "JobResourceWithStreamingResponse",
+ "AsyncJobResourceWithStreamingResponse",
+ "PostTrainingResource",
+ "AsyncPostTrainingResource",
+ "PostTrainingResourceWithRawResponse",
+ "AsyncPostTrainingResourceWithRawResponse",
+ "PostTrainingResourceWithStreamingResponse",
+ "AsyncPostTrainingResourceWithStreamingResponse",
+]
diff --git a/src/llama_stack_client/resources/alpha/post_training/job.py b/src/llama_stack_client/resources/alpha/post_training/job.py
new file mode 100644
index 00000000..d9b7173e
--- /dev/null
+++ b/src/llama_stack_client/resources/alpha/post_training/job.py
@@ -0,0 +1,404 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Type, cast
+
+import httpx
+
+from ...._types import Body, Query, Headers, NoneType, NotGiven, not_given
+from ...._utils import maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ...._wrappers import DataWrapper
+from ...._base_client import make_request_options
+from ....types.alpha.post_training import job_cancel_params, job_status_params, job_artifacts_params
+from ....types.alpha.post_training.job_list_response import JobListResponse
+from ....types.alpha.post_training.job_status_response import JobStatusResponse
+from ....types.alpha.post_training.job_artifacts_response import JobArtifactsResponse
+
+__all__ = ["JobResource", "AsyncJobResource"]
+
+
+class JobResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> JobResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return JobResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> JobResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return JobResourceWithStreamingResponse(self)
+
+ def list(
+ self,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> JobListResponse:
+ """Get all training jobs."""
+ return self._get(
+ "/v1alpha/post-training/jobs",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ post_parser=DataWrapper[JobListResponse]._unwrapper,
+ ),
+ cast_to=cast(Type[JobListResponse], DataWrapper[JobListResponse]),
+ )
+
+ def artifacts(
+ self,
+ *,
+ job_uuid: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> JobArtifactsResponse:
+ """
+ Get the artifacts of a training job.
+
+ Args:
+ job_uuid: The UUID of the job to get the artifacts of.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._get(
+ "/v1alpha/post-training/job/artifacts",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform({"job_uuid": job_uuid}, job_artifacts_params.JobArtifactsParams),
+ ),
+ cast_to=JobArtifactsResponse,
+ )
+
+ def cancel(
+ self,
+ *,
+ job_uuid: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> None:
+ """
+ Cancel a training job.
+
+ Args:
+ job_uuid: The UUID of the job to cancel.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+ return self._post(
+ "/v1alpha/post-training/job/cancel",
+ body=maybe_transform({"job_uuid": job_uuid}, job_cancel_params.JobCancelParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=NoneType,
+ )
+
+ def status(
+ self,
+ *,
+ job_uuid: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> JobStatusResponse:
+ """
+ Get the status of a training job.
+
+ Args:
+ job_uuid: The UUID of the job to get the status of.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._get(
+ "/v1alpha/post-training/job/status",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform({"job_uuid": job_uuid}, job_status_params.JobStatusParams),
+ ),
+ cast_to=JobStatusResponse,
+ )
+
+
+class AsyncJobResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncJobResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncJobResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncJobResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return AsyncJobResourceWithStreamingResponse(self)
+
+ async def list(
+ self,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> JobListResponse:
+ """Get all training jobs."""
+ return await self._get(
+ "/v1alpha/post-training/jobs",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ post_parser=DataWrapper[JobListResponse]._unwrapper,
+ ),
+ cast_to=cast(Type[JobListResponse], DataWrapper[JobListResponse]),
+ )
+
+ async def artifacts(
+ self,
+ *,
+ job_uuid: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> JobArtifactsResponse:
+ """
+ Get the artifacts of a training job.
+
+ Args:
+ job_uuid: The UUID of the job to get the artifacts of.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._get(
+ "/v1alpha/post-training/job/artifacts",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=await async_maybe_transform({"job_uuid": job_uuid}, job_artifacts_params.JobArtifactsParams),
+ ),
+ cast_to=JobArtifactsResponse,
+ )
+
+ async def cancel(
+ self,
+ *,
+ job_uuid: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> None:
+ """
+ Cancel a training job.
+
+ Args:
+ job_uuid: The UUID of the job to cancel.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+ return await self._post(
+ "/v1alpha/post-training/job/cancel",
+ body=await async_maybe_transform({"job_uuid": job_uuid}, job_cancel_params.JobCancelParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=NoneType,
+ )
+
+ async def status(
+ self,
+ *,
+ job_uuid: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> JobStatusResponse:
+ """
+ Get the status of a training job.
+
+ Args:
+ job_uuid: The UUID of the job to get the status of.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._get(
+ "/v1alpha/post-training/job/status",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=await async_maybe_transform({"job_uuid": job_uuid}, job_status_params.JobStatusParams),
+ ),
+ cast_to=JobStatusResponse,
+ )
+
+
+class JobResourceWithRawResponse:
+ def __init__(self, job: JobResource) -> None:
+ self._job = job
+
+ self.list = to_raw_response_wrapper(
+ job.list,
+ )
+ self.artifacts = to_raw_response_wrapper(
+ job.artifacts,
+ )
+ self.cancel = to_raw_response_wrapper(
+ job.cancel,
+ )
+ self.status = to_raw_response_wrapper(
+ job.status,
+ )
+
+
+class AsyncJobResourceWithRawResponse:
+ def __init__(self, job: AsyncJobResource) -> None:
+ self._job = job
+
+ self.list = async_to_raw_response_wrapper(
+ job.list,
+ )
+ self.artifacts = async_to_raw_response_wrapper(
+ job.artifacts,
+ )
+ self.cancel = async_to_raw_response_wrapper(
+ job.cancel,
+ )
+ self.status = async_to_raw_response_wrapper(
+ job.status,
+ )
+
+
+class JobResourceWithStreamingResponse:
+ def __init__(self, job: JobResource) -> None:
+ self._job = job
+
+ self.list = to_streamed_response_wrapper(
+ job.list,
+ )
+ self.artifacts = to_streamed_response_wrapper(
+ job.artifacts,
+ )
+ self.cancel = to_streamed_response_wrapper(
+ job.cancel,
+ )
+ self.status = to_streamed_response_wrapper(
+ job.status,
+ )
+
+
+class AsyncJobResourceWithStreamingResponse:
+ def __init__(self, job: AsyncJobResource) -> None:
+ self._job = job
+
+ self.list = async_to_streamed_response_wrapper(
+ job.list,
+ )
+ self.artifacts = async_to_streamed_response_wrapper(
+ job.artifacts,
+ )
+ self.cancel = async_to_streamed_response_wrapper(
+ job.cancel,
+ )
+ self.status = async_to_streamed_response_wrapper(
+ job.status,
+ )
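
The post-training job resource above is query-parameter driven (every endpoint takes `job_uuid` except `list`). A hedged sketch of the four calls, assuming the `client.alpha.post_training.job` attribute path implied by the resource nesting in this patch; the UUID is a placeholder.

# Hedged sketch: enumerate, inspect, and cancel post-training jobs.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server address

# GET /v1alpha/post-training/jobs (response unwrapped from its `data` envelope)
jobs = client.alpha.post_training.job.list()
print(jobs)

# GET /v1alpha/post-training/job/status?job_uuid=...
status = client.alpha.post_training.job.status(job_uuid="job-uuid-123")
print(status)

# GET /v1alpha/post-training/job/artifacts?job_uuid=...
artifacts = client.alpha.post_training.job.artifacts(job_uuid="job-uuid-123")
print(artifacts)

# POST /v1alpha/post-training/job/cancel; returns None on success.
client.alpha.post_training.job.cancel(job_uuid="job-uuid-123")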
diff --git a/src/llama_stack_client/resources/alpha/post_training/post_training.py b/src/llama_stack_client/resources/alpha/post_training/post_training.py
new file mode 100644
index 00000000..a26c813a
--- /dev/null
+++ b/src/llama_stack_client/resources/alpha/post_training/post_training.py
@@ -0,0 +1,393 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable
+
+import httpx
+
+from .job import (
+ JobResource,
+ AsyncJobResource,
+ JobResourceWithRawResponse,
+ AsyncJobResourceWithRawResponse,
+ JobResourceWithStreamingResponse,
+ AsyncJobResourceWithStreamingResponse,
+)
+from ...._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
+from ...._utils import maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ....types.alpha import (
+ post_training_preference_optimize_params,
+ post_training_supervised_fine_tune_params,
+)
+from ...._base_client import make_request_options
+from ....types.alpha.post_training_job import PostTrainingJob
+from ....types.alpha.algorithm_config_param import AlgorithmConfigParam
+
+__all__ = ["PostTrainingResource", "AsyncPostTrainingResource"]
+
+
+class PostTrainingResource(SyncAPIResource):
+ @cached_property
+ def job(self) -> JobResource:
+ return JobResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> PostTrainingResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return PostTrainingResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> PostTrainingResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return PostTrainingResourceWithStreamingResponse(self)
+
+ def preference_optimize(
+ self,
+ *,
+ algorithm_config: post_training_preference_optimize_params.AlgorithmConfig,
+ finetuned_model: str,
+ hyperparam_search_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]],
+ job_uuid: str,
+ logger_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]],
+ training_config: post_training_preference_optimize_params.TrainingConfig,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> PostTrainingJob:
+ """
+ Run preference optimization of a model.
+
+ Args:
+ algorithm_config: The algorithm configuration.
+
+ finetuned_model: The model to fine-tune.
+
+ hyperparam_search_config: The hyperparam search configuration.
+
+ job_uuid: The UUID of the job to create.
+
+ logger_config: The logger configuration.
+
+ training_config: The training configuration.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/v1alpha/post-training/preference-optimize",
+ body=maybe_transform(
+ {
+ "algorithm_config": algorithm_config,
+ "finetuned_model": finetuned_model,
+ "hyperparam_search_config": hyperparam_search_config,
+ "job_uuid": job_uuid,
+ "logger_config": logger_config,
+ "training_config": training_config,
+ },
+ post_training_preference_optimize_params.PostTrainingPreferenceOptimizeParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=PostTrainingJob,
+ )
+
+ def supervised_fine_tune(
+ self,
+ *,
+ hyperparam_search_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]],
+ job_uuid: str,
+ logger_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]],
+ training_config: post_training_supervised_fine_tune_params.TrainingConfig,
+ algorithm_config: AlgorithmConfigParam | Omit = omit,
+ checkpoint_dir: str | Omit = omit,
+ model: str | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> PostTrainingJob:
+ """
+ Run supervised fine-tuning of a model.
+
+ Args:
+ hyperparam_search_config: The hyperparam search configuration.
+
+ job_uuid: The UUID of the job to create.
+
+ logger_config: The logger configuration.
+
+ training_config: The training configuration.
+
+ algorithm_config: The algorithm configuration.
+
+ checkpoint_dir: The directory to save checkpoint(s) to.
+
+ model: The model to fine-tune.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/v1alpha/post-training/supervised-fine-tune",
+ body=maybe_transform(
+ {
+ "hyperparam_search_config": hyperparam_search_config,
+ "job_uuid": job_uuid,
+ "logger_config": logger_config,
+ "training_config": training_config,
+ "algorithm_config": algorithm_config,
+ "checkpoint_dir": checkpoint_dir,
+ "model": model,
+ },
+ post_training_supervised_fine_tune_params.PostTrainingSupervisedFineTuneParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=PostTrainingJob,
+ )
+
+
+class AsyncPostTrainingResource(AsyncAPIResource):
+ @cached_property
+ def job(self) -> AsyncJobResource:
+ return AsyncJobResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncPostTrainingResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncPostTrainingResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncPostTrainingResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return AsyncPostTrainingResourceWithStreamingResponse(self)
+
+ async def preference_optimize(
+ self,
+ *,
+ algorithm_config: post_training_preference_optimize_params.AlgorithmConfig,
+ finetuned_model: str,
+ hyperparam_search_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]],
+ job_uuid: str,
+ logger_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]],
+ training_config: post_training_preference_optimize_params.TrainingConfig,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> PostTrainingJob:
+ """
+ Run preference optimization of a model.
+
+ Args:
+ algorithm_config: The algorithm configuration.
+
+ finetuned_model: The model to fine-tune.
+
+ hyperparam_search_config: The hyperparam search configuration.
+
+ job_uuid: The UUID of the job to create.
+
+ logger_config: The logger configuration.
+
+ training_config: The training configuration.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/v1alpha/post-training/preference-optimize",
+ body=await async_maybe_transform(
+ {
+ "algorithm_config": algorithm_config,
+ "finetuned_model": finetuned_model,
+ "hyperparam_search_config": hyperparam_search_config,
+ "job_uuid": job_uuid,
+ "logger_config": logger_config,
+ "training_config": training_config,
+ },
+ post_training_preference_optimize_params.PostTrainingPreferenceOptimizeParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=PostTrainingJob,
+ )
+
+ async def supervised_fine_tune(
+ self,
+ *,
+ hyperparam_search_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]],
+ job_uuid: str,
+ logger_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]],
+ training_config: post_training_supervised_fine_tune_params.TrainingConfig,
+ algorithm_config: AlgorithmConfigParam | Omit = omit,
+ checkpoint_dir: str | Omit = omit,
+ model: str | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> PostTrainingJob:
+ """
+ Run supervised fine-tuning of a model.
+
+ Args:
+ hyperparam_search_config: The hyperparam search configuration.
+
+ job_uuid: The UUID of the job to create.
+
+ logger_config: The logger configuration.
+
+ training_config: The training configuration.
+
+ algorithm_config: The algorithm configuration.
+
+ checkpoint_dir: The directory to save checkpoint(s) to.
+
+ model: The model to fine-tune.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/v1alpha/post-training/supervised-fine-tune",
+ body=await async_maybe_transform(
+ {
+ "hyperparam_search_config": hyperparam_search_config,
+ "job_uuid": job_uuid,
+ "logger_config": logger_config,
+ "training_config": training_config,
+ "algorithm_config": algorithm_config,
+ "checkpoint_dir": checkpoint_dir,
+ "model": model,
+ },
+ post_training_supervised_fine_tune_params.PostTrainingSupervisedFineTuneParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=PostTrainingJob,
+ )
+
+
+class PostTrainingResourceWithRawResponse:
+ def __init__(self, post_training: PostTrainingResource) -> None:
+ self._post_training = post_training
+
+ self.preference_optimize = to_raw_response_wrapper(
+ post_training.preference_optimize,
+ )
+ self.supervised_fine_tune = to_raw_response_wrapper(
+ post_training.supervised_fine_tune,
+ )
+
+ @cached_property
+ def job(self) -> JobResourceWithRawResponse:
+ return JobResourceWithRawResponse(self._post_training.job)
+
+
+class AsyncPostTrainingResourceWithRawResponse:
+ def __init__(self, post_training: AsyncPostTrainingResource) -> None:
+ self._post_training = post_training
+
+ self.preference_optimize = async_to_raw_response_wrapper(
+ post_training.preference_optimize,
+ )
+ self.supervised_fine_tune = async_to_raw_response_wrapper(
+ post_training.supervised_fine_tune,
+ )
+
+ @cached_property
+ def job(self) -> AsyncJobResourceWithRawResponse:
+ return AsyncJobResourceWithRawResponse(self._post_training.job)
+
+
+class PostTrainingResourceWithStreamingResponse:
+ def __init__(self, post_training: PostTrainingResource) -> None:
+ self._post_training = post_training
+
+ self.preference_optimize = to_streamed_response_wrapper(
+ post_training.preference_optimize,
+ )
+ self.supervised_fine_tune = to_streamed_response_wrapper(
+ post_training.supervised_fine_tune,
+ )
+
+ @cached_property
+ def job(self) -> JobResourceWithStreamingResponse:
+ return JobResourceWithStreamingResponse(self._post_training.job)
+
+
+class AsyncPostTrainingResourceWithStreamingResponse:
+ def __init__(self, post_training: AsyncPostTrainingResource) -> None:
+ self._post_training = post_training
+
+ self.preference_optimize = async_to_streamed_response_wrapper(
+ post_training.preference_optimize,
+ )
+ self.supervised_fine_tune = async_to_streamed_response_wrapper(
+ post_training.supervised_fine_tune,
+ )
+
+ @cached_property
+ def job(self) -> AsyncJobResourceWithStreamingResponse:
+ return AsyncJobResourceWithStreamingResponse(self._post_training.job)
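
To show how the supervised fine-tuning entry point above is called, here is a hedged sketch. Only the top-level keyword arguments come from this patch; the contents of `training_config` and the model id are illustrative assumptions, not the real schema.

# Hedged sketch: kick off a supervised fine-tuning job
# (POST /v1alpha/post-training/supervised-fine-tune).
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server address

job = client.alpha.post_training.supervised_fine_tune(
    job_uuid="sft-job-001",                 # placeholder
    model="llama-3.2-1b-instruct",          # placeholder model id (optional per the signature)
    training_config={"n_epochs": 1},        # illustrative; real shape is TrainingConfig
    hyperparam_search_config={},            # free-form dict per the signature above
    logger_config={},                       # free-form dict per the signature above
)
print(job)  # returns a PostTrainingJob; poll it via client.alpha.post_training.job.status(...)

# AsyncPostTrainingResource mirrors this surface for asyncio clients, with the same
# keyword arguments awaited instead of called synchronously.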
diff --git a/src/llama_stack_client/resources/beta/__init__.py b/src/llama_stack_client/resources/beta/__init__.py
new file mode 100644
index 00000000..6fd69c43
--- /dev/null
+++ b/src/llama_stack_client/resources/beta/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .beta import (
+ BetaResource,
+ AsyncBetaResource,
+ BetaResourceWithRawResponse,
+ AsyncBetaResourceWithRawResponse,
+ BetaResourceWithStreamingResponse,
+ AsyncBetaResourceWithStreamingResponse,
+)
+from .datasets import (
+ DatasetsResource,
+ AsyncDatasetsResource,
+ DatasetsResourceWithRawResponse,
+ AsyncDatasetsResourceWithRawResponse,
+ DatasetsResourceWithStreamingResponse,
+ AsyncDatasetsResourceWithStreamingResponse,
+)
+
+__all__ = [
+ "DatasetsResource",
+ "AsyncDatasetsResource",
+ "DatasetsResourceWithRawResponse",
+ "AsyncDatasetsResourceWithRawResponse",
+ "DatasetsResourceWithStreamingResponse",
+ "AsyncDatasetsResourceWithStreamingResponse",
+ "BetaResource",
+ "AsyncBetaResource",
+ "BetaResourceWithRawResponse",
+ "AsyncBetaResourceWithRawResponse",
+ "BetaResourceWithStreamingResponse",
+ "AsyncBetaResourceWithStreamingResponse",
+]
diff --git a/src/llama_stack_client/resources/beta/beta.py b/src/llama_stack_client/resources/beta/beta.py
new file mode 100644
index 00000000..7bf1c711
--- /dev/null
+++ b/src/llama_stack_client/resources/beta/beta.py
@@ -0,0 +1,102 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .datasets import (
+ DatasetsResource,
+ AsyncDatasetsResource,
+ DatasetsResourceWithRawResponse,
+ AsyncDatasetsResourceWithRawResponse,
+ DatasetsResourceWithStreamingResponse,
+ AsyncDatasetsResourceWithStreamingResponse,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+
+__all__ = ["BetaResource", "AsyncBetaResource"]
+
+
+class BetaResource(SyncAPIResource):
+ @cached_property
+ def datasets(self) -> DatasetsResource:
+ return DatasetsResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> BetaResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return BetaResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> BetaResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return BetaResourceWithStreamingResponse(self)
+
+
+class AsyncBetaResource(AsyncAPIResource):
+ @cached_property
+ def datasets(self) -> AsyncDatasetsResource:
+ return AsyncDatasetsResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncBetaResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncBetaResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncBetaResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return AsyncBetaResourceWithStreamingResponse(self)
+
+
+class BetaResourceWithRawResponse:
+ def __init__(self, beta: BetaResource) -> None:
+ self._beta = beta
+
+ @cached_property
+ def datasets(self) -> DatasetsResourceWithRawResponse:
+ return DatasetsResourceWithRawResponse(self._beta.datasets)
+
+
+class AsyncBetaResourceWithRawResponse:
+ def __init__(self, beta: AsyncBetaResource) -> None:
+ self._beta = beta
+
+ @cached_property
+ def datasets(self) -> AsyncDatasetsResourceWithRawResponse:
+ return AsyncDatasetsResourceWithRawResponse(self._beta.datasets)
+
+
+class BetaResourceWithStreamingResponse:
+ def __init__(self, beta: BetaResource) -> None:
+ self._beta = beta
+
+ @cached_property
+ def datasets(self) -> DatasetsResourceWithStreamingResponse:
+ return DatasetsResourceWithStreamingResponse(self._beta.datasets)
+
+
+class AsyncBetaResourceWithStreamingResponse:
+ def __init__(self, beta: AsyncBetaResource) -> None:
+ self._beta = beta
+
+ @cached_property
+ def datasets(self) -> AsyncDatasetsResourceWithStreamingResponse:
+ return AsyncDatasetsResourceWithStreamingResponse(self._beta.datasets)
diff --git a/src/llama_stack_client/resources/beta/datasets.py b/src/llama_stack_client/resources/beta/datasets.py
new file mode 100644
index 00000000..1b924b28
--- /dev/null
+++ b/src/llama_stack_client/resources/beta/datasets.py
@@ -0,0 +1,676 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Type, Union, Iterable, cast
+from typing_extensions import Literal
+
+import httpx
+
+from ..._types import Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given
+from ..._utils import maybe_transform, async_maybe_transform
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ..._wrappers import DataWrapper
+from ...types.beta import dataset_iterrows_params, dataset_register_params, dataset_appendrows_params
+from ..._base_client import make_request_options
+from ...types.beta.dataset_list_response import DatasetListResponse
+from ...types.beta.dataset_iterrows_response import DatasetIterrowsResponse
+from ...types.beta.dataset_register_response import DatasetRegisterResponse
+from ...types.beta.dataset_retrieve_response import DatasetRetrieveResponse
+
+__all__ = ["DatasetsResource", "AsyncDatasetsResource"]
+
+
+class DatasetsResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> DatasetsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return DatasetsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> DatasetsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return DatasetsResourceWithStreamingResponse(self)
+
+ def retrieve(
+ self,
+ dataset_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> DatasetRetrieveResponse:
+ """
+ Get a dataset by its ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not dataset_id:
+ raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}")
+ return self._get(
+ f"/v1beta/datasets/{dataset_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=DatasetRetrieveResponse,
+ )
+
+ def list(
+ self,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> DatasetListResponse:
+ """List all datasets."""
+ return self._get(
+ "/v1beta/datasets",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ post_parser=DataWrapper[DatasetListResponse]._unwrapper,
+ ),
+ cast_to=cast(Type[DatasetListResponse], DataWrapper[DatasetListResponse]),
+ )
+
+ def appendrows(
+ self,
+ dataset_id: str,
+ *,
+ rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> None:
+ """
+ Append rows to a dataset.
+
+ Args:
+ rows: The rows to append to the dataset.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not dataset_id:
+ raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}")
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+ return self._post(
+ f"/v1beta/datasetio/append-rows/{dataset_id}",
+ body=maybe_transform({"rows": rows}, dataset_appendrows_params.DatasetAppendrowsParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=NoneType,
+ )
+
+ def iterrows(
+ self,
+ dataset_id: str,
+ *,
+ limit: int | Omit = omit,
+ start_index: int | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> DatasetIterrowsResponse:
+ """Get a paginated list of rows from a dataset.
+
+ Uses offset-based pagination where:
+
+ - start_index: The starting index (0-based). If None, starts from beginning.
+ - limit: Number of items to return. If None or -1, returns all items.
+
+ The response includes:
+
+ - data: List of items for the current page.
+ - has_more: Whether there are more items available after this set.
+
+ Args:
+ limit: The number of rows to get.
+
+ start_index: Index into dataset for the first row to get. Get all rows if None.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not dataset_id:
+ raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}")
+ return self._get(
+ f"/v1beta/datasetio/iterrows/{dataset_id}",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "limit": limit,
+ "start_index": start_index,
+ },
+ dataset_iterrows_params.DatasetIterrowsParams,
+ ),
+ ),
+ cast_to=DatasetIterrowsResponse,
+ )
+
+ def register(
+ self,
+ *,
+ purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"],
+ source: dataset_register_params.Source,
+ dataset_id: str | Omit = omit,
+ metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> DatasetRegisterResponse:
+ """Register a new dataset.
+
+ Args:
+ purpose: The purpose of the dataset.
+
+ One of: - "post-training/messages": The dataset
+ contains a messages column with list of messages for post-training. {
+ "messages": [ {"role": "user", "content": "Hello, world!"}, {"role":
+ "assistant", "content": "Hello, world!"}, ] } - "eval/question-answer": The
+ dataset contains a question column and an answer column for evaluation. {
+ "question": "What is the capital of France?", "answer": "Paris" } -
+ "eval/messages-answer": The dataset contains a messages column with list of
+ messages and an answer column for evaluation. { "messages": [ {"role": "user",
+ "content": "Hello, my name is John Doe."}, {"role": "assistant", "content":
+ "Hello, John Doe. How can I help you today?"}, {"role": "user", "content":
+ "What's my name?"}, ], "answer": "John Doe" }
+
+ source: The data source of the dataset. Ensure that the data source schema is compatible
+ with the purpose of the dataset. Examples: - { "type": "uri", "uri":
+ "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
+ "lsfs://mydata.jsonl" } - { "type": "uri", "uri":
+ "data:csv;base64,{base64_content}" } - { "type": "uri", "uri":
+ "huggingface://llamastack/simpleqa?split=train" } - { "type": "rows", "rows": [
+ { "messages": [ {"role": "user", "content": "Hello, world!"}, {"role":
+ "assistant", "content": "Hello, world!"}, ] } ] }
+
+ dataset_id: The ID of the dataset. If not provided, an ID will be generated.
+
+ metadata: The metadata for the dataset. - E.g. {"description": "My dataset"}.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/v1beta/datasets",
+ body=maybe_transform(
+ {
+ "purpose": purpose,
+ "source": source,
+ "dataset_id": dataset_id,
+ "metadata": metadata,
+ },
+ dataset_register_params.DatasetRegisterParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=DatasetRegisterResponse,
+ )
+
+ def unregister(
+ self,
+ dataset_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> None:
+ """
+ Unregister a dataset by its ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not dataset_id:
+ raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}")
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+ return self._delete(
+ f"/v1beta/datasets/{dataset_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=NoneType,
+ )
+
+
+class AsyncDatasetsResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncDatasetsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncDatasetsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncDatasetsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return AsyncDatasetsResourceWithStreamingResponse(self)
+
+ async def retrieve(
+ self,
+ dataset_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> DatasetRetrieveResponse:
+ """
+ Get a dataset by its ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not dataset_id:
+ raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}")
+ return await self._get(
+ f"/v1beta/datasets/{dataset_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=DatasetRetrieveResponse,
+ )
+
+ async def list(
+ self,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> DatasetListResponse:
+ """List all datasets."""
+ return await self._get(
+ "/v1beta/datasets",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ post_parser=DataWrapper[DatasetListResponse]._unwrapper,
+ ),
+ cast_to=cast(Type[DatasetListResponse], DataWrapper[DatasetListResponse]),
+ )
+
+ async def appendrows(
+ self,
+ dataset_id: str,
+ *,
+ rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> None:
+ """
+ Append rows to a dataset.
+
+ Args:
+ rows: The rows to append to the dataset.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not dataset_id:
+ raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}")
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+ return await self._post(
+ f"/v1beta/datasetio/append-rows/{dataset_id}",
+ body=await async_maybe_transform({"rows": rows}, dataset_appendrows_params.DatasetAppendrowsParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=NoneType,
+ )
+
+ async def iterrows(
+ self,
+ dataset_id: str,
+ *,
+ limit: int | Omit = omit,
+ start_index: int | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> DatasetIterrowsResponse:
+ """Get a paginated list of rows from a dataset.
+
+ Uses offset-based pagination where:
+
+    - start_index: The starting index (0-based). If None, starts from the beginning.
+ - limit: Number of items to return. If None or -1, returns all items.
+
+ The response includes:
+
+ - data: List of items for the current page.
+ - has_more: Whether there are more items available after this set.
+
+ Args:
+ limit: The number of rows to get.
+
+ start_index: Index into dataset for the first row to get. Get all rows if None.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not dataset_id:
+ raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}")
+ return await self._get(
+ f"/v1beta/datasetio/iterrows/{dataset_id}",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=await async_maybe_transform(
+ {
+ "limit": limit,
+ "start_index": start_index,
+ },
+ dataset_iterrows_params.DatasetIterrowsParams,
+ ),
+ ),
+ cast_to=DatasetIterrowsResponse,
+ )
+
+ async def register(
+ self,
+ *,
+ purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"],
+ source: dataset_register_params.Source,
+ dataset_id: str | Omit = omit,
+ metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> DatasetRegisterResponse:
+ """Register a new dataset.
+
+ Args:
+ purpose: The purpose of the dataset.
+
+ One of: - "post-training/messages": The dataset
+ contains a messages column with list of messages for post-training. {
+ "messages": [ {"role": "user", "content": "Hello, world!"}, {"role":
+ "assistant", "content": "Hello, world!"}, ] } - "eval/question-answer": The
+ dataset contains a question column and an answer column for evaluation. {
+ "question": "What is the capital of France?", "answer": "Paris" } -
+ "eval/messages-answer": The dataset contains a messages column with list of
+ messages and an answer column for evaluation. { "messages": [ {"role": "user",
+ "content": "Hello, my name is John Doe."}, {"role": "assistant", "content":
+ "Hello, John Doe. How can I help you today?"}, {"role": "user", "content":
+ "What's my name?"}, ], "answer": "John Doe" }
+
+ source: The data source of the dataset. Ensure that the data source schema is compatible
+ with the purpose of the dataset. Examples: - { "type": "uri", "uri":
+ "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
+ "lsfs://mydata.jsonl" } - { "type": "uri", "uri":
+ "data:csv;base64,{base64_content}" } - { "type": "uri", "uri":
+ "huggingface://llamastack/simpleqa?split=train" } - { "type": "rows", "rows": [
+ { "messages": [ {"role": "user", "content": "Hello, world!"}, {"role":
+ "assistant", "content": "Hello, world!"}, ] } ] }
+
+ dataset_id: The ID of the dataset. If not provided, an ID will be generated.
+
+ metadata: The metadata for the dataset. - E.g. {"description": "My dataset"}.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/v1beta/datasets",
+ body=await async_maybe_transform(
+ {
+ "purpose": purpose,
+ "source": source,
+ "dataset_id": dataset_id,
+ "metadata": metadata,
+ },
+ dataset_register_params.DatasetRegisterParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=DatasetRegisterResponse,
+ )
+
+ async def unregister(
+ self,
+ dataset_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> None:
+ """
+ Unregister a dataset by its ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not dataset_id:
+ raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}")
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+ return await self._delete(
+ f"/v1beta/datasets/{dataset_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=NoneType,
+ )
+
+
+class DatasetsResourceWithRawResponse:
+ def __init__(self, datasets: DatasetsResource) -> None:
+ self._datasets = datasets
+
+ self.retrieve = to_raw_response_wrapper(
+ datasets.retrieve,
+ )
+ self.list = to_raw_response_wrapper(
+ datasets.list,
+ )
+ self.appendrows = to_raw_response_wrapper(
+ datasets.appendrows,
+ )
+ self.iterrows = to_raw_response_wrapper(
+ datasets.iterrows,
+ )
+ self.register = to_raw_response_wrapper(
+ datasets.register,
+ )
+ self.unregister = to_raw_response_wrapper(
+ datasets.unregister,
+ )
+
+
+class AsyncDatasetsResourceWithRawResponse:
+ def __init__(self, datasets: AsyncDatasetsResource) -> None:
+ self._datasets = datasets
+
+ self.retrieve = async_to_raw_response_wrapper(
+ datasets.retrieve,
+ )
+ self.list = async_to_raw_response_wrapper(
+ datasets.list,
+ )
+ self.appendrows = async_to_raw_response_wrapper(
+ datasets.appendrows,
+ )
+ self.iterrows = async_to_raw_response_wrapper(
+ datasets.iterrows,
+ )
+ self.register = async_to_raw_response_wrapper(
+ datasets.register,
+ )
+ self.unregister = async_to_raw_response_wrapper(
+ datasets.unregister,
+ )
+
+
+class DatasetsResourceWithStreamingResponse:
+ def __init__(self, datasets: DatasetsResource) -> None:
+ self._datasets = datasets
+
+ self.retrieve = to_streamed_response_wrapper(
+ datasets.retrieve,
+ )
+ self.list = to_streamed_response_wrapper(
+ datasets.list,
+ )
+ self.appendrows = to_streamed_response_wrapper(
+ datasets.appendrows,
+ )
+ self.iterrows = to_streamed_response_wrapper(
+ datasets.iterrows,
+ )
+ self.register = to_streamed_response_wrapper(
+ datasets.register,
+ )
+ self.unregister = to_streamed_response_wrapper(
+ datasets.unregister,
+ )
+
+
+class AsyncDatasetsResourceWithStreamingResponse:
+ def __init__(self, datasets: AsyncDatasetsResource) -> None:
+ self._datasets = datasets
+
+ self.retrieve = async_to_streamed_response_wrapper(
+ datasets.retrieve,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ datasets.list,
+ )
+ self.appendrows = async_to_streamed_response_wrapper(
+ datasets.appendrows,
+ )
+ self.iterrows = async_to_streamed_response_wrapper(
+ datasets.iterrows,
+ )
+ self.register = async_to_streamed_response_wrapper(
+ datasets.register,
+ )
+ self.unregister = async_to_streamed_response_wrapper(
+ datasets.unregister,
+ )
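
A minimal usage sketch for the datasets resource added above (not part of the patch itself). The `client.beta.datasets` accessor and the server URL are assumptions based on this resource's /v1beta routes; the method names and parameters come from the code in this file.

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

    # Register a dataset from a hosted JSONL file (purpose/source shapes as documented above).
    client.beta.datasets.register(
        purpose="eval/question-answer",
        source={"type": "uri", "uri": "https://mywebsite.com/mydata.jsonl"},
        dataset_id="my-eval-set",  # optional; an ID is generated if omitted
    )

    # Page through the rows using offset-based pagination.
    page = client.beta.datasets.iterrows("my-eval-set", start_index=0, limit=10)
    for row in page.data:
        print(row)
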
diff --git a/src/llama_stack_client/types/__init__.py b/src/llama_stack_client/types/__init__.py
index 43df9408..cd62db7f 100644
--- a/src/llama_stack_client/types/__init__.py
+++ b/src/llama_stack_client/types/__init__.py
@@ -15,11 +15,14 @@
Document as Document,
ToolCall as ToolCall,
ParamType as ParamType,
+ AgentConfig as AgentConfig,
QueryConfig as QueryConfig,
QueryResult as QueryResult,
UserMessage as UserMessage,
ScoringResult as ScoringResult,
SystemMessage as SystemMessage,
+ ResponseFormat as ResponseFormat,
+ SamplingParams as SamplingParams,
SafetyViolation as SafetyViolation,
CompletionMessage as CompletionMessage,
InterleavedContent as InterleavedContent,
@@ -35,6 +38,7 @@
from .vector_store import VectorStore as VectorStore
from .version_info import VersionInfo as VersionInfo
from .provider_info import ProviderInfo as ProviderInfo
+from .tool_def_param import ToolDefParam as ToolDefParam
from .create_response import CreateResponse as CreateResponse
from .response_object import ResponseObject as ResponseObject
from .file_list_params import FileListParams as FileListParams
diff --git a/src/llama_stack_client/types/alpha/__init__.py b/src/llama_stack_client/types/alpha/__init__.py
index d14ed874..61e02a4e 100644
--- a/src/llama_stack_client/types/alpha/__init__.py
+++ b/src/llama_stack_client/types/alpha/__init__.py
@@ -7,3 +7,37 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
+
+from .job import Job as Job
+from .benchmark import Benchmark as Benchmark
+from .tool_response import ToolResponse as ToolResponse
+from .inference_step import InferenceStep as InferenceStep
+from .shield_call_step import ShieldCallStep as ShieldCallStep
+from .agent_list_params import AgentListParams as AgentListParams
+from .evaluate_response import EvaluateResponse as EvaluateResponse
+from .post_training_job import PostTrainingJob as PostTrainingJob
+from .agent_create_params import AgentCreateParams as AgentCreateParams
+from .agent_list_response import AgentListResponse as AgentListResponse
+from .tool_execution_step import ToolExecutionStep as ToolExecutionStep
+from .tool_response_param import ToolResponseParam as ToolResponseParam
+from .eval_run_eval_params import EvalRunEvalParams as EvalRunEvalParams
+from .agent_create_response import AgentCreateResponse as AgentCreateResponse
+from .memory_retrieval_step import MemoryRetrievalStep as MemoryRetrievalStep
+from .algorithm_config_param import AlgorithmConfigParam as AlgorithmConfigParam
+from .benchmark_config_param import BenchmarkConfigParam as BenchmarkConfigParam
+from .agent_retrieve_response import AgentRetrieveResponse as AgentRetrieveResponse
+from .benchmark_list_response import BenchmarkListResponse as BenchmarkListResponse
+from .inference_rerank_params import InferenceRerankParams as InferenceRerankParams
+from .list_benchmarks_response import ListBenchmarksResponse as ListBenchmarksResponse
+from .benchmark_register_params import BenchmarkRegisterParams as BenchmarkRegisterParams
+from .eval_evaluate_rows_params import EvalEvaluateRowsParams as EvalEvaluateRowsParams
+from .inference_rerank_response import InferenceRerankResponse as InferenceRerankResponse
+from .eval_run_eval_alpha_params import EvalRunEvalAlphaParams as EvalRunEvalAlphaParams
+from .eval_evaluate_rows_alpha_params import EvalEvaluateRowsAlphaParams as EvalEvaluateRowsAlphaParams
+from .list_post_training_jobs_response import ListPostTrainingJobsResponse as ListPostTrainingJobsResponse
+from .post_training_preference_optimize_params import (
+ PostTrainingPreferenceOptimizeParams as PostTrainingPreferenceOptimizeParams,
+)
+from .post_training_supervised_fine_tune_params import (
+ PostTrainingSupervisedFineTuneParams as PostTrainingSupervisedFineTuneParams,
+)
diff --git a/src/llama_stack_client/types/alpha/agent_create_params.py b/src/llama_stack_client/types/alpha/agent_create_params.py
new file mode 100644
index 00000000..368704b2
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/agent_create_params.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+from ..shared_params.agent_config import AgentConfig
+
+__all__ = ["AgentCreateParams"]
+
+
+class AgentCreateParams(TypedDict, total=False):
+ agent_config: Required[AgentConfig]
+ """The configuration for the agent."""
diff --git a/src/llama_stack_client/types/alpha/agent_create_response.py b/src/llama_stack_client/types/alpha/agent_create_response.py
new file mode 100644
index 00000000..9b155198
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/agent_create_response.py
@@ -0,0 +1,10 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from ..._models import BaseModel
+
+__all__ = ["AgentCreateResponse"]
+
+
+class AgentCreateResponse(BaseModel):
+ agent_id: str
+ """Unique identifier for the created agent"""
diff --git a/src/llama_stack_client/types/alpha/agent_list_params.py b/src/llama_stack_client/types/alpha/agent_list_params.py
new file mode 100644
index 00000000..15da545b
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/agent_list_params.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+__all__ = ["AgentListParams"]
+
+
+class AgentListParams(TypedDict, total=False):
+ limit: int
+ """The number of agents to return."""
+
+ start_index: int
+ """The index to start the pagination from."""
diff --git a/src/llama_stack_client/types/alpha/agent_list_response.py b/src/llama_stack_client/types/alpha/agent_list_response.py
new file mode 100644
index 00000000..69de5001
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/agent_list_response.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+
+from ..._models import BaseModel
+
+__all__ = ["AgentListResponse"]
+
+
+class AgentListResponse(BaseModel):
+ data: List[Dict[str, Union[bool, float, str, List[object], object, None]]]
+ """The list of items for the current page"""
+
+ has_more: bool
+ """Whether there are more items available after this set"""
+
+ url: Optional[str] = None
+ """The URL for accessing this list"""
diff --git a/src/llama_stack_client/types/alpha/agent_retrieve_response.py b/src/llama_stack_client/types/alpha/agent_retrieve_response.py
new file mode 100644
index 00000000..87d79b7b
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/agent_retrieve_response.py
@@ -0,0 +1,19 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from datetime import datetime
+
+from ..._models import BaseModel
+from ..shared.agent_config import AgentConfig
+
+__all__ = ["AgentRetrieveResponse"]
+
+
+class AgentRetrieveResponse(BaseModel):
+ agent_config: AgentConfig
+ """Configuration settings for the agent"""
+
+ agent_id: str
+ """Unique identifier for the agent"""
+
+ created_at: datetime
+ """Timestamp when the agent was created"""
diff --git a/src/llama_stack_client/types/alpha/agents/__init__.py b/src/llama_stack_client/types/alpha/agents/__init__.py
index d14ed874..f28e38e4 100644
--- a/src/llama_stack_client/types/alpha/agents/__init__.py
+++ b/src/llama_stack_client/types/alpha/agents/__init__.py
@@ -7,3 +7,16 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
+
+from .turn import Turn as Turn
+from .session import Session as Session
+from .turn_create_params import TurnCreateParams as TurnCreateParams
+from .turn_resume_params import TurnResumeParams as TurnResumeParams
+from .session_list_params import SessionListParams as SessionListParams
+from .turn_response_event import TurnResponseEvent as TurnResponseEvent
+from .session_create_params import SessionCreateParams as SessionCreateParams
+from .session_list_response import SessionListResponse as SessionListResponse
+from .step_retrieve_response import StepRetrieveResponse as StepRetrieveResponse
+from .session_create_response import SessionCreateResponse as SessionCreateResponse
+from .session_retrieve_params import SessionRetrieveParams as SessionRetrieveParams
+from .agent_turn_response_stream_chunk import AgentTurnResponseStreamChunk as AgentTurnResponseStreamChunk
diff --git a/src/llama_stack_client/types/alpha/agents/agent_turn_response_stream_chunk.py b/src/llama_stack_client/types/alpha/agents/agent_turn_response_stream_chunk.py
new file mode 100644
index 00000000..c45bf756
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/agents/agent_turn_response_stream_chunk.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from ...._models import BaseModel
+from .turn_response_event import TurnResponseEvent
+
+__all__ = ["AgentTurnResponseStreamChunk"]
+
+
+class AgentTurnResponseStreamChunk(BaseModel):
+ event: TurnResponseEvent
+ """Individual event in the agent turn response stream"""
diff --git a/src/llama_stack_client/types/alpha/agents/session.py b/src/llama_stack_client/types/alpha/agents/session.py
new file mode 100644
index 00000000..9b60853a
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/agents/session.py
@@ -0,0 +1,23 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List
+from datetime import datetime
+
+from .turn import Turn
+from ...._models import BaseModel
+
+__all__ = ["Session"]
+
+
+class Session(BaseModel):
+ session_id: str
+ """Unique identifier for the conversation session"""
+
+ session_name: str
+ """Human-readable name for the session"""
+
+ started_at: datetime
+ """Timestamp when the session was created"""
+
+ turns: List[Turn]
+ """List of all turns that have occurred in this session"""
diff --git a/src/llama_stack_client/types/alpha/agents/session_create_params.py b/src/llama_stack_client/types/alpha/agents/session_create_params.py
new file mode 100644
index 00000000..5f421ae9
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/agents/session_create_params.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["SessionCreateParams"]
+
+
+class SessionCreateParams(TypedDict, total=False):
+ session_name: Required[str]
+ """The name of the session to create."""
diff --git a/src/llama_stack_client/types/alpha/agents/session_create_response.py b/src/llama_stack_client/types/alpha/agents/session_create_response.py
new file mode 100644
index 00000000..7d30c61a
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/agents/session_create_response.py
@@ -0,0 +1,10 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from ...._models import BaseModel
+
+__all__ = ["SessionCreateResponse"]
+
+
+class SessionCreateResponse(BaseModel):
+ session_id: str
+ """Unique identifier for the created session"""
diff --git a/src/llama_stack_client/types/alpha/agents/session_list_params.py b/src/llama_stack_client/types/alpha/agents/session_list_params.py
new file mode 100644
index 00000000..0644d1ae
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/agents/session_list_params.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+__all__ = ["SessionListParams"]
+
+
+class SessionListParams(TypedDict, total=False):
+ limit: int
+ """The number of sessions to return."""
+
+ start_index: int
+ """The index to start the pagination from."""
diff --git a/src/llama_stack_client/types/alpha/agents/session_list_response.py b/src/llama_stack_client/types/alpha/agents/session_list_response.py
new file mode 100644
index 00000000..23a51baf
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/agents/session_list_response.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+
+from ...._models import BaseModel
+
+__all__ = ["SessionListResponse"]
+
+
+class SessionListResponse(BaseModel):
+ data: List[Dict[str, Union[bool, float, str, List[object], object, None]]]
+ """The list of items for the current page"""
+
+ has_more: bool
+ """Whether there are more items available after this set"""
+
+ url: Optional[str] = None
+ """The URL for accessing this list"""
diff --git a/src/llama_stack_client/types/alpha/agents/session_retrieve_params.py b/src/llama_stack_client/types/alpha/agents/session_retrieve_params.py
new file mode 100644
index 00000000..116190cc
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/agents/session_retrieve_params.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+from ...._types import SequenceNotStr
+
+__all__ = ["SessionRetrieveParams"]
+
+
+class SessionRetrieveParams(TypedDict, total=False):
+ agent_id: Required[str]
+
+ turn_ids: SequenceNotStr[str]
+ """(Optional) List of turn IDs to filter the session by."""
diff --git a/src/llama_stack_client/types/alpha/agents/step_retrieve_response.py b/src/llama_stack_client/types/alpha/agents/step_retrieve_response.py
new file mode 100644
index 00000000..55b64355
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/agents/step_retrieve_response.py
@@ -0,0 +1,23 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Annotated, TypeAlias
+
+from ...._utils import PropertyInfo
+from ...._models import BaseModel
+from ..inference_step import InferenceStep
+from ..shield_call_step import ShieldCallStep
+from ..tool_execution_step import ToolExecutionStep
+from ..memory_retrieval_step import MemoryRetrievalStep
+
+__all__ = ["StepRetrieveResponse", "Step"]
+
+Step: TypeAlias = Annotated[
+ Union[InferenceStep, ToolExecutionStep, ShieldCallStep, MemoryRetrievalStep],
+ PropertyInfo(discriminator="step_type"),
+]
+
+
+class StepRetrieveResponse(BaseModel):
+ step: Step
+ """The complete step data and execution details"""
diff --git a/src/llama_stack_client/types/alpha/agents/turn.py b/src/llama_stack_client/types/alpha/agents/turn.py
new file mode 100644
index 00000000..74ef22aa
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/agents/turn.py
@@ -0,0 +1,116 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from datetime import datetime
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ...._utils import PropertyInfo
+from ...._models import BaseModel
+from ..inference_step import InferenceStep
+from ..shield_call_step import ShieldCallStep
+from ..tool_execution_step import ToolExecutionStep
+from ...shared.user_message import UserMessage
+from ..memory_retrieval_step import MemoryRetrievalStep
+from ...shared.completion_message import CompletionMessage
+from ...shared.tool_response_message import ToolResponseMessage
+from ...shared.interleaved_content_item import InterleavedContentItem
+
+__all__ = [
+ "Turn",
+ "InputMessage",
+ "Step",
+ "OutputAttachment",
+ "OutputAttachmentContent",
+ "OutputAttachmentContentImageContentItem",
+ "OutputAttachmentContentImageContentItemImage",
+ "OutputAttachmentContentImageContentItemImageURL",
+ "OutputAttachmentContentTextContentItem",
+ "OutputAttachmentContentURL",
+]
+
+InputMessage: TypeAlias = Union[UserMessage, ToolResponseMessage]
+
+Step: TypeAlias = Annotated[
+ Union[InferenceStep, ToolExecutionStep, ShieldCallStep, MemoryRetrievalStep],
+ PropertyInfo(discriminator="step_type"),
+]
+
+
+class OutputAttachmentContentImageContentItemImageURL(BaseModel):
+ uri: str
+ """The URL string pointing to the resource"""
+
+
+class OutputAttachmentContentImageContentItemImage(BaseModel):
+ data: Optional[str] = None
+ """base64 encoded image data as string"""
+
+ url: Optional[OutputAttachmentContentImageContentItemImageURL] = None
+ """A URL of the image or data URL in the format of data:image/{type};base64,{data}.
+
+ Note that URL could have length limits.
+ """
+
+
+class OutputAttachmentContentImageContentItem(BaseModel):
+ image: OutputAttachmentContentImageContentItemImage
+ """Image as a base64 encoded string or an URL"""
+
+ type: Literal["image"]
+ """Discriminator type of the content item. Always "image" """
+
+
+class OutputAttachmentContentTextContentItem(BaseModel):
+ text: str
+ """Text content"""
+
+ type: Literal["text"]
+ """Discriminator type of the content item. Always "text" """
+
+
+class OutputAttachmentContentURL(BaseModel):
+ uri: str
+ """The URL string pointing to the resource"""
+
+
+OutputAttachmentContent: TypeAlias = Union[
+ str,
+ OutputAttachmentContentImageContentItem,
+ OutputAttachmentContentTextContentItem,
+ List[InterleavedContentItem],
+ OutputAttachmentContentURL,
+]
+
+
+class OutputAttachment(BaseModel):
+ content: OutputAttachmentContent
+ """The content of the attachment."""
+
+ mime_type: str
+ """The MIME type of the attachment."""
+
+
+class Turn(BaseModel):
+ input_messages: List[InputMessage]
+ """List of messages that initiated this turn"""
+
+ output_message: CompletionMessage
+ """The model's generated response containing content and metadata"""
+
+ session_id: str
+ """Unique identifier for the conversation session"""
+
+ started_at: datetime
+ """Timestamp when the turn began"""
+
+ steps: List[Step]
+ """Ordered list of processing steps executed during this turn"""
+
+ turn_id: str
+ """Unique identifier for the turn within a session"""
+
+ completed_at: Optional[datetime] = None
+ """(Optional) Timestamp when the turn finished, if completed"""
+
+ output_attachments: Optional[List[OutputAttachment]] = None
+ """(Optional) Files or media attached to the agent's response"""
diff --git a/src/llama_stack_client/types/alpha/agents/turn_create_params.py b/src/llama_stack_client/types/alpha/agents/turn_create_params.py
new file mode 100644
index 00000000..7225959a
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/agents/turn_create_params.py
@@ -0,0 +1,164 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from ...._types import SequenceNotStr
+from ...shared_params.user_message import UserMessage
+from ...shared_params.tool_response_message import ToolResponseMessage
+from ...shared_params.interleaved_content_item import InterleavedContentItem
+
+__all__ = [
+ "TurnCreateParamsBase",
+ "Message",
+ "Document",
+ "DocumentContent",
+ "DocumentContentImageContentItem",
+ "DocumentContentImageContentItemImage",
+ "DocumentContentImageContentItemImageURL",
+ "DocumentContentTextContentItem",
+ "DocumentContentURL",
+ "ToolConfig",
+ "Toolgroup",
+ "ToolgroupAgentToolGroupWithArgs",
+ "TurnCreateParamsNonStreaming",
+ "TurnCreateParamsStreaming",
+]
+
+
+class TurnCreateParamsBase(TypedDict, total=False):
+ agent_id: Required[str]
+
+ messages: Required[Iterable[Message]]
+ """List of messages to start the turn with."""
+
+ documents: Iterable[Document]
+ """(Optional) List of documents to create the turn with."""
+
+ tool_config: ToolConfig
+ """
+ (Optional) The tool configuration to create the turn with, will be used to
+ override the agent's tool_config.
+ """
+
+ toolgroups: SequenceNotStr[Toolgroup]
+ """
+ (Optional) List of toolgroups to create the turn with, will be used in addition
+ to the agent's config toolgroups for the request.
+ """
+
+
+Message: TypeAlias = Union[UserMessage, ToolResponseMessage]
+
+
+class DocumentContentImageContentItemImageURL(TypedDict, total=False):
+ uri: Required[str]
+ """The URL string pointing to the resource"""
+
+
+class DocumentContentImageContentItemImage(TypedDict, total=False):
+ data: str
+ """base64 encoded image data as string"""
+
+ url: DocumentContentImageContentItemImageURL
+ """A URL of the image or data URL in the format of data:image/{type};base64,{data}.
+
+ Note that URL could have length limits.
+ """
+
+
+class DocumentContentImageContentItem(TypedDict, total=False):
+ image: Required[DocumentContentImageContentItemImage]
+ """Image as a base64 encoded string or an URL"""
+
+ type: Required[Literal["image"]]
+ """Discriminator type of the content item. Always "image" """
+
+
+class DocumentContentTextContentItem(TypedDict, total=False):
+ text: Required[str]
+ """Text content"""
+
+ type: Required[Literal["text"]]
+ """Discriminator type of the content item. Always "text" """
+
+
+class DocumentContentURL(TypedDict, total=False):
+ uri: Required[str]
+ """The URL string pointing to the resource"""
+
+
+DocumentContent: TypeAlias = Union[
+ str,
+ DocumentContentImageContentItem,
+ DocumentContentTextContentItem,
+ Iterable[InterleavedContentItem],
+ DocumentContentURL,
+]
+
+
+class Document(TypedDict, total=False):
+ content: Required[DocumentContent]
+ """The content of the document."""
+
+ mime_type: Required[str]
+ """The MIME type of the document."""
+
+
+class ToolConfig(TypedDict, total=False):
+ system_message_behavior: Literal["append", "replace"]
+ """(Optional) Config for how to override the default system prompt.
+
+ - `SystemMessageBehavior.append`: Appends the provided system message to the
+ default system prompt. - `SystemMessageBehavior.replace`: Replaces the default
+ system prompt with the provided system message. The system message can include
+ the string '{{function_definitions}}' to indicate where the function
+ definitions should be inserted.
+ """
+
+ tool_choice: Union[Literal["auto", "required", "none"], str]
+ """(Optional) Whether tool use is automatic, required, or none.
+
+ Can also specify a tool name to use a specific tool. Defaults to
+ ToolChoice.auto.
+ """
+
+ tool_prompt_format: Literal["json", "function_tag", "python_list"]
+ """(Optional) Instructs the model how to format tool calls.
+
+ By default, Llama Stack will attempt to use a format that is best adapted to the
+ model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON
+ object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a
+ tag. - `ToolPromptFormat.python_list`: The tool calls
+ are output as Python syntax -- a list of function calls.
+ """
+
+
+class ToolgroupAgentToolGroupWithArgs(TypedDict, total=False):
+ args: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]
+
+ name: Required[str]
+
+
+Toolgroup: TypeAlias = Union[str, ToolgroupAgentToolGroupWithArgs]
+
+
+class TurnCreateParamsNonStreaming(TurnCreateParamsBase, total=False):
+ stream: Literal[False]
+ """(Optional) If True, generate an SSE event stream of the response.
+
+ Defaults to False.
+ """
+
+
+class TurnCreateParamsStreaming(TurnCreateParamsBase):
+ stream: Required[Literal[True]]
+ """(Optional) If True, generate an SSE event stream of the response.
+
+ Defaults to False.
+ """
+
+
+TurnCreateParams = Union[TurnCreateParamsNonStreaming, TurnCreateParamsStreaming]
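
A quick sketch of the request shape these TypedDicts describe (illustrative only; the agent ID and message content are placeholders):

    from llama_stack_client.types.alpha.agents.turn_create_params import TurnCreateParamsStreaming

    params: TurnCreateParamsStreaming = {
        "agent_id": "agent-123",  # placeholder
        "messages": [{"role": "user", "content": "What's the weather like today?"}],
        "stream": True,  # the streaming variant requires stream=True
        "tool_config": {"tool_choice": "auto"},  # optional override of the agent's tool_config
    }
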
diff --git a/src/llama_stack_client/types/alpha/agents/turn_response_event.py b/src/llama_stack_client/types/alpha/agents/turn_response_event.py
new file mode 100644
index 00000000..c162135d
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/agents/turn_response_event.py
@@ -0,0 +1,160 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from .turn import Turn
+from ...._utils import PropertyInfo
+from ...._models import BaseModel
+from ..inference_step import InferenceStep
+from ..shield_call_step import ShieldCallStep
+from ...shared.tool_call import ToolCall
+from ..tool_execution_step import ToolExecutionStep
+from ..memory_retrieval_step import MemoryRetrievalStep
+
+__all__ = [
+ "TurnResponseEvent",
+ "Payload",
+ "PayloadAgentTurnResponseStepStartPayload",
+ "PayloadAgentTurnResponseStepProgressPayload",
+ "PayloadAgentTurnResponseStepProgressPayloadDelta",
+ "PayloadAgentTurnResponseStepProgressPayloadDeltaTextDelta",
+ "PayloadAgentTurnResponseStepProgressPayloadDeltaImageDelta",
+ "PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDelta",
+ "PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDeltaToolCall",
+ "PayloadAgentTurnResponseStepCompletePayload",
+ "PayloadAgentTurnResponseStepCompletePayloadStepDetails",
+ "PayloadAgentTurnResponseTurnStartPayload",
+ "PayloadAgentTurnResponseTurnCompletePayload",
+ "PayloadAgentTurnResponseTurnAwaitingInputPayload",
+]
+
+
+class PayloadAgentTurnResponseStepStartPayload(BaseModel):
+ event_type: Literal["step_start"]
+ """Type of event being reported"""
+
+ step_id: str
+ """Unique identifier for the step within a turn"""
+
+ step_type: Literal["inference", "tool_execution", "shield_call", "memory_retrieval"]
+ """Type of step being executed"""
+
+ metadata: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
+ """(Optional) Additional metadata for the step"""
+
+
+class PayloadAgentTurnResponseStepProgressPayloadDeltaTextDelta(BaseModel):
+ text: str
+ """The incremental text content"""
+
+ type: Literal["text"]
+ """Discriminator type of the delta. Always "text" """
+
+
+class PayloadAgentTurnResponseStepProgressPayloadDeltaImageDelta(BaseModel):
+ image: str
+ """The incremental image data as bytes"""
+
+ type: Literal["image"]
+ """Discriminator type of the delta. Always "image" """
+
+
+PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDeltaToolCall: TypeAlias = Union[str, ToolCall]
+
+
+class PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDelta(BaseModel):
+ parse_status: Literal["started", "in_progress", "failed", "succeeded"]
+ """Current parsing status of the tool call"""
+
+ tool_call: PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDeltaToolCall
+ """Either an in-progress tool call string or the final parsed tool call"""
+
+ type: Literal["tool_call"]
+ """Discriminator type of the delta. Always "tool_call" """
+
+
+PayloadAgentTurnResponseStepProgressPayloadDelta: TypeAlias = Annotated[
+ Union[
+ PayloadAgentTurnResponseStepProgressPayloadDeltaTextDelta,
+ PayloadAgentTurnResponseStepProgressPayloadDeltaImageDelta,
+ PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDelta,
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class PayloadAgentTurnResponseStepProgressPayload(BaseModel):
+ delta: PayloadAgentTurnResponseStepProgressPayloadDelta
+ """Incremental content changes during step execution"""
+
+ event_type: Literal["step_progress"]
+ """Type of event being reported"""
+
+ step_id: str
+ """Unique identifier for the step within a turn"""
+
+ step_type: Literal["inference", "tool_execution", "shield_call", "memory_retrieval"]
+ """Type of step being executed"""
+
+
+PayloadAgentTurnResponseStepCompletePayloadStepDetails: TypeAlias = Annotated[
+ Union[InferenceStep, ToolExecutionStep, ShieldCallStep, MemoryRetrievalStep],
+ PropertyInfo(discriminator="step_type"),
+]
+
+
+class PayloadAgentTurnResponseStepCompletePayload(BaseModel):
+ event_type: Literal["step_complete"]
+ """Type of event being reported"""
+
+ step_details: PayloadAgentTurnResponseStepCompletePayloadStepDetails
+ """Complete details of the executed step"""
+
+ step_id: str
+ """Unique identifier for the step within a turn"""
+
+ step_type: Literal["inference", "tool_execution", "shield_call", "memory_retrieval"]
+ """Type of step being executed"""
+
+
+class PayloadAgentTurnResponseTurnStartPayload(BaseModel):
+ event_type: Literal["turn_start"]
+ """Type of event being reported"""
+
+ turn_id: str
+ """Unique identifier for the turn within a session"""
+
+
+class PayloadAgentTurnResponseTurnCompletePayload(BaseModel):
+ event_type: Literal["turn_complete"]
+ """Type of event being reported"""
+
+ turn: Turn
+ """Complete turn data including all steps and results"""
+
+
+class PayloadAgentTurnResponseTurnAwaitingInputPayload(BaseModel):
+ event_type: Literal["turn_awaiting_input"]
+ """Type of event being reported"""
+
+ turn: Turn
+ """Turn data when waiting for external tool responses"""
+
+
+Payload: TypeAlias = Annotated[
+ Union[
+ PayloadAgentTurnResponseStepStartPayload,
+ PayloadAgentTurnResponseStepProgressPayload,
+ PayloadAgentTurnResponseStepCompletePayload,
+ PayloadAgentTurnResponseTurnStartPayload,
+ PayloadAgentTurnResponseTurnCompletePayload,
+ PayloadAgentTurnResponseTurnAwaitingInputPayload,
+ ],
+ PropertyInfo(discriminator="event_type"),
+]
+
+
+class TurnResponseEvent(BaseModel):
+ payload: Payload
+ """Event-specific payload containing event data"""
diff --git a/src/llama_stack_client/types/alpha/agents/turn_resume_params.py b/src/llama_stack_client/types/alpha/agents/turn_resume_params.py
new file mode 100644
index 00000000..554e3578
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/agents/turn_resume_params.py
@@ -0,0 +1,32 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Iterable
+from typing_extensions import Literal, Required, TypedDict
+
+from ..tool_response_param import ToolResponseParam
+
+__all__ = ["TurnResumeParamsBase", "TurnResumeParamsNonStreaming", "TurnResumeParamsStreaming"]
+
+
+class TurnResumeParamsBase(TypedDict, total=False):
+ agent_id: Required[str]
+
+ session_id: Required[str]
+
+ tool_responses: Required[Iterable[ToolResponseParam]]
+ """The tool call responses to resume the turn with."""
+
+
+class TurnResumeParamsNonStreaming(TurnResumeParamsBase, total=False):
+ stream: Literal[False]
+ """Whether to stream the response."""
+
+
+class TurnResumeParamsStreaming(TurnResumeParamsBase):
+ stream: Required[Literal[True]]
+ """Whether to stream the response."""
+
+
+TurnResumeParams = Union[TurnResumeParamsNonStreaming, TurnResumeParamsStreaming]
diff --git a/src/llama_stack_client/types/alpha/algorithm_config_param.py b/src/llama_stack_client/types/alpha/algorithm_config_param.py
new file mode 100644
index 00000000..d6da8130
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/algorithm_config_param.py
@@ -0,0 +1,50 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from ..._types import SequenceNotStr
+
+__all__ = ["AlgorithmConfigParam", "LoraFinetuningConfig", "QatFinetuningConfig"]
+
+
+class LoraFinetuningConfig(TypedDict, total=False):
+ alpha: Required[int]
+ """LoRA scaling parameter that controls adaptation strength"""
+
+ apply_lora_to_mlp: Required[bool]
+ """Whether to apply LoRA to MLP layers"""
+
+ apply_lora_to_output: Required[bool]
+ """Whether to apply LoRA to output projection layers"""
+
+ lora_attn_modules: Required[SequenceNotStr[str]]
+ """List of attention module names to apply LoRA to"""
+
+ rank: Required[int]
+ """Rank of the LoRA adaptation (lower rank = fewer parameters)"""
+
+ type: Required[Literal["LoRA"]]
+ """Algorithm type identifier, always "LoRA" """
+
+ quantize_base: bool
+ """(Optional) Whether to quantize the base model weights"""
+
+ use_dora: bool
+ """(Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation)"""
+
+
+class QatFinetuningConfig(TypedDict, total=False):
+ group_size: Required[int]
+ """Size of groups for grouped quantization"""
+
+ quantizer_name: Required[str]
+ """Name of the quantization algorithm to use"""
+
+ type: Required[Literal["QAT"]]
+ """Algorithm type identifier, always "QAT" """
+
+
+AlgorithmConfigParam: TypeAlias = Union[LoraFinetuningConfig, QatFinetuningConfig]
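
An illustrative value for the LoRA arm of the union above (the hyperparameter values are arbitrary placeholders, not recommended defaults):

    from llama_stack_client.types.alpha.algorithm_config_param import AlgorithmConfigParam

    lora_config: AlgorithmConfigParam = {
        "type": "LoRA",
        "alpha": 16,
        "rank": 8,
        "apply_lora_to_mlp": True,
        "apply_lora_to_output": False,
        "lora_attn_modules": ["q_proj", "v_proj"],  # attention modules to adapt
        "use_dora": False,  # optional
    }
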
diff --git a/src/llama_stack_client/types/alpha/benchmark.py b/src/llama_stack_client/types/alpha/benchmark.py
new file mode 100644
index 00000000..4313a7af
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/benchmark.py
@@ -0,0 +1,28 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["Benchmark"]
+
+
+class Benchmark(BaseModel):
+ dataset_id: str
+ """Identifier of the dataset to use for the benchmark evaluation"""
+
+ identifier: str
+
+ metadata: Dict[str, Union[bool, float, str, List[object], object, None]]
+ """Metadata for this evaluation task"""
+
+ provider_id: str
+
+ scoring_functions: List[str]
+ """List of scoring function identifiers to apply during evaluation"""
+
+ type: Literal["benchmark"]
+ """The resource type, always benchmark"""
+
+ provider_resource_id: Optional[str] = None
diff --git a/src/llama_stack_client/types/alpha/benchmark_config_param.py b/src/llama_stack_client/types/alpha/benchmark_config_param.py
new file mode 100644
index 00000000..4a3ea512
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/benchmark_config_param.py
@@ -0,0 +1,53 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from ..scoring_fn_params_param import ScoringFnParamsParam
+from ..shared_params.agent_config import AgentConfig
+from ..shared_params.system_message import SystemMessage
+from ..shared_params.sampling_params import SamplingParams
+
+__all__ = ["BenchmarkConfigParam", "EvalCandidate", "EvalCandidateModelCandidate", "EvalCandidateAgentCandidate"]
+
+
+class EvalCandidateModelCandidate(TypedDict, total=False):
+ model: Required[str]
+ """The model ID to evaluate."""
+
+ sampling_params: Required[SamplingParams]
+ """The sampling parameters for the model."""
+
+ type: Required[Literal["model"]]
+
+ system_message: SystemMessage
+ """(Optional) The system message providing instructions or context to the model."""
+
+
+class EvalCandidateAgentCandidate(TypedDict, total=False):
+ config: Required[AgentConfig]
+ """The configuration for the agent candidate."""
+
+ type: Required[Literal["agent"]]
+
+
+EvalCandidate: TypeAlias = Union[EvalCandidateModelCandidate, EvalCandidateAgentCandidate]
+
+
+class BenchmarkConfigParam(TypedDict, total=False):
+ eval_candidate: Required[EvalCandidate]
+ """The candidate to evaluate."""
+
+ scoring_params: Required[Dict[str, ScoringFnParamsParam]]
+ """
+ Map between scoring function id and parameters for each scoring function you
+ want to run
+ """
+
+ num_examples: int
+ """(Optional) The number of examples to evaluate.
+
+ If not provided, all examples in the dataset will be evaluated
+ """
diff --git a/src/llama_stack_client/types/alpha/benchmark_list_response.py b/src/llama_stack_client/types/alpha/benchmark_list_response.py
new file mode 100644
index 00000000..b2e8ad2b
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/benchmark_list_response.py
@@ -0,0 +1,10 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List
+from typing_extensions import TypeAlias
+
+from .benchmark import Benchmark
+
+__all__ = ["BenchmarkListResponse"]
+
+BenchmarkListResponse: TypeAlias = List[Benchmark]
diff --git a/src/llama_stack_client/types/alpha/benchmark_register_params.py b/src/llama_stack_client/types/alpha/benchmark_register_params.py
new file mode 100644
index 00000000..c8cb02ff
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/benchmark_register_params.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable
+from typing_extensions import Required, TypedDict
+
+from ..._types import SequenceNotStr
+
+__all__ = ["BenchmarkRegisterParams"]
+
+
+class BenchmarkRegisterParams(TypedDict, total=False):
+ benchmark_id: Required[str]
+ """The ID of the benchmark to register."""
+
+ dataset_id: Required[str]
+ """The ID of the dataset to use for the benchmark."""
+
+ scoring_functions: Required[SequenceNotStr[str]]
+ """The scoring functions to use for the benchmark."""
+
+ metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]]
+ """The metadata to use for the benchmark."""
+
+ provider_benchmark_id: str
+ """The ID of the provider benchmark to use for the benchmark."""
+
+ provider_id: str
+ """The ID of the provider to use for the benchmark."""
diff --git a/src/llama_stack_client/types/alpha/eval_evaluate_rows_alpha_params.py b/src/llama_stack_client/types/alpha/eval_evaluate_rows_alpha_params.py
new file mode 100644
index 00000000..0422e224
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/eval_evaluate_rows_alpha_params.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable
+from typing_extensions import Required, TypedDict
+
+from ..._types import SequenceNotStr
+from .benchmark_config_param import BenchmarkConfigParam
+
+__all__ = ["EvalEvaluateRowsAlphaParams"]
+
+
+class EvalEvaluateRowsAlphaParams(TypedDict, total=False):
+ benchmark_config: Required[BenchmarkConfigParam]
+ """The configuration for the benchmark."""
+
+ input_rows: Required[Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]]
+ """The rows to evaluate."""
+
+ scoring_functions: Required[SequenceNotStr[str]]
+ """The scoring functions to use for the evaluation."""
diff --git a/src/llama_stack_client/types/alpha/eval_evaluate_rows_params.py b/src/llama_stack_client/types/alpha/eval_evaluate_rows_params.py
new file mode 100644
index 00000000..4ff9bd5b
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/eval_evaluate_rows_params.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable
+from typing_extensions import Required, TypedDict
+
+from ..._types import SequenceNotStr
+from .benchmark_config_param import BenchmarkConfigParam
+
+__all__ = ["EvalEvaluateRowsParams"]
+
+
+class EvalEvaluateRowsParams(TypedDict, total=False):
+ benchmark_config: Required[BenchmarkConfigParam]
+ """The configuration for the benchmark."""
+
+ input_rows: Required[Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]]
+ """The rows to evaluate."""
+
+ scoring_functions: Required[SequenceNotStr[str]]
+ """The scoring functions to use for the evaluation."""
diff --git a/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py b/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py
new file mode 100644
index 00000000..e07393b3
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+from .benchmark_config_param import BenchmarkConfigParam
+
+__all__ = ["EvalRunEvalAlphaParams"]
+
+
+class EvalRunEvalAlphaParams(TypedDict, total=False):
+ benchmark_config: Required[BenchmarkConfigParam]
+ """The configuration for the benchmark."""
diff --git a/src/llama_stack_client/types/alpha/eval_run_eval_params.py b/src/llama_stack_client/types/alpha/eval_run_eval_params.py
new file mode 100644
index 00000000..33596fc2
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/eval_run_eval_params.py
@@ -0,0 +1,14 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+from .benchmark_config_param import BenchmarkConfigParam
+
+__all__ = ["EvalRunEvalParams"]
+
+
+class EvalRunEvalParams(TypedDict, total=False):
+ benchmark_config: Required[BenchmarkConfigParam]
+ """The configuration for the benchmark."""
diff --git a/src/llama_stack_client/types/alpha/evaluate_response.py b/src/llama_stack_client/types/alpha/evaluate_response.py
new file mode 100644
index 00000000..4cd2e0f7
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/evaluate_response.py
@@ -0,0 +1,16 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union
+
+from ..._models import BaseModel
+from ..shared.scoring_result import ScoringResult
+
+__all__ = ["EvaluateResponse"]
+
+
+class EvaluateResponse(BaseModel):
+ generations: List[Dict[str, Union[bool, float, str, List[object], object, None]]]
+ """The generations from the evaluation."""
+
+ scores: Dict[str, ScoringResult]
+ """The scores from the evaluation."""
diff --git a/src/llama_stack_client/types/alpha/inference_rerank_params.py b/src/llama_stack_client/types/alpha/inference_rerank_params.py
new file mode 100644
index 00000000..4c506240
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/inference_rerank_params.py
@@ -0,0 +1,106 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from ..._types import SequenceNotStr
+
+__all__ = [
+ "InferenceRerankParams",
+ "Item",
+ "ItemOpenAIChatCompletionContentPartTextParam",
+ "ItemOpenAIChatCompletionContentPartImageParam",
+ "ItemOpenAIChatCompletionContentPartImageParamImageURL",
+ "Query",
+ "QueryOpenAIChatCompletionContentPartTextParam",
+ "QueryOpenAIChatCompletionContentPartImageParam",
+ "QueryOpenAIChatCompletionContentPartImageParamImageURL",
+]
+
+
+class InferenceRerankParams(TypedDict, total=False):
+ items: Required[SequenceNotStr[Item]]
+ """List of items to rerank.
+
+ Each item can be a string, text content part, or image content part. Each input
+ must not exceed the model's max input token length.
+ """
+
+ model: Required[str]
+ """The identifier of the reranking model to use."""
+
+ query: Required[Query]
+ """The search query to rank items against.
+
+ Can be a string, text content part, or image content part. The input must not
+ exceed the model's max input token length.
+ """
+
+ max_num_results: int
+ """(Optional) Maximum number of results to return. Default: returns all."""
+
+
+class ItemOpenAIChatCompletionContentPartTextParam(TypedDict, total=False):
+ text: Required[str]
+ """The text content of the message"""
+
+ type: Required[Literal["text"]]
+ """Must be "text" to identify this as text content"""
+
+
+class ItemOpenAIChatCompletionContentPartImageParamImageURL(TypedDict, total=False):
+ url: Required[str]
+ """URL of the image to include in the message"""
+
+ detail: str
+ """(Optional) Level of detail for image processing.
+
+ Can be "low", "high", or "auto"
+ """
+
+
+class ItemOpenAIChatCompletionContentPartImageParam(TypedDict, total=False):
+ image_url: Required[ItemOpenAIChatCompletionContentPartImageParamImageURL]
+ """Image URL specification and processing details"""
+
+ type: Required[Literal["image_url"]]
+ """Must be "image_url" to identify this as image content"""
+
+
+Item: TypeAlias = Union[
+ str, ItemOpenAIChatCompletionContentPartTextParam, ItemOpenAIChatCompletionContentPartImageParam
+]
+
+
+class QueryOpenAIChatCompletionContentPartTextParam(TypedDict, total=False):
+ text: Required[str]
+ """The text content of the message"""
+
+ type: Required[Literal["text"]]
+ """Must be "text" to identify this as text content"""
+
+
+class QueryOpenAIChatCompletionContentPartImageParamImageURL(TypedDict, total=False):
+ url: Required[str]
+ """URL of the image to include in the message"""
+
+ detail: str
+ """(Optional) Level of detail for image processing.
+
+ Can be "low", "high", or "auto"
+ """
+
+
+class QueryOpenAIChatCompletionContentPartImageParam(TypedDict, total=False):
+ image_url: Required[QueryOpenAIChatCompletionContentPartImageParamImageURL]
+ """Image URL specification and processing details"""
+
+ type: Required[Literal["image_url"]]
+ """Must be "image_url" to identify this as image content"""
+
+
+Query: TypeAlias = Union[
+ str, QueryOpenAIChatCompletionContentPartTextParam, QueryOpenAIChatCompletionContentPartImageParam
+]
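A sketch of how the rerank params and the response items below fit together, reusing the `client` constructed in the benchmark sketch above; the `client.alpha.inference.rerank` path and the model id are assumptions inferred from the resource layout in this patch, not verified against the generated client:

    # Items may be plain strings or typed content parts, per the Item/Query aliases above.
    results = client.alpha.inference.rerank(
        model="my-reranker-model",  # illustrative reranking model id
        query="What is the capital of France?",
        items=[
            "Paris is the capital of France.",
            "Berlin is the capital of Germany.",
            {"type": "text", "text": "France's capital city is Paris."},
        ],
        max_num_results=2,
    )
    for item in results:
        # index refers to the position in the input list; higher scores are more relevant.
        print(item.index, item.relevance_score)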
diff --git a/src/llama_stack_client/types/alpha/inference_rerank_response.py b/src/llama_stack_client/types/alpha/inference_rerank_response.py
new file mode 100644
index 00000000..391f8a3b
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/inference_rerank_response.py
@@ -0,0 +1,23 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List
+from typing_extensions import TypeAlias
+
+from ..._models import BaseModel
+
+__all__ = ["InferenceRerankResponse", "InferenceRerankResponseItem"]
+
+
+class InferenceRerankResponseItem(BaseModel):
+ index: int
+ """The original index of the document in the input list"""
+
+ relevance_score: float
+ """The relevance score from the model output.
+
+ Values are inverted when applicable so that higher scores indicate greater
+ relevance.
+ """
+
+
+InferenceRerankResponse: TypeAlias = List[InferenceRerankResponseItem]
diff --git a/src/llama_stack_client/types/alpha/inference_step.py b/src/llama_stack_client/types/alpha/inference_step.py
new file mode 100644
index 00000000..a7e446d1
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/inference_step.py
@@ -0,0 +1,32 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from datetime import datetime
+from typing_extensions import Literal
+
+from pydantic import Field as FieldInfo
+
+from ..._models import BaseModel
+from ..shared.completion_message import CompletionMessage
+
+__all__ = ["InferenceStep"]
+
+
+class InferenceStep(BaseModel):
+ api_model_response: CompletionMessage = FieldInfo(alias="model_response")
+ """The response from the LLM."""
+
+ step_id: str
+ """The ID of the step."""
+
+ step_type: Literal["inference"]
+ """Type of the step in an agent turn."""
+
+ turn_id: str
+ """The ID of the turn."""
+
+ completed_at: Optional[datetime] = None
+ """The time the step completed."""
+
+ started_at: Optional[datetime] = None
+ """The time the step started."""
diff --git a/src/llama_stack_client/types/alpha/job.py b/src/llama_stack_client/types/alpha/job.py
new file mode 100644
index 00000000..23506692
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/job.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["Job"]
+
+
+class Job(BaseModel):
+ job_id: str
+ """Unique identifier for the job"""
+
+ status: Literal["completed", "in_progress", "failed", "scheduled", "cancelled"]
+ """Current execution status of the job"""
diff --git a/src/llama_stack_client/types/alpha/list_benchmarks_response.py b/src/llama_stack_client/types/alpha/list_benchmarks_response.py
new file mode 100644
index 00000000..accaf36c
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/list_benchmarks_response.py
@@ -0,0 +1,10 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from ..._models import BaseModel
+from .benchmark_list_response import BenchmarkListResponse
+
+__all__ = ["ListBenchmarksResponse"]
+
+
+class ListBenchmarksResponse(BaseModel):
+ data: BenchmarkListResponse
diff --git a/src/llama_stack_client/types/alpha/list_post_training_jobs_response.py b/src/llama_stack_client/types/alpha/list_post_training_jobs_response.py
new file mode 100644
index 00000000..7af3bd96
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/list_post_training_jobs_response.py
@@ -0,0 +1,10 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from ..._models import BaseModel
+from .post_training.job_list_response import JobListResponse
+
+__all__ = ["ListPostTrainingJobsResponse"]
+
+
+class ListPostTrainingJobsResponse(BaseModel):
+ data: JobListResponse
diff --git a/src/llama_stack_client/types/alpha/memory_retrieval_step.py b/src/llama_stack_client/types/alpha/memory_retrieval_step.py
new file mode 100644
index 00000000..787453af
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/memory_retrieval_step.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from datetime import datetime
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from ..shared.interleaved_content import InterleavedContent
+
+__all__ = ["MemoryRetrievalStep"]
+
+
+class MemoryRetrievalStep(BaseModel):
+ inserted_context: InterleavedContent
+ """The context retrieved from the vector databases."""
+
+ step_id: str
+ """The ID of the step."""
+
+ step_type: Literal["memory_retrieval"]
+ """Type of the step in an agent turn."""
+
+ turn_id: str
+ """The ID of the turn."""
+
+ vector_store_ids: str

+ """The IDs of the vector databases to retrieve context from."""
+
+ completed_at: Optional[datetime] = None
+ """The time the step completed."""
+
+ started_at: Optional[datetime] = None
+ """The time the step started."""
diff --git a/src/llama_stack_client/types/alpha/post_training/__init__.py b/src/llama_stack_client/types/alpha/post_training/__init__.py
index d14ed874..8b609eaa 100644
--- a/src/llama_stack_client/types/alpha/post_training/__init__.py
+++ b/src/llama_stack_client/types/alpha/post_training/__init__.py
@@ -7,3 +7,10 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
+
+from .job_cancel_params import JobCancelParams as JobCancelParams
+from .job_list_response import JobListResponse as JobListResponse
+from .job_status_params import JobStatusParams as JobStatusParams
+from .job_status_response import JobStatusResponse as JobStatusResponse
+from .job_artifacts_params import JobArtifactsParams as JobArtifactsParams
+from .job_artifacts_response import JobArtifactsResponse as JobArtifactsResponse
diff --git a/src/llama_stack_client/types/alpha/post_training/job_artifacts_params.py b/src/llama_stack_client/types/alpha/post_training/job_artifacts_params.py
new file mode 100644
index 00000000..851ebf5f
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/post_training/job_artifacts_params.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["JobArtifactsParams"]
+
+
+class JobArtifactsParams(TypedDict, total=False):
+ job_uuid: Required[str]
+ """The UUID of the job to get the artifacts of."""
diff --git a/src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py b/src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py
new file mode 100644
index 00000000..74edff26
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py
@@ -0,0 +1,50 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from datetime import datetime
+
+from ...._models import BaseModel
+
+__all__ = ["JobArtifactsResponse", "Checkpoint", "CheckpointTrainingMetrics"]
+
+
+class CheckpointTrainingMetrics(BaseModel):
+ epoch: int
+ """Training epoch number"""
+
+ perplexity: float
+ """Perplexity metric indicating model confidence"""
+
+ train_loss: float
+ """Loss value on the training dataset"""
+
+ validation_loss: float
+ """Loss value on the validation dataset"""
+
+
+class Checkpoint(BaseModel):
+ created_at: datetime
+ """Timestamp when the checkpoint was created"""
+
+ epoch: int
+ """Training epoch when the checkpoint was saved"""
+
+ identifier: str
+ """Unique identifier for the checkpoint"""
+
+ path: str
+ """File system path where the checkpoint is stored"""
+
+ post_training_job_id: str
+ """Identifier of the training job that created this checkpoint"""
+
+ training_metrics: Optional[CheckpointTrainingMetrics] = None
+ """(Optional) Training metrics associated with this checkpoint"""
+
+
+class JobArtifactsResponse(BaseModel):
+ checkpoints: List[Checkpoint]
+ """List of model checkpoints created during training"""
+
+ job_uuid: str
+ """Unique identifier for the training job"""
diff --git a/src/llama_stack_client/types/alpha/post_training/job_cancel_params.py b/src/llama_stack_client/types/alpha/post_training/job_cancel_params.py
new file mode 100644
index 00000000..3a976e87
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/post_training/job_cancel_params.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["JobCancelParams"]
+
+
+class JobCancelParams(TypedDict, total=False):
+ job_uuid: Required[str]
+ """The UUID of the job to cancel."""
diff --git a/src/llama_stack_client/types/alpha/post_training/job_list_response.py b/src/llama_stack_client/types/alpha/post_training/job_list_response.py
new file mode 100644
index 00000000..33bd89f1
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/post_training/job_list_response.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List
+from typing_extensions import TypeAlias
+
+from ...._models import BaseModel
+
+__all__ = ["JobListResponse", "JobListResponseItem"]
+
+
+class JobListResponseItem(BaseModel):
+ job_uuid: str
+
+
+JobListResponse: TypeAlias = List[JobListResponseItem]
diff --git a/src/llama_stack_client/types/alpha/post_training/job_status_params.py b/src/llama_stack_client/types/alpha/post_training/job_status_params.py
new file mode 100644
index 00000000..d5e040e0
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/post_training/job_status_params.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["JobStatusParams"]
+
+
+class JobStatusParams(TypedDict, total=False):
+ job_uuid: Required[str]
+ """The UUID of the job to get the status of."""
diff --git a/src/llama_stack_client/types/alpha/post_training/job_status_response.py b/src/llama_stack_client/types/alpha/post_training/job_status_response.py
new file mode 100644
index 00000000..1ccc9ca2
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/post_training/job_status_response.py
@@ -0,0 +1,66 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from datetime import datetime
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["JobStatusResponse", "Checkpoint", "CheckpointTrainingMetrics"]
+
+
+class CheckpointTrainingMetrics(BaseModel):
+ epoch: int
+ """Training epoch number"""
+
+ perplexity: float
+ """Perplexity metric indicating model confidence"""
+
+ train_loss: float
+ """Loss value on the training dataset"""
+
+ validation_loss: float
+ """Loss value on the validation dataset"""
+
+
+class Checkpoint(BaseModel):
+ created_at: datetime
+ """Timestamp when the checkpoint was created"""
+
+ epoch: int
+ """Training epoch when the checkpoint was saved"""
+
+ identifier: str
+ """Unique identifier for the checkpoint"""
+
+ path: str
+ """File system path where the checkpoint is stored"""
+
+ post_training_job_id: str
+ """Identifier of the training job that created this checkpoint"""
+
+ training_metrics: Optional[CheckpointTrainingMetrics] = None
+ """(Optional) Training metrics associated with this checkpoint"""
+
+
+class JobStatusResponse(BaseModel):
+ checkpoints: List[Checkpoint]
+ """List of model checkpoints created during training"""
+
+ job_uuid: str
+ """Unique identifier for the training job"""
+
+ status: Literal["completed", "in_progress", "failed", "scheduled", "cancelled"]
+ """Current status of the training job"""
+
+ completed_at: Optional[datetime] = None
+ """(Optional) Timestamp when the job finished, if completed"""
+
+ resources_allocated: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
+ """(Optional) Information about computational resources allocated to the job"""
+
+ scheduled_at: Optional[datetime] = None
+ """(Optional) Timestamp when the job was scheduled"""
+
+ started_at: Optional[datetime] = None
+ """(Optional) Timestamp when the job execution began"""
diff --git a/src/llama_stack_client/types/alpha/post_training_job.py b/src/llama_stack_client/types/alpha/post_training_job.py
new file mode 100644
index 00000000..7d9417db
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/post_training_job.py
@@ -0,0 +1,9 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from ..._models import BaseModel
+
+__all__ = ["PostTrainingJob"]
+
+
+class PostTrainingJob(BaseModel):
+ job_uuid: str
diff --git a/src/llama_stack_client/types/alpha/post_training_preference_optimize_params.py b/src/llama_stack_client/types/alpha/post_training_preference_optimize_params.py
new file mode 100644
index 00000000..2dcd294d
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/post_training_preference_optimize_params.py
@@ -0,0 +1,123 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = [
+ "PostTrainingPreferenceOptimizeParams",
+ "AlgorithmConfig",
+ "TrainingConfig",
+ "TrainingConfigDataConfig",
+ "TrainingConfigEfficiencyConfig",
+ "TrainingConfigOptimizerConfig",
+]
+
+
+class PostTrainingPreferenceOptimizeParams(TypedDict, total=False):
+ algorithm_config: Required[AlgorithmConfig]
+ """The algorithm configuration."""
+
+ finetuned_model: Required[str]
+ """The model to fine-tune."""
+
+ hyperparam_search_config: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]
+ """The hyperparam search configuration."""
+
+ job_uuid: Required[str]
+ """The UUID of the job to create."""
+
+ logger_config: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]
+ """The logger configuration."""
+
+ training_config: Required[TrainingConfig]
+ """The training configuration."""
+
+
+class AlgorithmConfig(TypedDict, total=False):
+ beta: Required[float]
+ """Temperature parameter for the DPO loss"""
+
+ loss_type: Required[Literal["sigmoid", "hinge", "ipo", "kto_pair"]]
+ """The type of loss function to use for DPO"""
+
+
+class TrainingConfigDataConfig(TypedDict, total=False):
+ batch_size: Required[int]
+ """Number of samples per training batch"""
+
+ data_format: Required[Literal["instruct", "dialog"]]
+ """Format of the dataset (instruct or dialog)"""
+
+ dataset_id: Required[str]
+ """Unique identifier for the training dataset"""
+
+ shuffle: Required[bool]
+ """Whether to shuffle the dataset during training"""
+
+ packed: bool
+ """
+ (Optional) Whether to pack multiple samples into a single sequence for
+ efficiency
+ """
+
+ train_on_input: bool
+ """(Optional) Whether to compute loss on input tokens as well as output tokens"""
+
+ validation_dataset_id: str
+ """(Optional) Unique identifier for the validation dataset"""
+
+
+class TrainingConfigEfficiencyConfig(TypedDict, total=False):
+ enable_activation_checkpointing: bool
+ """(Optional) Whether to use activation checkpointing to reduce memory usage"""
+
+ enable_activation_offloading: bool
+ """(Optional) Whether to offload activations to CPU to save GPU memory"""
+
+ fsdp_cpu_offload: bool
+ """(Optional) Whether to offload FSDP parameters to CPU"""
+
+ memory_efficient_fsdp_wrap: bool
+ """(Optional) Whether to use memory-efficient FSDP wrapping"""
+
+
+class TrainingConfigOptimizerConfig(TypedDict, total=False):
+ lr: Required[float]
+ """Learning rate for the optimizer"""
+
+ num_warmup_steps: Required[int]
+ """Number of steps for learning rate warmup"""
+
+ optimizer_type: Required[Literal["adam", "adamw", "sgd"]]
+ """Type of optimizer to use (adam, adamw, or sgd)"""
+
+ weight_decay: Required[float]
+ """Weight decay coefficient for regularization"""
+
+
+class TrainingConfig(TypedDict, total=False):
+ gradient_accumulation_steps: Required[int]
+ """Number of steps to accumulate gradients before updating"""
+
+ max_steps_per_epoch: Required[int]
+ """Maximum number of steps to run per epoch"""
+
+ n_epochs: Required[int]
+ """Number of training epochs to run"""
+
+ data_config: TrainingConfigDataConfig
+ """(Optional) Configuration for data loading and formatting"""
+
+ dtype: str
+ """(Optional) Data type for model parameters (bf16, fp16, fp32)"""
+
+ efficiency_config: TrainingConfigEfficiencyConfig
+ """(Optional) Configuration for memory and compute optimizations"""
+
+ max_validation_steps: int
+ """(Optional) Maximum number of validation steps per epoch"""
+
+ optimizer_config: TrainingConfigOptimizerConfig
+ """(Optional) Configuration for the optimization algorithm"""
diff --git a/src/llama_stack_client/types/alpha/post_training_supervised_fine_tune_params.py b/src/llama_stack_client/types/alpha/post_training_supervised_fine_tune_params.py
new file mode 100644
index 00000000..c23796f0
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/post_training_supervised_fine_tune_params.py
@@ -0,0 +1,119 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable
+from typing_extensions import Literal, Required, TypedDict
+
+from .algorithm_config_param import AlgorithmConfigParam
+
+__all__ = [
+ "PostTrainingSupervisedFineTuneParams",
+ "TrainingConfig",
+ "TrainingConfigDataConfig",
+ "TrainingConfigEfficiencyConfig",
+ "TrainingConfigOptimizerConfig",
+]
+
+
+class PostTrainingSupervisedFineTuneParams(TypedDict, total=False):
+ hyperparam_search_config: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]
+ """The hyperparam search configuration."""
+
+ job_uuid: Required[str]
+ """The UUID of the job to create."""
+
+ logger_config: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]
+ """The logger configuration."""
+
+ training_config: Required[TrainingConfig]
+ """The training configuration."""
+
+ algorithm_config: AlgorithmConfigParam
+ """The algorithm configuration."""
+
+ checkpoint_dir: str
+ """The directory to save checkpoint(s) to."""
+
+ model: str
+ """The model to fine-tune."""
+
+
+class TrainingConfigDataConfig(TypedDict, total=False):
+ batch_size: Required[int]
+ """Number of samples per training batch"""
+
+ data_format: Required[Literal["instruct", "dialog"]]
+ """Format of the dataset (instruct or dialog)"""
+
+ dataset_id: Required[str]
+ """Unique identifier for the training dataset"""
+
+ shuffle: Required[bool]
+ """Whether to shuffle the dataset during training"""
+
+ packed: bool
+ """
+ (Optional) Whether to pack multiple samples into a single sequence for
+ efficiency
+ """
+
+ train_on_input: bool
+ """(Optional) Whether to compute loss on input tokens as well as output tokens"""
+
+ validation_dataset_id: str
+ """(Optional) Unique identifier for the validation dataset"""
+
+
+class TrainingConfigEfficiencyConfig(TypedDict, total=False):
+ enable_activation_checkpointing: bool
+ """(Optional) Whether to use activation checkpointing to reduce memory usage"""
+
+ enable_activation_offloading: bool
+ """(Optional) Whether to offload activations to CPU to save GPU memory"""
+
+ fsdp_cpu_offload: bool
+ """(Optional) Whether to offload FSDP parameters to CPU"""
+
+ memory_efficient_fsdp_wrap: bool
+ """(Optional) Whether to use memory-efficient FSDP wrapping"""
+
+
+class TrainingConfigOptimizerConfig(TypedDict, total=False):
+ lr: Required[float]
+ """Learning rate for the optimizer"""
+
+ num_warmup_steps: Required[int]
+ """Number of steps for learning rate warmup"""
+
+ optimizer_type: Required[Literal["adam", "adamw", "sgd"]]
+ """Type of optimizer to use (adam, adamw, or sgd)"""
+
+ weight_decay: Required[float]
+ """Weight decay coefficient for regularization"""
+
+
+class TrainingConfig(TypedDict, total=False):
+ gradient_accumulation_steps: Required[int]
+ """Number of steps to accumulate gradients before updating"""
+
+ max_steps_per_epoch: Required[int]
+ """Maximum number of steps to run per epoch"""
+
+ n_epochs: Required[int]
+ """Number of training epochs to run"""
+
+ data_config: TrainingConfigDataConfig
+ """(Optional) Configuration for data loading and formatting"""
+
+ dtype: str
+ """(Optional) Data type for model parameters (bf16, fp16, fp32)"""
+
+ efficiency_config: TrainingConfigEfficiencyConfig
+ """(Optional) Configuration for memory and compute optimizations"""
+
+ max_validation_steps: int
+ """(Optional) Maximum number of validation steps per epoch"""
+
+ optimizer_config: TrainingConfigOptimizerConfig
+ """(Optional) Configuration for the optimization algorithm"""
diff --git a/src/llama_stack_client/types/alpha/shield_call_step.py b/src/llama_stack_client/types/alpha/shield_call_step.py
new file mode 100644
index 00000000..80176555
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/shield_call_step.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from datetime import datetime
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from ..shared.safety_violation import SafetyViolation
+
+__all__ = ["ShieldCallStep"]
+
+
+class ShieldCallStep(BaseModel):
+ step_id: str
+ """The ID of the step."""
+
+ step_type: Literal["shield_call"]
+ """Type of the step in an agent turn."""
+
+ turn_id: str
+ """The ID of the turn."""
+
+ completed_at: Optional[datetime] = None
+ """The time the step completed."""
+
+ started_at: Optional[datetime] = None
+ """The time the step started."""
+
+ violation: Optional[SafetyViolation] = None
+ """The violation from the shield call."""
diff --git a/src/llama_stack_client/types/alpha/tool_execution_step.py b/src/llama_stack_client/types/alpha/tool_execution_step.py
new file mode 100644
index 00000000..1761e889
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/tool_execution_step.py
@@ -0,0 +1,34 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from datetime import datetime
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .tool_response import ToolResponse
+from ..shared.tool_call import ToolCall
+
+__all__ = ["ToolExecutionStep"]
+
+
+class ToolExecutionStep(BaseModel):
+ step_id: str
+ """The ID of the step."""
+
+ step_type: Literal["tool_execution"]
+ """Type of the step in an agent turn."""
+
+ tool_calls: List[ToolCall]
+ """The tool calls to execute."""
+
+ tool_responses: List[ToolResponse]
+ """The tool responses from the tool calls."""
+
+ turn_id: str
+ """The ID of the turn."""
+
+ completed_at: Optional[datetime] = None
+ """The time the step completed."""
+
+ started_at: Optional[datetime] = None
+ """The time the step started."""
diff --git a/src/llama_stack_client/types/alpha/tool_response.py b/src/llama_stack_client/types/alpha/tool_response.py
new file mode 100644
index 00000000..fb749f75
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/tool_response.py
@@ -0,0 +1,23 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from ..shared.interleaved_content import InterleavedContent
+
+__all__ = ["ToolResponse"]
+
+
+class ToolResponse(BaseModel):
+ call_id: str
+ """Unique identifier for the tool call this response is for"""
+
+ content: InterleavedContent
+ """The response content from the tool"""
+
+ tool_name: Union[Literal["brave_search", "wolfram_alpha", "photogen", "code_interpreter"], str]
+ """Name of the tool that was invoked"""
+
+ metadata: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
+ """(Optional) Additional metadata about the tool response"""
diff --git a/src/llama_stack_client/types/alpha/tool_response_param.py b/src/llama_stack_client/types/alpha/tool_response_param.py
new file mode 100644
index 00000000..e833211f
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/tool_response_param.py
@@ -0,0 +1,24 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable
+from typing_extensions import Literal, Required, TypedDict
+
+from ..shared_params.interleaved_content import InterleavedContent
+
+__all__ = ["ToolResponseParam"]
+
+
+class ToolResponseParam(TypedDict, total=False):
+ call_id: Required[str]
+ """Unique identifier for the tool call this response is for"""
+
+ content: Required[InterleavedContent]
+ """The response content from the tool"""
+
+ tool_name: Required[Union[Literal["brave_search", "wolfram_alpha", "photogen", "code_interpreter"], str]]
+ """Name of the tool that was invoked"""
+
+ metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]]
+ """(Optional) Additional metadata about the tool response"""
diff --git a/src/llama_stack_client/types/beta/__init__.py b/src/llama_stack_client/types/beta/__init__.py
index f8ee8b14..aab8d1b8 100644
--- a/src/llama_stack_client/types/beta/__init__.py
+++ b/src/llama_stack_client/types/beta/__init__.py
@@ -1,3 +1,12 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
+
+from .dataset_list_response import DatasetListResponse as DatasetListResponse
+from .list_datasets_response import ListDatasetsResponse as ListDatasetsResponse
+from .dataset_iterrows_params import DatasetIterrowsParams as DatasetIterrowsParams
+from .dataset_register_params import DatasetRegisterParams as DatasetRegisterParams
+from .dataset_appendrows_params import DatasetAppendrowsParams as DatasetAppendrowsParams
+from .dataset_iterrows_response import DatasetIterrowsResponse as DatasetIterrowsResponse
+from .dataset_register_response import DatasetRegisterResponse as DatasetRegisterResponse
+from .dataset_retrieve_response import DatasetRetrieveResponse as DatasetRetrieveResponse
diff --git a/src/llama_stack_client/types/beta/dataset_appendrows_params.py b/src/llama_stack_client/types/beta/dataset_appendrows_params.py
new file mode 100644
index 00000000..2e96e124
--- /dev/null
+++ b/src/llama_stack_client/types/beta/dataset_appendrows_params.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable
+from typing_extensions import Required, TypedDict
+
+__all__ = ["DatasetAppendrowsParams"]
+
+
+class DatasetAppendrowsParams(TypedDict, total=False):
+ rows: Required[Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]]
+ """The rows to append to the dataset."""
diff --git a/src/llama_stack_client/types/beta/dataset_iterrows_params.py b/src/llama_stack_client/types/beta/dataset_iterrows_params.py
new file mode 100644
index 00000000..99065312
--- /dev/null
+++ b/src/llama_stack_client/types/beta/dataset_iterrows_params.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+__all__ = ["DatasetIterrowsParams"]
+
+
+class DatasetIterrowsParams(TypedDict, total=False):
+ limit: int
+ """The number of rows to get."""
+
+ start_index: int
+ """Index into dataset for the first row to get. Get all rows if None."""
diff --git a/src/llama_stack_client/types/beta/dataset_iterrows_response.py b/src/llama_stack_client/types/beta/dataset_iterrows_response.py
new file mode 100644
index 00000000..ec7d06b4
--- /dev/null
+++ b/src/llama_stack_client/types/beta/dataset_iterrows_response.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+
+from ..._models import BaseModel
+
+__all__ = ["DatasetIterrowsResponse"]
+
+
+class DatasetIterrowsResponse(BaseModel):
+ data: List[Dict[str, Union[bool, float, str, List[object], object, None]]]
+ """The list of items for the current page"""
+
+ has_more: bool
+ """Whether there are more items available after this set"""
+
+ url: Optional[str] = None
+ """The URL for accessing this list"""
diff --git a/src/llama_stack_client/types/beta/dataset_list_response.py b/src/llama_stack_client/types/beta/dataset_list_response.py
new file mode 100644
index 00000000..2553a1a3
--- /dev/null
+++ b/src/llama_stack_client/types/beta/dataset_list_response.py
@@ -0,0 +1,66 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+
+__all__ = [
+ "DatasetListResponse",
+ "DatasetListResponseItem",
+ "DatasetListResponseItemSource",
+ "DatasetListResponseItemSourceUriDataSource",
+ "DatasetListResponseItemSourceRowsDataSource",
+]
+
+
+class DatasetListResponseItemSourceUriDataSource(BaseModel):
+ type: Literal["uri"]
+
+ uri: str
+ """The dataset can be obtained from a URI.
+
+ E.g. - "https://mywebsite.com/mydata.jsonl" - "lsfs://mydata.jsonl" -
+ "data:csv;base64,{base64_content}"
+ """
+
+
+class DatasetListResponseItemSourceRowsDataSource(BaseModel):
+ rows: List[Dict[str, Union[bool, float, str, List[object], object, None]]]
+ """The dataset is stored in rows.
+
+ E.g. - [ {"messages": [{"role": "user", "content": "Hello, world!"}, {"role":
+ "assistant", "content": "Hello, world!"}]} ]
+ """
+
+ type: Literal["rows"]
+
+
+DatasetListResponseItemSource: TypeAlias = Annotated[
+ Union[DatasetListResponseItemSourceUriDataSource, DatasetListResponseItemSourceRowsDataSource],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class DatasetListResponseItem(BaseModel):
+ identifier: str
+
+ metadata: Dict[str, Union[bool, float, str, List[object], object, None]]
+ """Additional metadata for the dataset"""
+
+ provider_id: str
+
+ purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"]
+ """Purpose of the dataset indicating its intended use"""
+
+ source: DatasetListResponseItemSource
+ """Data source configuration for the dataset"""
+
+ type: Literal["dataset"]
+ """Type of resource, always 'dataset' for datasets"""
+
+ provider_resource_id: Optional[str] = None
+
+
+DatasetListResponse: TypeAlias = List[DatasetListResponseItem]
diff --git a/src/llama_stack_client/types/beta/dataset_register_params.py b/src/llama_stack_client/types/beta/dataset_register_params.py
new file mode 100644
index 00000000..6fd5db3f
--- /dev/null
+++ b/src/llama_stack_client/types/beta/dataset_register_params.py
@@ -0,0 +1,69 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+__all__ = ["DatasetRegisterParams", "Source", "SourceUriDataSource", "SourceRowsDataSource"]
+
+
+class DatasetRegisterParams(TypedDict, total=False):
+ purpose: Required[Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"]]
+ """The purpose of the dataset.
+
+ One of: - "post-training/messages": The dataset contains a messages column with
+ list of messages for post-training. { "messages": [ {"role": "user", "content":
+ "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] } -
+ "eval/question-answer": The dataset contains a question column and an answer
+ column for evaluation. { "question": "What is the capital of France?", "answer":
+ "Paris" } - "eval/messages-answer": The dataset contains a messages column with
+ list of messages and an answer column for evaluation. { "messages": [ {"role":
+ "user", "content": "Hello, my name is John Doe."}, {"role": "assistant",
+ "content": "Hello, John Doe. How can I help you today?"}, {"role": "user",
+ "content": "What's my name?"}, ], "answer": "John Doe" }
+ """
+
+ source: Required[Source]
+ """The data source of the dataset.
+
+ Ensure that the data source schema is compatible with the purpose of the
+ dataset. Examples: - { "type": "uri", "uri":
+ "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
+ "lsfs://mydata.jsonl" } - { "type": "uri", "uri":
+ "data:csv;base64,{base64_content}" } - { "type": "uri", "uri":
+ "huggingface://llamastack/simpleqa?split=train" } - { "type": "rows", "rows": [
+ { "messages": [ {"role": "user", "content": "Hello, world!"}, {"role":
+ "assistant", "content": "Hello, world!"}, ] } ] }
+ """
+
+ dataset_id: str
+ """The ID of the dataset. If not provided, an ID will be generated."""
+
+ metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]]
+ """The metadata for the dataset. - E.g. {"description": "My dataset"}."""
+
+
+class SourceUriDataSource(TypedDict, total=False):
+ type: Required[Literal["uri"]]
+
+ uri: Required[str]
+ """The dataset can be obtained from a URI.
+
+ E.g. - "https://mywebsite.com/mydata.jsonl" - "lsfs://mydata.jsonl" -
+ "data:csv;base64,{base64_content}"
+ """
+
+
+class SourceRowsDataSource(TypedDict, total=False):
+ rows: Required[Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]]
+ """The dataset is stored in rows.
+
+ E.g. - [ {"messages": [{"role": "user", "content": "Hello, world!"}, {"role":
+ "assistant", "content": "Hello, world!"}]} ]
+ """
+
+ type: Required[Literal["rows"]]
+
+
+Source: TypeAlias = Union[SourceUriDataSource, SourceRowsDataSource]
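A registration sketch for both source variants, reusing the example values from the docstrings above and assuming the call is exposed as `client.beta.datasets.register`:

    # URI-backed dataset for question-answer evaluation.
    dataset = client.beta.datasets.register(
        purpose="eval/question-answer",
        source={"type": "uri", "uri": "https://mywebsite.com/mydata.jsonl"},
        dataset_id="my-eval-dataset",  # optional; an id is generated if omitted
        metadata={"description": "My dataset"},
    )
    print(dataset.identifier, dataset.provider_id)

    # Inline rows for messages-answer evaluation.
    client.beta.datasets.register(
        purpose="eval/messages-answer",
        source={
            "type": "rows",
            "rows": [
                {
                    "messages": [{"role": "user", "content": "What's my name?"}],
                    "answer": "John Doe",
                }
            ],
        },
    )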
diff --git a/src/llama_stack_client/types/beta/dataset_register_response.py b/src/llama_stack_client/types/beta/dataset_register_response.py
new file mode 100644
index 00000000..ee12b860
--- /dev/null
+++ b/src/llama_stack_client/types/beta/dataset_register_response.py
@@ -0,0 +1,54 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+
+__all__ = ["DatasetRegisterResponse", "Source", "SourceUriDataSource", "SourceRowsDataSource"]
+
+
+class SourceUriDataSource(BaseModel):
+ type: Literal["uri"]
+
+ uri: str
+ """The dataset can be obtained from a URI.
+
+ E.g. - "https://mywebsite.com/mydata.jsonl" - "lsfs://mydata.jsonl" -
+ "data:csv;base64,{base64_content}"
+ """
+
+
+class SourceRowsDataSource(BaseModel):
+ rows: List[Dict[str, Union[bool, float, str, List[object], object, None]]]
+ """The dataset is stored in rows.
+
+ E.g. - [ {"messages": [{"role": "user", "content": "Hello, world!"}, {"role":
+ "assistant", "content": "Hello, world!"}]} ]
+ """
+
+ type: Literal["rows"]
+
+
+Source: TypeAlias = Annotated[Union[SourceUriDataSource, SourceRowsDataSource], PropertyInfo(discriminator="type")]
+
+
+class DatasetRegisterResponse(BaseModel):
+ identifier: str
+
+ metadata: Dict[str, Union[bool, float, str, List[object], object, None]]
+ """Additional metadata for the dataset"""
+
+ provider_id: str
+
+ purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"]
+ """Purpose of the dataset indicating its intended use"""
+
+ source: Source
+ """Data source configuration for the dataset"""
+
+ type: Literal["dataset"]
+ """Type of resource, always 'dataset' for datasets"""
+
+ provider_resource_id: Optional[str] = None
diff --git a/src/llama_stack_client/types/beta/dataset_retrieve_response.py b/src/llama_stack_client/types/beta/dataset_retrieve_response.py
new file mode 100644
index 00000000..5e2cc0ca
--- /dev/null
+++ b/src/llama_stack_client/types/beta/dataset_retrieve_response.py
@@ -0,0 +1,54 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+
+__all__ = ["DatasetRetrieveResponse", "Source", "SourceUriDataSource", "SourceRowsDataSource"]
+
+
+class SourceUriDataSource(BaseModel):
+ type: Literal["uri"]
+
+ uri: str
+ """The dataset can be obtained from a URI.
+
+ E.g. - "https://mywebsite.com/mydata.jsonl" - "lsfs://mydata.jsonl" -
+ "data:csv;base64,{base64_content}"
+ """
+
+
+class SourceRowsDataSource(BaseModel):
+ rows: List[Dict[str, Union[bool, float, str, List[object], object, None]]]
+ """The dataset is stored in rows.
+
+ E.g. - [ {"messages": [{"role": "user", "content": "Hello, world!"}, {"role":
+ "assistant", "content": "Hello, world!"}]} ]
+ """
+
+ type: Literal["rows"]
+
+
+Source: TypeAlias = Annotated[Union[SourceUriDataSource, SourceRowsDataSource], PropertyInfo(discriminator="type")]
+
+
+class DatasetRetrieveResponse(BaseModel):
+ identifier: str
+
+ metadata: Dict[str, Union[bool, float, str, List[object], object, None]]
+ """Additional metadata for the dataset"""
+
+ provider_id: str
+
+ purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"]
+ """Purpose of the dataset indicating its intended use"""
+
+ source: Source
+ """Data source configuration for the dataset"""
+
+ type: Literal["dataset"]
+ """Type of resource, always 'dataset' for datasets"""
+
+ provider_resource_id: Optional[str] = None
diff --git a/src/llama_stack_client/types/beta/list_datasets_response.py b/src/llama_stack_client/types/beta/list_datasets_response.py
new file mode 100644
index 00000000..7caa3220
--- /dev/null
+++ b/src/llama_stack_client/types/beta/list_datasets_response.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from ..._models import BaseModel
+from .dataset_list_response import DatasetListResponse
+
+__all__ = ["ListDatasetsResponse"]
+
+
+class ListDatasetsResponse(BaseModel):
+ data: DatasetListResponse
+ """List of datasets"""
diff --git a/src/llama_stack_client/types/shared/__init__.py b/src/llama_stack_client/types/shared/__init__.py
index a4aa1259..c18a9358 100644
--- a/src/llama_stack_client/types/shared/__init__.py
+++ b/src/llama_stack_client/types/shared/__init__.py
@@ -10,11 +10,14 @@
from .document import Document as Document
from .tool_call import ToolCall as ToolCall
from .param_type import ParamType as ParamType
+from .agent_config import AgentConfig as AgentConfig
from .query_config import QueryConfig as QueryConfig
from .query_result import QueryResult as QueryResult
from .user_message import UserMessage as UserMessage
from .scoring_result import ScoringResult as ScoringResult
from .system_message import SystemMessage as SystemMessage
+from .response_format import ResponseFormat as ResponseFormat
+from .sampling_params import SamplingParams as SamplingParams
from .safety_violation import SafetyViolation as SafetyViolation
from .completion_message import CompletionMessage as CompletionMessage
from .interleaved_content import InterleavedContent as InterleavedContent
diff --git a/src/llama_stack_client/types/shared/agent_config.py b/src/llama_stack_client/types/shared/agent_config.py
new file mode 100644
index 00000000..eb116159
--- /dev/null
+++ b/src/llama_stack_client/types/shared/agent_config.py
@@ -0,0 +1,92 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, TypeAlias
+
+from ..._models import BaseModel
+from ..tool_def import ToolDef
+from .response_format import ResponseFormat
+from .sampling_params import SamplingParams
+
+__all__ = ["AgentConfig", "ToolConfig", "Toolgroup", "ToolgroupAgentToolGroupWithArgs"]
+
+
+class ToolConfig(BaseModel):
+ system_message_behavior: Optional[Literal["append", "replace"]] = None
+ """(Optional) Config for how to override the default system prompt.
+
+ - `SystemMessageBehavior.append`: Appends the provided system message to the
+ default system prompt. - `SystemMessageBehavior.replace`: Replaces the default
+ system prompt with the provided system message. The system message can include
+ the string '{{function_definitions}}' to indicate where the function
+ definitions should be inserted.
+ """
+
+ tool_choice: Union[Literal["auto", "required", "none"], str, None] = None
+ """(Optional) Whether tool use is automatic, required, or none.
+
+ Can also specify a tool name to use a specific tool. Defaults to
+ ToolChoice.auto.
+ """
+
+ tool_prompt_format: Optional[Literal["json", "function_tag", "python_list"]] = None
+ """(Optional) Instructs the model how to format tool calls.
+
+ By default, Llama Stack will attempt to use a format that is best adapted to the
+ model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON
+ object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a
+ tag. - `ToolPromptFormat.python_list`: The tool calls
+ are output as Python syntax -- a list of function calls.
+ """
+
+
+class ToolgroupAgentToolGroupWithArgs(BaseModel):
+ args: Dict[str, Union[bool, float, str, List[object], object, None]]
+
+ name: str
+
+
+Toolgroup: TypeAlias = Union[str, ToolgroupAgentToolGroupWithArgs]
+
+
+class AgentConfig(BaseModel):
+ instructions: str
+ """The system instructions for the agent"""
+
+ model: str
+ """The model identifier to use for the agent"""
+
+ client_tools: Optional[List[ToolDef]] = None
+
+ enable_session_persistence: Optional[bool] = None
+ """Optional flag indicating whether session data has to be persisted"""
+
+ input_shields: Optional[List[str]] = None
+
+ max_infer_iters: Optional[int] = None
+
+ name: Optional[str] = None
+ """Optional name for the agent, used in telemetry and identification"""
+
+ output_shields: Optional[List[str]] = None
+
+ response_format: Optional[ResponseFormat] = None
+ """Optional response format configuration"""
+
+ sampling_params: Optional[SamplingParams] = None
+ """Sampling parameters."""
+
+ tool_choice: Optional[Literal["auto", "required", "none"]] = None
+ """Whether tool use is required or automatic.
+
+ This is a hint to the model which may not be followed. It depends on the
+ Instruction Following capabilities of the model.
+ """
+
+ tool_config: Optional[ToolConfig] = None
+ """Configuration for tool use."""
+
+ tool_prompt_format: Optional[Literal["json", "function_tag", "python_list"]] = None
+ """Prompt format for calling custom / zero shot tools."""
+
+ toolgroups: Optional[List[Toolgroup]] = None
diff --git a/src/llama_stack_client/types/shared/response_format.py b/src/llama_stack_client/types/shared/response_format.py
new file mode 100644
index 00000000..537df8d5
--- /dev/null
+++ b/src/llama_stack_client/types/shared/response_format.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+
+__all__ = ["ResponseFormat", "JsonSchemaResponseFormat", "GrammarResponseFormat"]
+
+
+class JsonSchemaResponseFormat(BaseModel):
+ json_schema: Dict[str, Union[bool, float, str, List[object], object, None]]
+ """The JSON schema the response should conform to.
+
+ In a Python SDK, this is often a `pydantic` model.
+ """
+
+ type: Literal["json_schema"]
+ """Must be "json_schema" to identify this format type"""
+
+
+class GrammarResponseFormat(BaseModel):
+ bnf: Dict[str, Union[bool, float, str, List[object], object, None]]
+ """The BNF grammar specification the response should conform to"""
+
+ type: Literal["grammar"]
+ """Must be "grammar" to identify this format type"""
+
+
+ResponseFormat: TypeAlias = Annotated[
+ Union[JsonSchemaResponseFormat, GrammarResponseFormat], PropertyInfo(discriminator="type")
+]
diff --git a/src/llama_stack_client/types/shared/sampling_params.py b/src/llama_stack_client/types/shared/sampling_params.py
new file mode 100644
index 00000000..6823aee7
--- /dev/null
+++ b/src/llama_stack_client/types/shared/sampling_params.py
@@ -0,0 +1,70 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+
+__all__ = [
+ "SamplingParams",
+ "Strategy",
+ "StrategyGreedySamplingStrategy",
+ "StrategyTopPSamplingStrategy",
+ "StrategyTopKSamplingStrategy",
+]
+
+
+class StrategyGreedySamplingStrategy(BaseModel):
+ type: Literal["greedy"]
+ """Must be "greedy" to identify this sampling strategy"""
+
+
+class StrategyTopPSamplingStrategy(BaseModel):
+ type: Literal["top_p"]
+ """Must be "top_p" to identify this sampling strategy"""
+
+ temperature: Optional[float] = None
+ """Controls randomness in sampling. Higher values increase randomness"""
+
+ top_p: Optional[float] = None
+ """Cumulative probability threshold for nucleus sampling. Defaults to 0.95"""
+
+
+class StrategyTopKSamplingStrategy(BaseModel):
+ top_k: int
+ """Number of top tokens to consider for sampling. Must be at least 1"""
+
+ type: Literal["top_k"]
+ """Must be "top_k" to identify this sampling strategy"""
+
+
+Strategy: TypeAlias = Annotated[
+ Union[StrategyGreedySamplingStrategy, StrategyTopPSamplingStrategy, StrategyTopKSamplingStrategy],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class SamplingParams(BaseModel):
+ strategy: Strategy
+ """The sampling strategy."""
+
+ max_tokens: Optional[int] = None
+ """The maximum number of tokens that can be generated in the completion.
+
+ The token count of your prompt plus max_tokens cannot exceed the model's context
+ length.
+ """
+
+ repetition_penalty: Optional[float] = None
+ """Number between -2.0 and 2.0.
+
+ Positive values penalize new tokens based on whether they appear in the text so
+ far, increasing the model's likelihood to talk about new topics.
+ """
+
+ stop: Optional[List[str]] = None
+ """Up to 4 sequences where the API will stop generating further tokens.
+
+ The returned text will not contain the stop sequence.
+ """
diff --git a/src/llama_stack_client/types/shared_params/__init__.py b/src/llama_stack_client/types/shared_params/__init__.py
index 4ce940e0..12061849 100644
--- a/src/llama_stack_client/types/shared_params/__init__.py
+++ b/src/llama_stack_client/types/shared_params/__init__.py
@@ -9,9 +9,12 @@
from .message import Message as Message
from .document import Document as Document
from .tool_call import ToolCall as ToolCall
+from .agent_config import AgentConfig as AgentConfig
from .query_config import QueryConfig as QueryConfig
from .user_message import UserMessage as UserMessage
from .system_message import SystemMessage as SystemMessage
+from .response_format import ResponseFormat as ResponseFormat
+from .sampling_params import SamplingParams as SamplingParams
from .completion_message import CompletionMessage as CompletionMessage
from .interleaved_content import InterleavedContent as InterleavedContent
from .tool_response_message import ToolResponseMessage as ToolResponseMessage
diff --git a/src/llama_stack_client/types/shared_params/agent_config.py b/src/llama_stack_client/types/shared_params/agent_config.py
new file mode 100644
index 00000000..c1206bd5
--- /dev/null
+++ b/src/llama_stack_client/types/shared_params/agent_config.py
@@ -0,0 +1,94 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from ..._types import SequenceNotStr
+from ..tool_def_param import ToolDefParam
+from .response_format import ResponseFormat
+from .sampling_params import SamplingParams
+
+__all__ = ["AgentConfig", "ToolConfig", "Toolgroup", "ToolgroupAgentToolGroupWithArgs"]
+
+
+class ToolConfig(TypedDict, total=False):
+ system_message_behavior: Literal["append", "replace"]
+ """(Optional) Config for how to override the default system prompt.
+
+ - `SystemMessageBehavior.append`: Appends the provided system message to the
+ default system prompt. - `SystemMessageBehavior.replace`: Replaces the default
+ system prompt with the provided system message. The system message can include
+ the string '{{function_definitions}}' to indicate where the function
+ definitions should be inserted.
+ """
+
+ tool_choice: Union[Literal["auto", "required", "none"], str]
+ """(Optional) Whether tool use is automatic, required, or none.
+
+ Can also specify a tool name to use a specific tool. Defaults to
+ ToolChoice.auto.
+ """
+
+ tool_prompt_format: Literal["json", "function_tag", "python_list"]
+ """(Optional) Instructs the model how to format tool calls.
+
+ By default, Llama Stack will attempt to use a format that is best adapted to the
+ model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON
+ object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a
+ tag. - `ToolPromptFormat.python_list`: The tool calls
+ are output as Python syntax -- a list of function calls.
+ """
+
+
+class ToolgroupAgentToolGroupWithArgs(TypedDict, total=False):
+ args: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]
+
+ name: Required[str]
+
+
+Toolgroup: TypeAlias = Union[str, ToolgroupAgentToolGroupWithArgs]
+
+
+class AgentConfig(TypedDict, total=False):
+ instructions: Required[str]
+ """The system instructions for the agent"""
+
+ model: Required[str]
+ """The model identifier to use for the agent"""
+
+ client_tools: Iterable[ToolDefParam]
+
+ enable_session_persistence: bool
+ """Optional flag indicating whether session data has to be persisted"""
+
+ input_shields: SequenceNotStr[str]
+
+ max_infer_iters: int
+
+ name: str
+ """Optional name for the agent, used in telemetry and identification"""
+
+ output_shields: SequenceNotStr[str]
+
+ response_format: ResponseFormat
+ """Optional response format configuration"""
+
+ sampling_params: SamplingParams
+ """Sampling parameters."""
+
+ tool_choice: Literal["auto", "required", "none"]
+ """Whether tool use is required or automatic.
+
+ This is a hint to the model which may not be followed. It depends on the
+ Instruction Following capabilities of the model.
+ """
+
+ tool_config: ToolConfig
+ """Configuration for tool use."""
+
+ tool_prompt_format: Literal["json", "function_tag", "python_list"]
+ """Prompt format for calling custom / zero shot tools."""
+
+ toolgroups: SequenceNotStr[Toolgroup]
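A construction sketch for the TypedDict variant above; the model id, toolgroup ids and tool arguments are illustrative placeholders, and the agent-creation call that consumes this config lives in the agents resource elsewhere in this patch:

    agent_config = {
        "model": "meta-llama/Llama-3.2-3B-Instruct",
        "instructions": "You are a concise assistant.",
        "toolgroups": [
            "builtin::websearch",                            # plain string form
            {"name": "builtin::rag", "args": {"top_k": 3}},  # form with args (illustrative)
        ],
        "tool_config": {"tool_choice": "auto"},
        "sampling_params": {"strategy": {"type": "greedy"}, "max_tokens": 512},
        "enable_session_persistence": False,
    }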
diff --git a/src/llama_stack_client/types/shared_params/response_format.py b/src/llama_stack_client/types/shared_params/response_format.py
new file mode 100644
index 00000000..53411700
--- /dev/null
+++ b/src/llama_stack_client/types/shared_params/response_format.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+__all__ = ["ResponseFormat", "JsonSchemaResponseFormat", "GrammarResponseFormat"]
+
+
+class JsonSchemaResponseFormat(TypedDict, total=False):
+ json_schema: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]
+ """The JSON schema the response should conform to.
+
+ In a Python SDK, this is often a `pydantic` model.
+ """
+
+ type: Required[Literal["json_schema"]]
+ """Must be "json_schema" to identify this format type"""
+
+
+class GrammarResponseFormat(TypedDict, total=False):
+ bnf: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]
+ """The BNF grammar specification the response should conform to"""
+
+ type: Required[Literal["grammar"]]
+ """Must be "grammar" to identify this format type"""
+
+
+ResponseFormat: TypeAlias = Union[JsonSchemaResponseFormat, GrammarResponseFormat]
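
Note: a hedged sketch of the two shapes this ResponseFormat union accepts; the schema content and the BNF encoding below are illustrative assumptions, since the spec only constrains both payloads to Dict[str, ...].

    # Illustrative only: JSON-schema and grammar variants of ResponseFormat.
    json_schema_format = {
        "type": "json_schema",
        "json_schema": {
            "type": "object",
            "properties": {"answer": {"type": "string"}},
            "required": ["answer"],
        },
    }

    grammar_format = {
        "type": "grammar",
        "bnf": {"root": '"yes" | "no"'},  # assumed key/value encoding of the grammar
    }
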
diff --git a/src/llama_stack_client/types/shared_params/sampling_params.py b/src/llama_stack_client/types/shared_params/sampling_params.py
new file mode 100644
index 00000000..e5eebddd
--- /dev/null
+++ b/src/llama_stack_client/types/shared_params/sampling_params.py
@@ -0,0 +1,68 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from ..._types import SequenceNotStr
+
+__all__ = [
+ "SamplingParams",
+ "Strategy",
+ "StrategyGreedySamplingStrategy",
+ "StrategyTopPSamplingStrategy",
+ "StrategyTopKSamplingStrategy",
+]
+
+
+class StrategyGreedySamplingStrategy(TypedDict, total=False):
+ type: Required[Literal["greedy"]]
+ """Must be "greedy" to identify this sampling strategy"""
+
+
+class StrategyTopPSamplingStrategy(TypedDict, total=False):
+ type: Required[Literal["top_p"]]
+ """Must be "top_p" to identify this sampling strategy"""
+
+ temperature: float
+ """Controls randomness in sampling. Higher values increase randomness"""
+
+ top_p: float
+ """Cumulative probability threshold for nucleus sampling. Defaults to 0.95"""
+
+
+class StrategyTopKSamplingStrategy(TypedDict, total=False):
+ top_k: Required[int]
+ """Number of top tokens to consider for sampling. Must be at least 1"""
+
+ type: Required[Literal["top_k"]]
+ """Must be "top_k" to identify this sampling strategy"""
+
+
+Strategy: TypeAlias = Union[StrategyGreedySamplingStrategy, StrategyTopPSamplingStrategy, StrategyTopKSamplingStrategy]
+
+
+class SamplingParams(TypedDict, total=False):
+ strategy: Required[Strategy]
+ """The sampling strategy."""
+
+ max_tokens: int
+ """The maximum number of tokens that can be generated in the completion.
+
+ The token count of your prompt plus max_tokens cannot exceed the model's context
+ length.
+ """
+
+ repetition_penalty: float
+ """Number between -2.0 and 2.0.
+
+ Positive values penalize new tokens based on whether they appear in the text so
+ far, increasing the model's likelihood to talk about new topics.
+ """
+
+ stop: SequenceNotStr[str]
+ """Up to 4 sequences where the API will stop generating further tokens.
+
+ The returned text will not contain the stop sequence.
+ """
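
Note: a short sketch of how the Strategy union composes into SamplingParams as defined above; the numeric values are illustrative.

    # Illustrative only: top-p sampling with a token cap and stop sequences.
    sampling_params = {
        "strategy": {
            "type": "top_p",
            "temperature": 0.7,
            "top_p": 0.9,
        },
        "max_tokens": 512,
        "stop": ["</answer>"],
    }

    # Greedy decoding needs only the strategy type.
    greedy_params = {"strategy": {"type": "greedy"}}
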
diff --git a/src/llama_stack_client/types/tool_def_param.py b/src/llama_stack_client/types/tool_def_param.py
new file mode 100644
index 00000000..d14ef6cc
--- /dev/null
+++ b/src/llama_stack_client/types/tool_def_param.py
@@ -0,0 +1,28 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable
+from typing_extensions import Required, TypedDict
+
+__all__ = ["ToolDefParam"]
+
+
+class ToolDefParam(TypedDict, total=False):
+ name: Required[str]
+ """Name of the tool"""
+
+ description: str
+ """(Optional) Human-readable description of what the tool does"""
+
+ input_schema: Dict[str, Union[bool, float, str, Iterable[object], object, None]]
+ """(Optional) JSON Schema for tool inputs (MCP inputSchema)"""
+
+ metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]]
+ """(Optional) Additional metadata about the tool"""
+
+ output_schema: Dict[str, Union[bool, float, str, Iterable[object], object, None]]
+ """(Optional) JSON Schema for tool outputs (MCP outputSchema)"""
+
+ toolgroup_id: str
+ """(Optional) ID of the tool group this tool belongs to"""
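
Note: a hedged sketch of a client tool definition shaped like ToolDefParam; the tool name, schemas, and metadata below are examples, not values from this patch.

    # Illustrative only: a ToolDefParam-shaped dict for a hypothetical client tool.
    get_weather_tool = {
        "name": "get_weather",  # required
        "description": "Look up the current weather for a city.",
        "input_schema": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
        "output_schema": {"type": "string"},
        "metadata": {"source": "example"},
    }
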
diff --git a/tests/api_resources/alpha/__init__.py b/tests/api_resources/alpha/__init__.py
new file mode 100644
index 00000000..fd8019a9
--- /dev/null
+++ b/tests/api_resources/alpha/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/alpha/agents/__init__.py b/tests/api_resources/alpha/agents/__init__.py
new file mode 100644
index 00000000..fd8019a9
--- /dev/null
+++ b/tests/api_resources/alpha/agents/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/alpha/agents/test_session.py b/tests/api_resources/alpha/agents/test_session.py
new file mode 100644
index 00000000..9c49e6bc
--- /dev/null
+++ b/tests/api_resources/alpha/agents/test_session.py
@@ -0,0 +1,416 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from tests.utils import assert_matches_type
+from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
+from llama_stack_client.types.alpha.agents import (
+ Session,
+ SessionListResponse,
+ SessionCreateResponse,
+)
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestSession:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_create(self, client: LlamaStackClient) -> None:
+ session = client.alpha.agents.session.create(
+ agent_id="agent_id",
+ session_name="session_name",
+ )
+ assert_matches_type(SessionCreateResponse, session, path=["response"])
+
+ @parametrize
+ def test_raw_response_create(self, client: LlamaStackClient) -> None:
+ response = client.alpha.agents.session.with_raw_response.create(
+ agent_id="agent_id",
+ session_name="session_name",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ session = response.parse()
+ assert_matches_type(SessionCreateResponse, session, path=["response"])
+
+ @parametrize
+ def test_streaming_response_create(self, client: LlamaStackClient) -> None:
+ with client.alpha.agents.session.with_streaming_response.create(
+ agent_id="agent_id",
+ session_name="session_name",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ session = response.parse()
+ assert_matches_type(SessionCreateResponse, session, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_create(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ client.alpha.agents.session.with_raw_response.create(
+ agent_id="",
+ session_name="session_name",
+ )
+
+ @parametrize
+ def test_method_retrieve(self, client: LlamaStackClient) -> None:
+ session = client.alpha.agents.session.retrieve(
+ session_id="session_id",
+ agent_id="agent_id",
+ )
+ assert_matches_type(Session, session, path=["response"])
+
+ @parametrize
+ def test_method_retrieve_with_all_params(self, client: LlamaStackClient) -> None:
+ session = client.alpha.agents.session.retrieve(
+ session_id="session_id",
+ agent_id="agent_id",
+ turn_ids=["string"],
+ )
+ assert_matches_type(Session, session, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
+ response = client.alpha.agents.session.with_raw_response.retrieve(
+ session_id="session_id",
+ agent_id="agent_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ session = response.parse()
+ assert_matches_type(Session, session, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
+ with client.alpha.agents.session.with_streaming_response.retrieve(
+ session_id="session_id",
+ agent_id="agent_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ session = response.parse()
+ assert_matches_type(Session, session, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ client.alpha.agents.session.with_raw_response.retrieve(
+ session_id="session_id",
+ agent_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
+ client.alpha.agents.session.with_raw_response.retrieve(
+ session_id="",
+ agent_id="agent_id",
+ )
+
+ @parametrize
+ def test_method_list(self, client: LlamaStackClient) -> None:
+ session = client.alpha.agents.session.list(
+ agent_id="agent_id",
+ )
+ assert_matches_type(SessionListResponse, session, path=["response"])
+
+ @parametrize
+ def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
+ session = client.alpha.agents.session.list(
+ agent_id="agent_id",
+ limit=0,
+ start_index=0,
+ )
+ assert_matches_type(SessionListResponse, session, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: LlamaStackClient) -> None:
+ response = client.alpha.agents.session.with_raw_response.list(
+ agent_id="agent_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ session = response.parse()
+ assert_matches_type(SessionListResponse, session, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: LlamaStackClient) -> None:
+ with client.alpha.agents.session.with_streaming_response.list(
+ agent_id="agent_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ session = response.parse()
+ assert_matches_type(SessionListResponse, session, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_list(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ client.alpha.agents.session.with_raw_response.list(
+ agent_id="",
+ )
+
+ @parametrize
+ def test_method_delete(self, client: LlamaStackClient) -> None:
+ session = client.alpha.agents.session.delete(
+ session_id="session_id",
+ agent_id="agent_id",
+ )
+ assert session is None
+
+ @parametrize
+ def test_raw_response_delete(self, client: LlamaStackClient) -> None:
+ response = client.alpha.agents.session.with_raw_response.delete(
+ session_id="session_id",
+ agent_id="agent_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ session = response.parse()
+ assert session is None
+
+ @parametrize
+ def test_streaming_response_delete(self, client: LlamaStackClient) -> None:
+ with client.alpha.agents.session.with_streaming_response.delete(
+ session_id="session_id",
+ agent_id="agent_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ session = response.parse()
+ assert session is None
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_delete(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ client.alpha.agents.session.with_raw_response.delete(
+ session_id="session_id",
+ agent_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
+ client.alpha.agents.session.with_raw_response.delete(
+ session_id="",
+ agent_id="agent_id",
+ )
+
+
+class TestAsyncSession:
+ parametrize = pytest.mark.parametrize(
+ "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+ )
+
+ @parametrize
+ async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None:
+ session = await async_client.alpha.agents.session.create(
+ agent_id="agent_id",
+ session_name="session_name",
+ )
+ assert_matches_type(SessionCreateResponse, session, path=["response"])
+
+ @parametrize
+ async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.agents.session.with_raw_response.create(
+ agent_id="agent_id",
+ session_name="session_name",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ session = await response.parse()
+ assert_matches_type(SessionCreateResponse, session, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_create(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.agents.session.with_streaming_response.create(
+ agent_id="agent_id",
+ session_name="session_name",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ session = await response.parse()
+ assert_matches_type(SessionCreateResponse, session, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_create(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ await async_client.alpha.agents.session.with_raw_response.create(
+ agent_id="",
+ session_name="session_name",
+ )
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ session = await async_client.alpha.agents.session.retrieve(
+ session_id="session_id",
+ agent_id="agent_id",
+ )
+ assert_matches_type(Session, session, path=["response"])
+
+ @parametrize
+ async def test_method_retrieve_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ session = await async_client.alpha.agents.session.retrieve(
+ session_id="session_id",
+ agent_id="agent_id",
+ turn_ids=["string"],
+ )
+ assert_matches_type(Session, session, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.agents.session.with_raw_response.retrieve(
+ session_id="session_id",
+ agent_id="agent_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ session = await response.parse()
+ assert_matches_type(Session, session, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.agents.session.with_streaming_response.retrieve(
+ session_id="session_id",
+ agent_id="agent_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ session = await response.parse()
+ assert_matches_type(Session, session, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ await async_client.alpha.agents.session.with_raw_response.retrieve(
+ session_id="session_id",
+ agent_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
+ await async_client.alpha.agents.session.with_raw_response.retrieve(
+ session_id="",
+ agent_id="agent_id",
+ )
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
+ session = await async_client.alpha.agents.session.list(
+ agent_id="agent_id",
+ )
+ assert_matches_type(SessionListResponse, session, path=["response"])
+
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ session = await async_client.alpha.agents.session.list(
+ agent_id="agent_id",
+ limit=0,
+ start_index=0,
+ )
+ assert_matches_type(SessionListResponse, session, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.agents.session.with_raw_response.list(
+ agent_id="agent_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ session = await response.parse()
+ assert_matches_type(SessionListResponse, session, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.agents.session.with_streaming_response.list(
+ agent_id="agent_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ session = await response.parse()
+ assert_matches_type(SessionListResponse, session, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_list(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ await async_client.alpha.agents.session.with_raw_response.list(
+ agent_id="",
+ )
+
+ @parametrize
+ async def test_method_delete(self, async_client: AsyncLlamaStackClient) -> None:
+ session = await async_client.alpha.agents.session.delete(
+ session_id="session_id",
+ agent_id="agent_id",
+ )
+ assert session is None
+
+ @parametrize
+ async def test_raw_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.agents.session.with_raw_response.delete(
+ session_id="session_id",
+ agent_id="agent_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ session = await response.parse()
+ assert session is None
+
+ @parametrize
+ async def test_streaming_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.agents.session.with_streaming_response.delete(
+ session_id="session_id",
+ agent_id="agent_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ session = await response.parse()
+ assert session is None
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_delete(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ await async_client.alpha.agents.session.with_raw_response.delete(
+ session_id="session_id",
+ agent_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
+ await async_client.alpha.agents.session.with_raw_response.delete(
+ session_id="",
+ agent_id="agent_id",
+ )
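
Note: the tests above run against a mocked server; a hedged sketch of the same session surface exercised end to end against a running stack. The base URL and agent id are placeholders, and the session_id attribute on the create response is an assumption about SessionCreateResponse.

    # Illustrative only -- placeholder URL and ids, assumed response attribute.
    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://127.0.0.1:8321")
    agent_id = "agent_123"  # assumes an agent was created earlier

    created = client.alpha.agents.session.create(agent_id=agent_id, session_name="demo")
    session = client.alpha.agents.session.retrieve(session_id=created.session_id, agent_id=agent_id)
    sessions = client.alpha.agents.session.list(agent_id=agent_id, limit=10)
    client.alpha.agents.session.delete(session_id=created.session_id, agent_id=agent_id)
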
diff --git a/tests/api_resources/alpha/agents/test_steps.py b/tests/api_resources/alpha/agents/test_steps.py
new file mode 100644
index 00000000..5bf35fc3
--- /dev/null
+++ b/tests/api_resources/alpha/agents/test_steps.py
@@ -0,0 +1,172 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from tests.utils import assert_matches_type
+from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
+from llama_stack_client.types.alpha.agents import StepRetrieveResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestSteps:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_retrieve(self, client: LlamaStackClient) -> None:
+ step = client.alpha.agents.steps.retrieve(
+ step_id="step_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ turn_id="turn_id",
+ )
+ assert_matches_type(StepRetrieveResponse, step, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
+ response = client.alpha.agents.steps.with_raw_response.retrieve(
+ step_id="step_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ turn_id="turn_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ step = response.parse()
+ assert_matches_type(StepRetrieveResponse, step, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
+ with client.alpha.agents.steps.with_streaming_response.retrieve(
+ step_id="step_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ turn_id="turn_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ step = response.parse()
+ assert_matches_type(StepRetrieveResponse, step, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ client.alpha.agents.steps.with_raw_response.retrieve(
+ step_id="step_id",
+ agent_id="",
+ session_id="session_id",
+ turn_id="turn_id",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
+ client.alpha.agents.steps.with_raw_response.retrieve(
+ step_id="step_id",
+ agent_id="agent_id",
+ session_id="",
+ turn_id="turn_id",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
+ client.alpha.agents.steps.with_raw_response.retrieve(
+ step_id="step_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ turn_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `step_id` but received ''"):
+ client.alpha.agents.steps.with_raw_response.retrieve(
+ step_id="",
+ agent_id="agent_id",
+ session_id="session_id",
+ turn_id="turn_id",
+ )
+
+
+class TestAsyncSteps:
+ parametrize = pytest.mark.parametrize(
+ "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+ )
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ step = await async_client.alpha.agents.steps.retrieve(
+ step_id="step_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ turn_id="turn_id",
+ )
+ assert_matches_type(StepRetrieveResponse, step, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.agents.steps.with_raw_response.retrieve(
+ step_id="step_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ turn_id="turn_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ step = await response.parse()
+ assert_matches_type(StepRetrieveResponse, step, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.agents.steps.with_streaming_response.retrieve(
+ step_id="step_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ turn_id="turn_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ step = await response.parse()
+ assert_matches_type(StepRetrieveResponse, step, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ await async_client.alpha.agents.steps.with_raw_response.retrieve(
+ step_id="step_id",
+ agent_id="",
+ session_id="session_id",
+ turn_id="turn_id",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
+ await async_client.alpha.agents.steps.with_raw_response.retrieve(
+ step_id="step_id",
+ agent_id="agent_id",
+ session_id="",
+ turn_id="turn_id",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
+ await async_client.alpha.agents.steps.with_raw_response.retrieve(
+ step_id="step_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ turn_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `step_id` but received ''"):
+ await async_client.alpha.agents.steps.with_raw_response.retrieve(
+ step_id="",
+ agent_id="agent_id",
+ session_id="session_id",
+ turn_id="turn_id",
+ )
diff --git a/tests/api_resources/alpha/agents/test_turn.py b/tests/api_resources/alpha/agents/test_turn.py
new file mode 100644
index 00000000..9a2a500f
--- /dev/null
+++ b/tests/api_resources/alpha/agents/test_turn.py
@@ -0,0 +1,1030 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from tests.utils import assert_matches_type
+from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
+from llama_stack_client.types.alpha.agents import Turn
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestTurn:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_create_overload_1(self, client: LlamaStackClient) -> None:
+ turn = client.alpha.agents.turn.create(
+ session_id="session_id",
+ agent_id="agent_id",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ )
+ assert_matches_type(Turn, turn, path=["response"])
+
+ @parametrize
+ def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient) -> None:
+ turn = client.alpha.agents.turn.create(
+ session_id="session_id",
+ agent_id="agent_id",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ "context": "string",
+ }
+ ],
+ documents=[
+ {
+ "content": "string",
+ "mime_type": "mime_type",
+ }
+ ],
+ stream=False,
+ tool_config={
+ "system_message_behavior": "append",
+ "tool_choice": "auto",
+ "tool_prompt_format": "json",
+ },
+ toolgroups=["string"],
+ )
+ assert_matches_type(Turn, turn, path=["response"])
+
+ @parametrize
+ def test_raw_response_create_overload_1(self, client: LlamaStackClient) -> None:
+ response = client.alpha.agents.turn.with_raw_response.create(
+ session_id="session_id",
+ agent_id="agent_id",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ turn = response.parse()
+ assert_matches_type(Turn, turn, path=["response"])
+
+ @parametrize
+ def test_streaming_response_create_overload_1(self, client: LlamaStackClient) -> None:
+ with client.alpha.agents.turn.with_streaming_response.create(
+ session_id="session_id",
+ agent_id="agent_id",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ turn = response.parse()
+ assert_matches_type(Turn, turn, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_create_overload_1(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ client.alpha.agents.turn.with_raw_response.create(
+ session_id="session_id",
+ agent_id="",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
+ client.alpha.agents.turn.with_raw_response.create(
+ session_id="",
+ agent_id="agent_id",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ )
+
+ @parametrize
+ def test_method_create_overload_2(self, client: LlamaStackClient) -> None:
+ turn_stream = client.alpha.agents.turn.create(
+ session_id="session_id",
+ agent_id="agent_id",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ stream=True,
+ )
+ turn_stream.response.close()
+
+ @parametrize
+ def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient) -> None:
+ turn_stream = client.alpha.agents.turn.create(
+ session_id="session_id",
+ agent_id="agent_id",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ "context": "string",
+ }
+ ],
+ stream=True,
+ documents=[
+ {
+ "content": "string",
+ "mime_type": "mime_type",
+ }
+ ],
+ tool_config={
+ "system_message_behavior": "append",
+ "tool_choice": "auto",
+ "tool_prompt_format": "json",
+ },
+ toolgroups=["string"],
+ )
+ turn_stream.response.close()
+
+ @parametrize
+ def test_raw_response_create_overload_2(self, client: LlamaStackClient) -> None:
+ response = client.alpha.agents.turn.with_raw_response.create(
+ session_id="session_id",
+ agent_id="agent_id",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ stream=True,
+ )
+
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ stream = response.parse()
+ stream.close()
+
+ @parametrize
+ def test_streaming_response_create_overload_2(self, client: LlamaStackClient) -> None:
+ with client.alpha.agents.turn.with_streaming_response.create(
+ session_id="session_id",
+ agent_id="agent_id",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ stream=True,
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ stream = response.parse()
+ stream.close()
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_create_overload_2(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ client.alpha.agents.turn.with_raw_response.create(
+ session_id="session_id",
+ agent_id="",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ stream=True,
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
+ client.alpha.agents.turn.with_raw_response.create(
+ session_id="",
+ agent_id="agent_id",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ stream=True,
+ )
+
+ @parametrize
+ def test_method_retrieve(self, client: LlamaStackClient) -> None:
+ turn = client.alpha.agents.turn.retrieve(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ )
+ assert_matches_type(Turn, turn, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
+ response = client.alpha.agents.turn.with_raw_response.retrieve(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ turn = response.parse()
+ assert_matches_type(Turn, turn, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
+ with client.alpha.agents.turn.with_streaming_response.retrieve(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ turn = response.parse()
+ assert_matches_type(Turn, turn, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ client.alpha.agents.turn.with_raw_response.retrieve(
+ turn_id="turn_id",
+ agent_id="",
+ session_id="session_id",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
+ client.alpha.agents.turn.with_raw_response.retrieve(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
+ client.alpha.agents.turn.with_raw_response.retrieve(
+ turn_id="",
+ agent_id="agent_id",
+ session_id="session_id",
+ )
+
+ @parametrize
+ def test_method_resume_overload_1(self, client: LlamaStackClient) -> None:
+ turn = client.alpha.agents.turn.resume(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ )
+ assert_matches_type(Turn, turn, path=["response"])
+
+ @parametrize
+ def test_method_resume_with_all_params_overload_1(self, client: LlamaStackClient) -> None:
+ turn = client.alpha.agents.turn.resume(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ "metadata": {"foo": True},
+ }
+ ],
+ stream=False,
+ )
+ assert_matches_type(Turn, turn, path=["response"])
+
+ @parametrize
+ def test_raw_response_resume_overload_1(self, client: LlamaStackClient) -> None:
+ response = client.alpha.agents.turn.with_raw_response.resume(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ turn = response.parse()
+ assert_matches_type(Turn, turn, path=["response"])
+
+ @parametrize
+ def test_streaming_response_resume_overload_1(self, client: LlamaStackClient) -> None:
+ with client.alpha.agents.turn.with_streaming_response.resume(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ turn = response.parse()
+ assert_matches_type(Turn, turn, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_resume_overload_1(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ client.alpha.agents.turn.with_raw_response.resume(
+ turn_id="turn_id",
+ agent_id="",
+ session_id="session_id",
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
+ client.alpha.agents.turn.with_raw_response.resume(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="",
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
+ client.alpha.agents.turn.with_raw_response.resume(
+ turn_id="",
+ agent_id="agent_id",
+ session_id="session_id",
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ )
+
+ @parametrize
+ def test_method_resume_overload_2(self, client: LlamaStackClient) -> None:
+ turn_stream = client.alpha.agents.turn.resume(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ stream=True,
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ )
+ turn_stream.response.close()
+
+ @parametrize
+ def test_raw_response_resume_overload_2(self, client: LlamaStackClient) -> None:
+ response = client.alpha.agents.turn.with_raw_response.resume(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ stream=True,
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ )
+
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ stream = response.parse()
+ stream.close()
+
+ @parametrize
+ def test_streaming_response_resume_overload_2(self, client: LlamaStackClient) -> None:
+ with client.alpha.agents.turn.with_streaming_response.resume(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ stream=True,
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ stream = response.parse()
+ stream.close()
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_resume_overload_2(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ client.alpha.agents.turn.with_raw_response.resume(
+ turn_id="turn_id",
+ agent_id="",
+ session_id="session_id",
+ stream=True,
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
+ client.alpha.agents.turn.with_raw_response.resume(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="",
+ stream=True,
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
+ client.alpha.agents.turn.with_raw_response.resume(
+ turn_id="",
+ agent_id="agent_id",
+ session_id="session_id",
+ stream=True,
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ )
+
+
+class TestAsyncTurn:
+ parametrize = pytest.mark.parametrize(
+ "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+ )
+
+ @parametrize
+ async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
+ turn = await async_client.alpha.agents.turn.create(
+ session_id="session_id",
+ agent_id="agent_id",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ )
+ assert_matches_type(Turn, turn, path=["response"])
+
+ @parametrize
+ async def test_method_create_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
+ turn = await async_client.alpha.agents.turn.create(
+ session_id="session_id",
+ agent_id="agent_id",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ "context": "string",
+ }
+ ],
+ documents=[
+ {
+ "content": "string",
+ "mime_type": "mime_type",
+ }
+ ],
+ stream=False,
+ tool_config={
+ "system_message_behavior": "append",
+ "tool_choice": "auto",
+ "tool_prompt_format": "json",
+ },
+ toolgroups=["string"],
+ )
+ assert_matches_type(Turn, turn, path=["response"])
+
+ @parametrize
+ async def test_raw_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.agents.turn.with_raw_response.create(
+ session_id="session_id",
+ agent_id="agent_id",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ turn = await response.parse()
+ assert_matches_type(Turn, turn, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.agents.turn.with_streaming_response.create(
+ session_id="session_id",
+ agent_id="agent_id",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ turn = await response.parse()
+ assert_matches_type(Turn, turn, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ await async_client.alpha.agents.turn.with_raw_response.create(
+ session_id="session_id",
+ agent_id="",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
+ await async_client.alpha.agents.turn.with_raw_response.create(
+ session_id="",
+ agent_id="agent_id",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ )
+
+ @parametrize
+ async def test_method_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
+ turn_stream = await async_client.alpha.agents.turn.create(
+ session_id="session_id",
+ agent_id="agent_id",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ stream=True,
+ )
+ await turn_stream.response.aclose()
+
+ @parametrize
+ async def test_method_create_with_all_params_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
+ turn_stream = await async_client.alpha.agents.turn.create(
+ session_id="session_id",
+ agent_id="agent_id",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ "context": "string",
+ }
+ ],
+ stream=True,
+ documents=[
+ {
+ "content": "string",
+ "mime_type": "mime_type",
+ }
+ ],
+ tool_config={
+ "system_message_behavior": "append",
+ "tool_choice": "auto",
+ "tool_prompt_format": "json",
+ },
+ toolgroups=["string"],
+ )
+ await turn_stream.response.aclose()
+
+ @parametrize
+ async def test_raw_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.agents.turn.with_raw_response.create(
+ session_id="session_id",
+ agent_id="agent_id",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ stream=True,
+ )
+
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ stream = await response.parse()
+ await stream.close()
+
+ @parametrize
+ async def test_streaming_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.agents.turn.with_streaming_response.create(
+ session_id="session_id",
+ agent_id="agent_id",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ stream=True,
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ stream = await response.parse()
+ await stream.close()
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ await async_client.alpha.agents.turn.with_raw_response.create(
+ session_id="session_id",
+ agent_id="",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ stream=True,
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
+ await async_client.alpha.agents.turn.with_raw_response.create(
+ session_id="",
+ agent_id="agent_id",
+ messages=[
+ {
+ "content": "string",
+ "role": "user",
+ }
+ ],
+ stream=True,
+ )
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ turn = await async_client.alpha.agents.turn.retrieve(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ )
+ assert_matches_type(Turn, turn, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.agents.turn.with_raw_response.retrieve(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ turn = await response.parse()
+ assert_matches_type(Turn, turn, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.agents.turn.with_streaming_response.retrieve(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ turn = await response.parse()
+ assert_matches_type(Turn, turn, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ await async_client.alpha.agents.turn.with_raw_response.retrieve(
+ turn_id="turn_id",
+ agent_id="",
+ session_id="session_id",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
+ await async_client.alpha.agents.turn.with_raw_response.retrieve(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
+ await async_client.alpha.agents.turn.with_raw_response.retrieve(
+ turn_id="",
+ agent_id="agent_id",
+ session_id="session_id",
+ )
+
+ @parametrize
+ async def test_method_resume_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
+ turn = await async_client.alpha.agents.turn.resume(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ )
+ assert_matches_type(Turn, turn, path=["response"])
+
+ @parametrize
+ async def test_method_resume_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
+ turn = await async_client.alpha.agents.turn.resume(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ "metadata": {"foo": True},
+ }
+ ],
+ stream=False,
+ )
+ assert_matches_type(Turn, turn, path=["response"])
+
+ @parametrize
+ async def test_raw_response_resume_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.agents.turn.with_raw_response.resume(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ turn = await response.parse()
+ assert_matches_type(Turn, turn, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_resume_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.agents.turn.with_streaming_response.resume(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ turn = await response.parse()
+ assert_matches_type(Turn, turn, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_resume_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ await async_client.alpha.agents.turn.with_raw_response.resume(
+ turn_id="turn_id",
+ agent_id="",
+ session_id="session_id",
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
+ await async_client.alpha.agents.turn.with_raw_response.resume(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="",
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
+ await async_client.alpha.agents.turn.with_raw_response.resume(
+ turn_id="",
+ agent_id="agent_id",
+ session_id="session_id",
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ )
+
+ @parametrize
+ async def test_method_resume_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
+ turn_stream = await async_client.alpha.agents.turn.resume(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ stream=True,
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ )
+ await turn_stream.response.aclose()
+
+ @parametrize
+ async def test_raw_response_resume_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.agents.turn.with_raw_response.resume(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ stream=True,
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ )
+
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ stream = await response.parse()
+ await stream.close()
+
+ @parametrize
+ async def test_streaming_response_resume_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.agents.turn.with_streaming_response.resume(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="session_id",
+ stream=True,
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ stream = await response.parse()
+ await stream.close()
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_resume_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ await async_client.alpha.agents.turn.with_raw_response.resume(
+ turn_id="turn_id",
+ agent_id="",
+ session_id="session_id",
+ stream=True,
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
+ await async_client.alpha.agents.turn.with_raw_response.resume(
+ turn_id="turn_id",
+ agent_id="agent_id",
+ session_id="",
+ stream=True,
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
+ await async_client.alpha.agents.turn.with_raw_response.resume(
+ turn_id="",
+ agent_id="agent_id",
+ session_id="session_id",
+ stream=True,
+ tool_responses=[
+ {
+ "call_id": "call_id",
+ "content": "string",
+ "tool_name": "brave_search",
+ }
+ ],
+ )
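
Note: the streaming overload tests above only open and close the stream; a hedged sketch of actually consuming it. The base URL and ids are placeholders, and the shape of each chunk's event payload is an assumption (inspect it in your environment) -- only the iteration over the returned stream is implied by the overloads tested here.

    # Illustrative only -- placeholder URL/ids, assumed chunk attributes.
    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://127.0.0.1:8321")

    stream = client.alpha.agents.turn.create(
        agent_id="agent_123",
        session_id="session_456",
        messages=[{"role": "user", "content": "Summarize the latest release notes."}],
        stream=True,
    )

    # Each item is an agent turn response stream chunk; print its event payload.
    for chunk in stream:
        print(chunk.event)
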
diff --git a/tests/api_resources/alpha/eval/__init__.py b/tests/api_resources/alpha/eval/__init__.py
new file mode 100644
index 00000000..fd8019a9
--- /dev/null
+++ b/tests/api_resources/alpha/eval/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/alpha/eval/test_jobs.py b/tests/api_resources/alpha/eval/test_jobs.py
new file mode 100644
index 00000000..f4ea9ce1
--- /dev/null
+++ b/tests/api_resources/alpha/eval/test_jobs.py
@@ -0,0 +1,312 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from tests.utils import assert_matches_type
+from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
+from llama_stack_client.types.alpha import Job, EvaluateResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestJobs:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_retrieve(self, client: LlamaStackClient) -> None:
+ job = client.alpha.eval.jobs.retrieve(
+ job_id="job_id",
+ benchmark_id="benchmark_id",
+ )
+ assert_matches_type(EvaluateResponse, job, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
+ response = client.alpha.eval.jobs.with_raw_response.retrieve(
+ job_id="job_id",
+ benchmark_id="benchmark_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ job = response.parse()
+ assert_matches_type(EvaluateResponse, job, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
+ with client.alpha.eval.jobs.with_streaming_response.retrieve(
+ job_id="job_id",
+ benchmark_id="benchmark_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ job = response.parse()
+ assert_matches_type(EvaluateResponse, job, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
+ client.alpha.eval.jobs.with_raw_response.retrieve(
+ job_id="job_id",
+ benchmark_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
+ client.alpha.eval.jobs.with_raw_response.retrieve(
+ job_id="",
+ benchmark_id="benchmark_id",
+ )
+
+ @parametrize
+ def test_method_cancel(self, client: LlamaStackClient) -> None:
+ job = client.alpha.eval.jobs.cancel(
+ job_id="job_id",
+ benchmark_id="benchmark_id",
+ )
+ assert job is None
+
+ @parametrize
+ def test_raw_response_cancel(self, client: LlamaStackClient) -> None:
+ response = client.alpha.eval.jobs.with_raw_response.cancel(
+ job_id="job_id",
+ benchmark_id="benchmark_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ job = response.parse()
+ assert job is None
+
+ @parametrize
+ def test_streaming_response_cancel(self, client: LlamaStackClient) -> None:
+ with client.alpha.eval.jobs.with_streaming_response.cancel(
+ job_id="job_id",
+ benchmark_id="benchmark_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ job = response.parse()
+ assert job is None
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_cancel(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
+ client.alpha.eval.jobs.with_raw_response.cancel(
+ job_id="job_id",
+ benchmark_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
+ client.alpha.eval.jobs.with_raw_response.cancel(
+ job_id="",
+ benchmark_id="benchmark_id",
+ )
+
+ @parametrize
+ def test_method_status(self, client: LlamaStackClient) -> None:
+ job = client.alpha.eval.jobs.status(
+ job_id="job_id",
+ benchmark_id="benchmark_id",
+ )
+ assert_matches_type(Job, job, path=["response"])
+
+ @parametrize
+ def test_raw_response_status(self, client: LlamaStackClient) -> None:
+ response = client.alpha.eval.jobs.with_raw_response.status(
+ job_id="job_id",
+ benchmark_id="benchmark_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ job = response.parse()
+ assert_matches_type(Job, job, path=["response"])
+
+ @parametrize
+ def test_streaming_response_status(self, client: LlamaStackClient) -> None:
+ with client.alpha.eval.jobs.with_streaming_response.status(
+ job_id="job_id",
+ benchmark_id="benchmark_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ job = response.parse()
+ assert_matches_type(Job, job, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_status(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
+ client.alpha.eval.jobs.with_raw_response.status(
+ job_id="job_id",
+ benchmark_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
+ client.alpha.eval.jobs.with_raw_response.status(
+ job_id="",
+ benchmark_id="benchmark_id",
+ )
+
+
+class TestAsyncJobs:
+ parametrize = pytest.mark.parametrize(
+ "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+ )
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ job = await async_client.alpha.eval.jobs.retrieve(
+ job_id="job_id",
+ benchmark_id="benchmark_id",
+ )
+ assert_matches_type(EvaluateResponse, job, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.eval.jobs.with_raw_response.retrieve(
+ job_id="job_id",
+ benchmark_id="benchmark_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ job = await response.parse()
+ assert_matches_type(EvaluateResponse, job, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.eval.jobs.with_streaming_response.retrieve(
+ job_id="job_id",
+ benchmark_id="benchmark_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ job = await response.parse()
+ assert_matches_type(EvaluateResponse, job, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
+ await async_client.alpha.eval.jobs.with_raw_response.retrieve(
+ job_id="job_id",
+ benchmark_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
+ await async_client.alpha.eval.jobs.with_raw_response.retrieve(
+ job_id="",
+ benchmark_id="benchmark_id",
+ )
+
+ @parametrize
+ async def test_method_cancel(self, async_client: AsyncLlamaStackClient) -> None:
+ job = await async_client.alpha.eval.jobs.cancel(
+ job_id="job_id",
+ benchmark_id="benchmark_id",
+ )
+ assert job is None
+
+ @parametrize
+ async def test_raw_response_cancel(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.eval.jobs.with_raw_response.cancel(
+ job_id="job_id",
+ benchmark_id="benchmark_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ job = await response.parse()
+ assert job is None
+
+ @parametrize
+ async def test_streaming_response_cancel(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.eval.jobs.with_streaming_response.cancel(
+ job_id="job_id",
+ benchmark_id="benchmark_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ job = await response.parse()
+ assert job is None
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_cancel(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
+ await async_client.alpha.eval.jobs.with_raw_response.cancel(
+ job_id="job_id",
+ benchmark_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
+ await async_client.alpha.eval.jobs.with_raw_response.cancel(
+ job_id="",
+ benchmark_id="benchmark_id",
+ )
+
+ @parametrize
+ async def test_method_status(self, async_client: AsyncLlamaStackClient) -> None:
+ job = await async_client.alpha.eval.jobs.status(
+ job_id="job_id",
+ benchmark_id="benchmark_id",
+ )
+ assert_matches_type(Job, job, path=["response"])
+
+ @parametrize
+ async def test_raw_response_status(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.eval.jobs.with_raw_response.status(
+ job_id="job_id",
+ benchmark_id="benchmark_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ job = await response.parse()
+ assert_matches_type(Job, job, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_status(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.eval.jobs.with_streaming_response.status(
+ job_id="job_id",
+ benchmark_id="benchmark_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ job = await response.parse()
+ assert_matches_type(Job, job, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_status(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
+ await async_client.alpha.eval.jobs.with_raw_response.status(
+ job_id="job_id",
+ benchmark_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
+ await async_client.alpha.eval.jobs.with_raw_response.status(
+ job_id="",
+ benchmark_id="benchmark_id",
+ )
diff --git a/tests/api_resources/alpha/post_training/__init__.py b/tests/api_resources/alpha/post_training/__init__.py
new file mode 100644
index 00000000..fd8019a9
--- /dev/null
+++ b/tests/api_resources/alpha/post_training/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/alpha/post_training/test_job.py b/tests/api_resources/alpha/post_training/test_job.py
new file mode 100644
index 00000000..bec18796
--- /dev/null
+++ b/tests/api_resources/alpha/post_training/test_job.py
@@ -0,0 +1,264 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from tests.utils import assert_matches_type
+from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
+from llama_stack_client.types.alpha.post_training import (
+ JobListResponse,
+ JobStatusResponse,
+ JobArtifactsResponse,
+)
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestJob:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_list(self, client: LlamaStackClient) -> None:
+ job = client.alpha.post_training.job.list()
+ assert_matches_type(JobListResponse, job, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: LlamaStackClient) -> None:
+ response = client.alpha.post_training.job.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ job = response.parse()
+ assert_matches_type(JobListResponse, job, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: LlamaStackClient) -> None:
+ with client.alpha.post_training.job.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ job = response.parse()
+ assert_matches_type(JobListResponse, job, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_artifacts(self, client: LlamaStackClient) -> None:
+ job = client.alpha.post_training.job.artifacts(
+ job_uuid="job_uuid",
+ )
+ assert_matches_type(JobArtifactsResponse, job, path=["response"])
+
+ @parametrize
+ def test_raw_response_artifacts(self, client: LlamaStackClient) -> None:
+ response = client.alpha.post_training.job.with_raw_response.artifacts(
+ job_uuid="job_uuid",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ job = response.parse()
+ assert_matches_type(JobArtifactsResponse, job, path=["response"])
+
+ @parametrize
+ def test_streaming_response_artifacts(self, client: LlamaStackClient) -> None:
+ with client.alpha.post_training.job.with_streaming_response.artifacts(
+ job_uuid="job_uuid",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ job = response.parse()
+ assert_matches_type(JobArtifactsResponse, job, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_cancel(self, client: LlamaStackClient) -> None:
+ job = client.alpha.post_training.job.cancel(
+ job_uuid="job_uuid",
+ )
+ assert job is None
+
+ @parametrize
+ def test_raw_response_cancel(self, client: LlamaStackClient) -> None:
+ response = client.alpha.post_training.job.with_raw_response.cancel(
+ job_uuid="job_uuid",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ job = response.parse()
+ assert job is None
+
+ @parametrize
+ def test_streaming_response_cancel(self, client: LlamaStackClient) -> None:
+ with client.alpha.post_training.job.with_streaming_response.cancel(
+ job_uuid="job_uuid",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ job = response.parse()
+ assert job is None
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_status(self, client: LlamaStackClient) -> None:
+ job = client.alpha.post_training.job.status(
+ job_uuid="job_uuid",
+ )
+ assert_matches_type(JobStatusResponse, job, path=["response"])
+
+ @parametrize
+ def test_raw_response_status(self, client: LlamaStackClient) -> None:
+ response = client.alpha.post_training.job.with_raw_response.status(
+ job_uuid="job_uuid",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ job = response.parse()
+ assert_matches_type(JobStatusResponse, job, path=["response"])
+
+ @parametrize
+ def test_streaming_response_status(self, client: LlamaStackClient) -> None:
+ with client.alpha.post_training.job.with_streaming_response.status(
+ job_uuid="job_uuid",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ job = response.parse()
+ assert_matches_type(JobStatusResponse, job, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncJob:
+ parametrize = pytest.mark.parametrize(
+ "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+ )
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
+ job = await async_client.alpha.post_training.job.list()
+ assert_matches_type(JobListResponse, job, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.post_training.job.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ job = await response.parse()
+ assert_matches_type(JobListResponse, job, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.post_training.job.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ job = await response.parse()
+ assert_matches_type(JobListResponse, job, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_artifacts(self, async_client: AsyncLlamaStackClient) -> None:
+ job = await async_client.alpha.post_training.job.artifacts(
+ job_uuid="job_uuid",
+ )
+ assert_matches_type(JobArtifactsResponse, job, path=["response"])
+
+ @parametrize
+ async def test_raw_response_artifacts(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.post_training.job.with_raw_response.artifacts(
+ job_uuid="job_uuid",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ job = await response.parse()
+ assert_matches_type(JobArtifactsResponse, job, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_artifacts(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.post_training.job.with_streaming_response.artifacts(
+ job_uuid="job_uuid",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ job = await response.parse()
+ assert_matches_type(JobArtifactsResponse, job, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_cancel(self, async_client: AsyncLlamaStackClient) -> None:
+ job = await async_client.alpha.post_training.job.cancel(
+ job_uuid="job_uuid",
+ )
+ assert job is None
+
+ @parametrize
+ async def test_raw_response_cancel(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.post_training.job.with_raw_response.cancel(
+ job_uuid="job_uuid",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ job = await response.parse()
+ assert job is None
+
+ @parametrize
+ async def test_streaming_response_cancel(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.post_training.job.with_streaming_response.cancel(
+ job_uuid="job_uuid",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ job = await response.parse()
+ assert job is None
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_status(self, async_client: AsyncLlamaStackClient) -> None:
+ job = await async_client.alpha.post_training.job.status(
+ job_uuid="job_uuid",
+ )
+ assert_matches_type(JobStatusResponse, job, path=["response"])
+
+ @parametrize
+ async def test_raw_response_status(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.post_training.job.with_raw_response.status(
+ job_uuid="job_uuid",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ job = await response.parse()
+ assert_matches_type(JobStatusResponse, job, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_status(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.post_training.job.with_streaming_response.status(
+ job_uuid="job_uuid",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ job = await response.parse()
+ assert_matches_type(JobStatusResponse, job, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/alpha/test_agents.py b/tests/api_resources/alpha/test_agents.py
new file mode 100644
index 00000000..075bd478
--- /dev/null
+++ b/tests/api_resources/alpha/test_agents.py
@@ -0,0 +1,412 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from tests.utils import assert_matches_type
+from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
+from llama_stack_client.types.alpha import (
+ AgentListResponse,
+ AgentCreateResponse,
+ AgentRetrieveResponse,
+)
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestAgents:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_create(self, client: LlamaStackClient) -> None:
+ agent = client.alpha.agents.create(
+ agent_config={
+ "instructions": "instructions",
+ "model": "model",
+ },
+ )
+ assert_matches_type(AgentCreateResponse, agent, path=["response"])
+
+ @parametrize
+ def test_method_create_with_all_params(self, client: LlamaStackClient) -> None:
+ agent = client.alpha.agents.create(
+ agent_config={
+ "instructions": "instructions",
+ "model": "model",
+ "client_tools": [
+ {
+ "name": "name",
+ "description": "description",
+ "input_schema": {"foo": True},
+ "metadata": {"foo": True},
+ "output_schema": {"foo": True},
+ "toolgroup_id": "toolgroup_id",
+ }
+ ],
+ "enable_session_persistence": True,
+ "input_shields": ["string"],
+ "max_infer_iters": 0,
+ "name": "name",
+ "output_shields": ["string"],
+ "response_format": {
+ "json_schema": {"foo": True},
+ "type": "json_schema",
+ },
+ "sampling_params": {
+ "strategy": {"type": "greedy"},
+ "max_tokens": 0,
+ "repetition_penalty": 0,
+ "stop": ["string"],
+ },
+ "tool_choice": "auto",
+ "tool_config": {
+ "system_message_behavior": "append",
+ "tool_choice": "auto",
+ "tool_prompt_format": "json",
+ },
+ "tool_prompt_format": "json",
+ "toolgroups": ["string"],
+ },
+ )
+ assert_matches_type(AgentCreateResponse, agent, path=["response"])
+
+ @parametrize
+ def test_raw_response_create(self, client: LlamaStackClient) -> None:
+ response = client.alpha.agents.with_raw_response.create(
+ agent_config={
+ "instructions": "instructions",
+ "model": "model",
+ },
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ agent = response.parse()
+ assert_matches_type(AgentCreateResponse, agent, path=["response"])
+
+ @parametrize
+ def test_streaming_response_create(self, client: LlamaStackClient) -> None:
+ with client.alpha.agents.with_streaming_response.create(
+ agent_config={
+ "instructions": "instructions",
+ "model": "model",
+ },
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ agent = response.parse()
+ assert_matches_type(AgentCreateResponse, agent, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_retrieve(self, client: LlamaStackClient) -> None:
+ agent = client.alpha.agents.retrieve(
+ "agent_id",
+ )
+ assert_matches_type(AgentRetrieveResponse, agent, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
+ response = client.alpha.agents.with_raw_response.retrieve(
+ "agent_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ agent = response.parse()
+ assert_matches_type(AgentRetrieveResponse, agent, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
+ with client.alpha.agents.with_streaming_response.retrieve(
+ "agent_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ agent = response.parse()
+ assert_matches_type(AgentRetrieveResponse, agent, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ client.alpha.agents.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ def test_method_list(self, client: LlamaStackClient) -> None:
+ agent = client.alpha.agents.list()
+ assert_matches_type(AgentListResponse, agent, path=["response"])
+
+ @parametrize
+ def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
+ agent = client.alpha.agents.list(
+ limit=0,
+ start_index=0,
+ )
+ assert_matches_type(AgentListResponse, agent, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: LlamaStackClient) -> None:
+ response = client.alpha.agents.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ agent = response.parse()
+ assert_matches_type(AgentListResponse, agent, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: LlamaStackClient) -> None:
+ with client.alpha.agents.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ agent = response.parse()
+ assert_matches_type(AgentListResponse, agent, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_delete(self, client: LlamaStackClient) -> None:
+ agent = client.alpha.agents.delete(
+ "agent_id",
+ )
+ assert agent is None
+
+ @parametrize
+ def test_raw_response_delete(self, client: LlamaStackClient) -> None:
+ response = client.alpha.agents.with_raw_response.delete(
+ "agent_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ agent = response.parse()
+ assert agent is None
+
+ @parametrize
+ def test_streaming_response_delete(self, client: LlamaStackClient) -> None:
+ with client.alpha.agents.with_streaming_response.delete(
+ "agent_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ agent = response.parse()
+ assert agent is None
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_delete(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ client.alpha.agents.with_raw_response.delete(
+ "",
+ )
+
+
+class TestAsyncAgents:
+ parametrize = pytest.mark.parametrize(
+ "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+ )
+
+ @parametrize
+ async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None:
+ agent = await async_client.alpha.agents.create(
+ agent_config={
+ "instructions": "instructions",
+ "model": "model",
+ },
+ )
+ assert_matches_type(AgentCreateResponse, agent, path=["response"])
+
+ @parametrize
+ async def test_method_create_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ agent = await async_client.alpha.agents.create(
+ agent_config={
+ "instructions": "instructions",
+ "model": "model",
+ "client_tools": [
+ {
+ "name": "name",
+ "description": "description",
+ "input_schema": {"foo": True},
+ "metadata": {"foo": True},
+ "output_schema": {"foo": True},
+ "toolgroup_id": "toolgroup_id",
+ }
+ ],
+ "enable_session_persistence": True,
+ "input_shields": ["string"],
+ "max_infer_iters": 0,
+ "name": "name",
+ "output_shields": ["string"],
+ "response_format": {
+ "json_schema": {"foo": True},
+ "type": "json_schema",
+ },
+ "sampling_params": {
+ "strategy": {"type": "greedy"},
+ "max_tokens": 0,
+ "repetition_penalty": 0,
+ "stop": ["string"],
+ },
+ "tool_choice": "auto",
+ "tool_config": {
+ "system_message_behavior": "append",
+ "tool_choice": "auto",
+ "tool_prompt_format": "json",
+ },
+ "tool_prompt_format": "json",
+ "toolgroups": ["string"],
+ },
+ )
+ assert_matches_type(AgentCreateResponse, agent, path=["response"])
+
+ @parametrize
+ async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.agents.with_raw_response.create(
+ agent_config={
+ "instructions": "instructions",
+ "model": "model",
+ },
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ agent = await response.parse()
+ assert_matches_type(AgentCreateResponse, agent, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_create(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.agents.with_streaming_response.create(
+ agent_config={
+ "instructions": "instructions",
+ "model": "model",
+ },
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ agent = await response.parse()
+ assert_matches_type(AgentCreateResponse, agent, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ agent = await async_client.alpha.agents.retrieve(
+ "agent_id",
+ )
+ assert_matches_type(AgentRetrieveResponse, agent, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.agents.with_raw_response.retrieve(
+ "agent_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ agent = await response.parse()
+ assert_matches_type(AgentRetrieveResponse, agent, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.agents.with_streaming_response.retrieve(
+ "agent_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ agent = await response.parse()
+ assert_matches_type(AgentRetrieveResponse, agent, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ await async_client.alpha.agents.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
+ agent = await async_client.alpha.agents.list()
+ assert_matches_type(AgentListResponse, agent, path=["response"])
+
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ agent = await async_client.alpha.agents.list(
+ limit=0,
+ start_index=0,
+ )
+ assert_matches_type(AgentListResponse, agent, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.agents.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ agent = await response.parse()
+ assert_matches_type(AgentListResponse, agent, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.agents.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ agent = await response.parse()
+ assert_matches_type(AgentListResponse, agent, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_delete(self, async_client: AsyncLlamaStackClient) -> None:
+ agent = await async_client.alpha.agents.delete(
+ "agent_id",
+ )
+ assert agent is None
+
+ @parametrize
+ async def test_raw_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.agents.with_raw_response.delete(
+ "agent_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ agent = await response.parse()
+ assert agent is None
+
+ @parametrize
+ async def test_streaming_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.agents.with_streaming_response.delete(
+ "agent_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ agent = await response.parse()
+ assert agent is None
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_delete(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
+ await async_client.alpha.agents.with_raw_response.delete(
+ "",
+ )
diff --git a/tests/api_resources/alpha/test_benchmarks.py b/tests/api_resources/alpha/test_benchmarks.py
new file mode 100644
index 00000000..98652091
--- /dev/null
+++ b/tests/api_resources/alpha/test_benchmarks.py
@@ -0,0 +1,248 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from tests.utils import assert_matches_type
+from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
+from llama_stack_client.types.alpha import Benchmark, BenchmarkListResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestBenchmarks:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_retrieve(self, client: LlamaStackClient) -> None:
+ benchmark = client.alpha.benchmarks.retrieve(
+ "benchmark_id",
+ )
+ assert_matches_type(Benchmark, benchmark, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
+ response = client.alpha.benchmarks.with_raw_response.retrieve(
+ "benchmark_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ benchmark = response.parse()
+ assert_matches_type(Benchmark, benchmark, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
+ with client.alpha.benchmarks.with_streaming_response.retrieve(
+ "benchmark_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ benchmark = response.parse()
+ assert_matches_type(Benchmark, benchmark, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
+ client.alpha.benchmarks.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ def test_method_list(self, client: LlamaStackClient) -> None:
+ benchmark = client.alpha.benchmarks.list()
+ assert_matches_type(BenchmarkListResponse, benchmark, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: LlamaStackClient) -> None:
+ response = client.alpha.benchmarks.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ benchmark = response.parse()
+ assert_matches_type(BenchmarkListResponse, benchmark, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: LlamaStackClient) -> None:
+ with client.alpha.benchmarks.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ benchmark = response.parse()
+ assert_matches_type(BenchmarkListResponse, benchmark, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_register(self, client: LlamaStackClient) -> None:
+ benchmark = client.alpha.benchmarks.register(
+ benchmark_id="benchmark_id",
+ dataset_id="dataset_id",
+ scoring_functions=["string"],
+ )
+ assert benchmark is None
+
+ @parametrize
+ def test_method_register_with_all_params(self, client: LlamaStackClient) -> None:
+ benchmark = client.alpha.benchmarks.register(
+ benchmark_id="benchmark_id",
+ dataset_id="dataset_id",
+ scoring_functions=["string"],
+ metadata={"foo": True},
+ provider_benchmark_id="provider_benchmark_id",
+ provider_id="provider_id",
+ )
+ assert benchmark is None
+
+ @parametrize
+ def test_raw_response_register(self, client: LlamaStackClient) -> None:
+ response = client.alpha.benchmarks.with_raw_response.register(
+ benchmark_id="benchmark_id",
+ dataset_id="dataset_id",
+ scoring_functions=["string"],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ benchmark = response.parse()
+ assert benchmark is None
+
+ @parametrize
+ def test_streaming_response_register(self, client: LlamaStackClient) -> None:
+ with client.alpha.benchmarks.with_streaming_response.register(
+ benchmark_id="benchmark_id",
+ dataset_id="dataset_id",
+ scoring_functions=["string"],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ benchmark = response.parse()
+ assert benchmark is None
+
+ assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncBenchmarks:
+ parametrize = pytest.mark.parametrize(
+ "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+ )
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ benchmark = await async_client.alpha.benchmarks.retrieve(
+ "benchmark_id",
+ )
+ assert_matches_type(Benchmark, benchmark, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.benchmarks.with_raw_response.retrieve(
+ "benchmark_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ benchmark = await response.parse()
+ assert_matches_type(Benchmark, benchmark, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.benchmarks.with_streaming_response.retrieve(
+ "benchmark_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ benchmark = await response.parse()
+ assert_matches_type(Benchmark, benchmark, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
+ await async_client.alpha.benchmarks.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
+ benchmark = await async_client.alpha.benchmarks.list()
+ assert_matches_type(BenchmarkListResponse, benchmark, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.benchmarks.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ benchmark = await response.parse()
+ assert_matches_type(BenchmarkListResponse, benchmark, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.benchmarks.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ benchmark = await response.parse()
+ assert_matches_type(BenchmarkListResponse, benchmark, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_register(self, async_client: AsyncLlamaStackClient) -> None:
+ benchmark = await async_client.alpha.benchmarks.register(
+ benchmark_id="benchmark_id",
+ dataset_id="dataset_id",
+ scoring_functions=["string"],
+ )
+ assert benchmark is None
+
+ @parametrize
+ async def test_method_register_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ benchmark = await async_client.alpha.benchmarks.register(
+ benchmark_id="benchmark_id",
+ dataset_id="dataset_id",
+ scoring_functions=["string"],
+ metadata={"foo": True},
+ provider_benchmark_id="provider_benchmark_id",
+ provider_id="provider_id",
+ )
+ assert benchmark is None
+
+ @parametrize
+ async def test_raw_response_register(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.benchmarks.with_raw_response.register(
+ benchmark_id="benchmark_id",
+ dataset_id="dataset_id",
+ scoring_functions=["string"],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ benchmark = await response.parse()
+ assert benchmark is None
+
+ @parametrize
+ async def test_streaming_response_register(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.benchmarks.with_streaming_response.register(
+ benchmark_id="benchmark_id",
+ dataset_id="dataset_id",
+ scoring_functions=["string"],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ benchmark = await response.parse()
+ assert benchmark is None
+
+ assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/alpha/test_eval.py b/tests/api_resources/alpha/test_eval.py
new file mode 100644
index 00000000..88bd0c0c
--- /dev/null
+++ b/tests/api_resources/alpha/test_eval.py
@@ -0,0 +1,1115 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from tests.utils import assert_matches_type
+from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
+from llama_stack_client.types.alpha import (
+ Job,
+ EvaluateResponse,
+)
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestEval:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_evaluate_rows(self, client: LlamaStackClient) -> None:
+ eval = client.alpha.eval.evaluate_rows(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ input_rows=[{"foo": True}],
+ scoring_functions=["string"],
+ )
+ assert_matches_type(EvaluateResponse, eval, path=["response"])
+
+ @parametrize
+ def test_method_evaluate_rows_with_all_params(self, client: LlamaStackClient) -> None:
+ eval = client.alpha.eval.evaluate_rows(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {
+ "strategy": {"type": "greedy"},
+ "max_tokens": 0,
+ "repetition_penalty": 0,
+ "stop": ["string"],
+ },
+ "type": "model",
+ "system_message": {
+ "content": "string",
+ "role": "system",
+ },
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ "prompt_template": "prompt_template",
+ }
+ },
+ "num_examples": 0,
+ },
+ input_rows=[{"foo": True}],
+ scoring_functions=["string"],
+ )
+ assert_matches_type(EvaluateResponse, eval, path=["response"])
+
+ @parametrize
+ def test_raw_response_evaluate_rows(self, client: LlamaStackClient) -> None:
+ response = client.alpha.eval.with_raw_response.evaluate_rows(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ input_rows=[{"foo": True}],
+ scoring_functions=["string"],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(EvaluateResponse, eval, path=["response"])
+
+ @parametrize
+ def test_streaming_response_evaluate_rows(self, client: LlamaStackClient) -> None:
+ with client.alpha.eval.with_streaming_response.evaluate_rows(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ input_rows=[{"foo": True}],
+ scoring_functions=["string"],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = response.parse()
+ assert_matches_type(EvaluateResponse, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_evaluate_rows(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
+ client.alpha.eval.with_raw_response.evaluate_rows(
+ benchmark_id="",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ input_rows=[{"foo": True}],
+ scoring_functions=["string"],
+ )
+
+ @parametrize
+ def test_method_evaluate_rows_alpha(self, client: LlamaStackClient) -> None:
+ eval = client.alpha.eval.evaluate_rows_alpha(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ input_rows=[{"foo": True}],
+ scoring_functions=["string"],
+ )
+ assert_matches_type(EvaluateResponse, eval, path=["response"])
+
+ @parametrize
+ def test_method_evaluate_rows_alpha_with_all_params(self, client: LlamaStackClient) -> None:
+ eval = client.alpha.eval.evaluate_rows_alpha(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {
+ "strategy": {"type": "greedy"},
+ "max_tokens": 0,
+ "repetition_penalty": 0,
+ "stop": ["string"],
+ },
+ "type": "model",
+ "system_message": {
+ "content": "string",
+ "role": "system",
+ },
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ "prompt_template": "prompt_template",
+ }
+ },
+ "num_examples": 0,
+ },
+ input_rows=[{"foo": True}],
+ scoring_functions=["string"],
+ )
+ assert_matches_type(EvaluateResponse, eval, path=["response"])
+
+ @parametrize
+ def test_raw_response_evaluate_rows_alpha(self, client: LlamaStackClient) -> None:
+ response = client.alpha.eval.with_raw_response.evaluate_rows_alpha(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ input_rows=[{"foo": True}],
+ scoring_functions=["string"],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(EvaluateResponse, eval, path=["response"])
+
+ @parametrize
+ def test_streaming_response_evaluate_rows_alpha(self, client: LlamaStackClient) -> None:
+ with client.alpha.eval.with_streaming_response.evaluate_rows_alpha(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ input_rows=[{"foo": True}],
+ scoring_functions=["string"],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = response.parse()
+ assert_matches_type(EvaluateResponse, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_evaluate_rows_alpha(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
+ client.alpha.eval.with_raw_response.evaluate_rows_alpha(
+ benchmark_id="",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ input_rows=[{"foo": True}],
+ scoring_functions=["string"],
+ )
+
+ @parametrize
+ def test_method_run_eval(self, client: LlamaStackClient) -> None:
+ eval = client.alpha.eval.run_eval(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ )
+ assert_matches_type(Job, eval, path=["response"])
+
+ @parametrize
+ def test_method_run_eval_with_all_params(self, client: LlamaStackClient) -> None:
+ eval = client.alpha.eval.run_eval(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {
+ "strategy": {"type": "greedy"},
+ "max_tokens": 0,
+ "repetition_penalty": 0,
+ "stop": ["string"],
+ },
+ "type": "model",
+ "system_message": {
+ "content": "string",
+ "role": "system",
+ },
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ "prompt_template": "prompt_template",
+ }
+ },
+ "num_examples": 0,
+ },
+ )
+ assert_matches_type(Job, eval, path=["response"])
+
+ @parametrize
+ def test_raw_response_run_eval(self, client: LlamaStackClient) -> None:
+ response = client.alpha.eval.with_raw_response.run_eval(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(Job, eval, path=["response"])
+
+ @parametrize
+ def test_streaming_response_run_eval(self, client: LlamaStackClient) -> None:
+ with client.alpha.eval.with_streaming_response.run_eval(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = response.parse()
+ assert_matches_type(Job, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_run_eval(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
+ client.alpha.eval.with_raw_response.run_eval(
+ benchmark_id="",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ )
+
+ @parametrize
+ def test_method_run_eval_alpha(self, client: LlamaStackClient) -> None:
+ eval = client.alpha.eval.run_eval_alpha(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ )
+ assert_matches_type(Job, eval, path=["response"])
+
+ @parametrize
+ def test_method_run_eval_alpha_with_all_params(self, client: LlamaStackClient) -> None:
+ eval = client.alpha.eval.run_eval_alpha(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {
+ "strategy": {"type": "greedy"},
+ "max_tokens": 0,
+ "repetition_penalty": 0,
+ "stop": ["string"],
+ },
+ "type": "model",
+ "system_message": {
+ "content": "string",
+ "role": "system",
+ },
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ "prompt_template": "prompt_template",
+ }
+ },
+ "num_examples": 0,
+ },
+ )
+ assert_matches_type(Job, eval, path=["response"])
+
+ @parametrize
+ def test_raw_response_run_eval_alpha(self, client: LlamaStackClient) -> None:
+ response = client.alpha.eval.with_raw_response.run_eval_alpha(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(Job, eval, path=["response"])
+
+ @parametrize
+ def test_streaming_response_run_eval_alpha(self, client: LlamaStackClient) -> None:
+ with client.alpha.eval.with_streaming_response.run_eval_alpha(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = response.parse()
+ assert_matches_type(Job, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_run_eval_alpha(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
+ client.alpha.eval.with_raw_response.run_eval_alpha(
+ benchmark_id="",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ )
+
+
+class TestAsyncEval:
+ parametrize = pytest.mark.parametrize(
+ "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+ )
+
+ @parametrize
+ async def test_method_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None:
+ eval = await async_client.alpha.eval.evaluate_rows(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ input_rows=[{"foo": True}],
+ scoring_functions=["string"],
+ )
+ assert_matches_type(EvaluateResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_method_evaluate_rows_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ eval = await async_client.alpha.eval.evaluate_rows(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {
+ "strategy": {"type": "greedy"},
+ "max_tokens": 0,
+ "repetition_penalty": 0,
+ "stop": ["string"],
+ },
+ "type": "model",
+ "system_message": {
+ "content": "string",
+ "role": "system",
+ },
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ "prompt_template": "prompt_template",
+ }
+ },
+ "num_examples": 0,
+ },
+ input_rows=[{"foo": True}],
+ scoring_functions=["string"],
+ )
+ assert_matches_type(EvaluateResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_raw_response_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.eval.with_raw_response.evaluate_rows(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ input_rows=[{"foo": True}],
+ scoring_functions=["string"],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = await response.parse()
+ assert_matches_type(EvaluateResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.eval.with_streaming_response.evaluate_rows(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ input_rows=[{"foo": True}],
+ scoring_functions=["string"],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = await response.parse()
+ assert_matches_type(EvaluateResponse, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
+ await async_client.alpha.eval.with_raw_response.evaluate_rows(
+ benchmark_id="",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ input_rows=[{"foo": True}],
+ scoring_functions=["string"],
+ )
+
+ @parametrize
+ async def test_method_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None:
+ eval = await async_client.alpha.eval.evaluate_rows_alpha(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ input_rows=[{"foo": True}],
+ scoring_functions=["string"],
+ )
+ assert_matches_type(EvaluateResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_method_evaluate_rows_alpha_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ eval = await async_client.alpha.eval.evaluate_rows_alpha(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {
+ "strategy": {"type": "greedy"},
+ "max_tokens": 0,
+ "repetition_penalty": 0,
+ "stop": ["string"],
+ },
+ "type": "model",
+ "system_message": {
+ "content": "string",
+ "role": "system",
+ },
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ "prompt_template": "prompt_template",
+ }
+ },
+ "num_examples": 0,
+ },
+ input_rows=[{"foo": True}],
+ scoring_functions=["string"],
+ )
+ assert_matches_type(EvaluateResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_raw_response_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.eval.with_raw_response.evaluate_rows_alpha(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ input_rows=[{"foo": True}],
+ scoring_functions=["string"],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = await response.parse()
+ assert_matches_type(EvaluateResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.eval.with_streaming_response.evaluate_rows_alpha(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ input_rows=[{"foo": True}],
+ scoring_functions=["string"],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = await response.parse()
+ assert_matches_type(EvaluateResponse, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
+ await async_client.alpha.eval.with_raw_response.evaluate_rows_alpha(
+ benchmark_id="",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ input_rows=[{"foo": True}],
+ scoring_functions=["string"],
+ )
+
+ @parametrize
+ async def test_method_run_eval(self, async_client: AsyncLlamaStackClient) -> None:
+ eval = await async_client.alpha.eval.run_eval(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ )
+ assert_matches_type(Job, eval, path=["response"])
+
+ @parametrize
+ async def test_method_run_eval_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ eval = await async_client.alpha.eval.run_eval(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {
+ "strategy": {"type": "greedy"},
+ "max_tokens": 0,
+ "repetition_penalty": 0,
+ "stop": ["string"],
+ },
+ "type": "model",
+ "system_message": {
+ "content": "string",
+ "role": "system",
+ },
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ "prompt_template": "prompt_template",
+ }
+ },
+ "num_examples": 0,
+ },
+ )
+ assert_matches_type(Job, eval, path=["response"])
+
+ @parametrize
+ async def test_raw_response_run_eval(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.eval.with_raw_response.run_eval(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = await response.parse()
+ assert_matches_type(Job, eval, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_run_eval(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.eval.with_streaming_response.run_eval(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = await response.parse()
+ assert_matches_type(Job, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_run_eval(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
+ await async_client.alpha.eval.with_raw_response.run_eval(
+ benchmark_id="",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ )
+
+ @parametrize
+ async def test_method_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None:
+ eval = await async_client.alpha.eval.run_eval_alpha(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ )
+ assert_matches_type(Job, eval, path=["response"])
+
+ @parametrize
+ async def test_method_run_eval_alpha_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ eval = await async_client.alpha.eval.run_eval_alpha(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {
+ "strategy": {"type": "greedy"},
+ "max_tokens": 0,
+ "repetition_penalty": 0,
+ "stop": ["string"],
+ },
+ "type": "model",
+ "system_message": {
+ "content": "string",
+ "role": "system",
+ },
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ "prompt_template": "prompt_template",
+ }
+ },
+ "num_examples": 0,
+ },
+ )
+ assert_matches_type(Job, eval, path=["response"])
+
+ @parametrize
+ async def test_raw_response_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.eval.with_raw_response.run_eval_alpha(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = await response.parse()
+ assert_matches_type(Job, eval, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.eval.with_streaming_response.run_eval_alpha(
+ benchmark_id="benchmark_id",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = await response.parse()
+ assert_matches_type(Job, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
+ await async_client.alpha.eval.with_raw_response.run_eval_alpha(
+ benchmark_id="",
+ benchmark_config={
+ "eval_candidate": {
+ "model": "model",
+ "sampling_params": {"strategy": {"type": "greedy"}},
+ "type": "model",
+ },
+ "scoring_params": {
+ "foo": {
+ "aggregation_functions": ["average"],
+ "judge_model": "judge_model",
+ "judge_score_regexes": ["string"],
+ "type": "llm_as_judge",
+ }
+ },
+ },
+ )
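For orientation, a minimal sketch of the call these generated eval tests exercise. It is not part of the generated suite; the benchmark id, model names, and scoring-function key are placeholder assumptions, and the payload shape mirrors test_method_run_eval above.

    import os

    from llama_stack_client import LlamaStackClient

    # Placeholder base URL; the tests default to a local mock server on this port.
    client = LlamaStackClient(base_url=os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010"))

    # Kick off an evaluation run; required arguments mirror test_method_run_eval.
    job = client.alpha.eval.run_eval(
        benchmark_id="my-benchmark",  # placeholder benchmark id
        benchmark_config={
            "eval_candidate": {
                "model": "my-model",  # placeholder model id
                "sampling_params": {"strategy": {"type": "greedy"}},
                "type": "model",
            },
            "scoring_params": {
                "my-scoring-fn": {  # placeholder scoring-function key
                    "aggregation_functions": ["average"],
                    "judge_model": "judge-model",  # placeholder judge model id
                    "judge_score_regexes": [r"\d+"],  # placeholder score-extraction regex
                    "type": "llm_as_judge",
                }
            },
        },
    )
    print(job)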
diff --git a/tests/api_resources/alpha/test_inference.py b/tests/api_resources/alpha/test_inference.py
new file mode 100644
index 00000000..551e2213
--- /dev/null
+++ b/tests/api_resources/alpha/test_inference.py
@@ -0,0 +1,118 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from tests.utils import assert_matches_type
+from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
+from llama_stack_client.types.alpha import InferenceRerankResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestInference:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_rerank(self, client: LlamaStackClient) -> None:
+ inference = client.alpha.inference.rerank(
+ items=["string"],
+ model="model",
+ query="string",
+ )
+ assert_matches_type(InferenceRerankResponse, inference, path=["response"])
+
+ @parametrize
+ def test_method_rerank_with_all_params(self, client: LlamaStackClient) -> None:
+ inference = client.alpha.inference.rerank(
+ items=["string"],
+ model="model",
+ query="string",
+ max_num_results=0,
+ )
+ assert_matches_type(InferenceRerankResponse, inference, path=["response"])
+
+ @parametrize
+ def test_raw_response_rerank(self, client: LlamaStackClient) -> None:
+ response = client.alpha.inference.with_raw_response.rerank(
+ items=["string"],
+ model="model",
+ query="string",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ inference = response.parse()
+ assert_matches_type(InferenceRerankResponse, inference, path=["response"])
+
+ @parametrize
+ def test_streaming_response_rerank(self, client: LlamaStackClient) -> None:
+ with client.alpha.inference.with_streaming_response.rerank(
+ items=["string"],
+ model="model",
+ query="string",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ inference = response.parse()
+ assert_matches_type(InferenceRerankResponse, inference, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncInference:
+ parametrize = pytest.mark.parametrize(
+ "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+ )
+
+ @parametrize
+ async def test_method_rerank(self, async_client: AsyncLlamaStackClient) -> None:
+ inference = await async_client.alpha.inference.rerank(
+ items=["string"],
+ model="model",
+ query="string",
+ )
+ assert_matches_type(InferenceRerankResponse, inference, path=["response"])
+
+ @parametrize
+ async def test_method_rerank_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ inference = await async_client.alpha.inference.rerank(
+ items=["string"],
+ model="model",
+ query="string",
+ max_num_results=0,
+ )
+ assert_matches_type(InferenceRerankResponse, inference, path=["response"])
+
+ @parametrize
+ async def test_raw_response_rerank(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.inference.with_raw_response.rerank(
+ items=["string"],
+ model="model",
+ query="string",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ inference = await response.parse()
+ assert_matches_type(InferenceRerankResponse, inference, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_rerank(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.inference.with_streaming_response.rerank(
+ items=["string"],
+ model="model",
+ query="string",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ inference = await response.parse()
+ assert_matches_type(InferenceRerankResponse, inference, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
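For orientation, a minimal sketch of the rerank call exercised by the tests above. The model id, items, and query are placeholder assumptions; max_num_results is the optional parameter covered by test_method_rerank_with_all_params.

    import os

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url=os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010"))

    # Rerank candidate items against a query; argument names mirror the tests.
    reranked = client.alpha.inference.rerank(
        items=["first candidate passage", "second candidate passage"],
        model="my-reranker",  # placeholder model id
        query="which passage answers the question?",
        max_num_results=1,  # optional cap on returned results
    )
    print(reranked)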
diff --git a/tests/api_resources/alpha/test_post_training.py b/tests/api_resources/alpha/test_post_training.py
new file mode 100644
index 00000000..14229811
--- /dev/null
+++ b/tests/api_resources/alpha/test_post_training.py
@@ -0,0 +1,446 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from tests.utils import assert_matches_type
+from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
+from llama_stack_client.types.alpha import (
+ PostTrainingJob,
+)
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestPostTraining:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_preference_optimize(self, client: LlamaStackClient) -> None:
+ post_training = client.alpha.post_training.preference_optimize(
+ algorithm_config={
+ "beta": 0,
+ "loss_type": "sigmoid",
+ },
+ finetuned_model="finetuned_model",
+ hyperparam_search_config={"foo": True},
+ job_uuid="job_uuid",
+ logger_config={"foo": True},
+ training_config={
+ "gradient_accumulation_steps": 0,
+ "max_steps_per_epoch": 0,
+ "n_epochs": 0,
+ },
+ )
+ assert_matches_type(PostTrainingJob, post_training, path=["response"])
+
+ @parametrize
+ def test_method_preference_optimize_with_all_params(self, client: LlamaStackClient) -> None:
+ post_training = client.alpha.post_training.preference_optimize(
+ algorithm_config={
+ "beta": 0,
+ "loss_type": "sigmoid",
+ },
+ finetuned_model="finetuned_model",
+ hyperparam_search_config={"foo": True},
+ job_uuid="job_uuid",
+ logger_config={"foo": True},
+ training_config={
+ "gradient_accumulation_steps": 0,
+ "max_steps_per_epoch": 0,
+ "n_epochs": 0,
+ "data_config": {
+ "batch_size": 0,
+ "data_format": "instruct",
+ "dataset_id": "dataset_id",
+ "shuffle": True,
+ "packed": True,
+ "train_on_input": True,
+ "validation_dataset_id": "validation_dataset_id",
+ },
+ "dtype": "dtype",
+ "efficiency_config": {
+ "enable_activation_checkpointing": True,
+ "enable_activation_offloading": True,
+ "fsdp_cpu_offload": True,
+ "memory_efficient_fsdp_wrap": True,
+ },
+ "max_validation_steps": 0,
+ "optimizer_config": {
+ "lr": 0,
+ "num_warmup_steps": 0,
+ "optimizer_type": "adam",
+ "weight_decay": 0,
+ },
+ },
+ )
+ assert_matches_type(PostTrainingJob, post_training, path=["response"])
+
+ @parametrize
+ def test_raw_response_preference_optimize(self, client: LlamaStackClient) -> None:
+ response = client.alpha.post_training.with_raw_response.preference_optimize(
+ algorithm_config={
+ "beta": 0,
+ "loss_type": "sigmoid",
+ },
+ finetuned_model="finetuned_model",
+ hyperparam_search_config={"foo": True},
+ job_uuid="job_uuid",
+ logger_config={"foo": True},
+ training_config={
+ "gradient_accumulation_steps": 0,
+ "max_steps_per_epoch": 0,
+ "n_epochs": 0,
+ },
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ post_training = response.parse()
+ assert_matches_type(PostTrainingJob, post_training, path=["response"])
+
+ @parametrize
+ def test_streaming_response_preference_optimize(self, client: LlamaStackClient) -> None:
+ with client.alpha.post_training.with_streaming_response.preference_optimize(
+ algorithm_config={
+ "beta": 0,
+ "loss_type": "sigmoid",
+ },
+ finetuned_model="finetuned_model",
+ hyperparam_search_config={"foo": True},
+ job_uuid="job_uuid",
+ logger_config={"foo": True},
+ training_config={
+ "gradient_accumulation_steps": 0,
+ "max_steps_per_epoch": 0,
+ "n_epochs": 0,
+ },
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ post_training = response.parse()
+ assert_matches_type(PostTrainingJob, post_training, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_supervised_fine_tune(self, client: LlamaStackClient) -> None:
+ post_training = client.alpha.post_training.supervised_fine_tune(
+ hyperparam_search_config={"foo": True},
+ job_uuid="job_uuid",
+ logger_config={"foo": True},
+ training_config={
+ "gradient_accumulation_steps": 0,
+ "max_steps_per_epoch": 0,
+ "n_epochs": 0,
+ },
+ )
+ assert_matches_type(PostTrainingJob, post_training, path=["response"])
+
+ @parametrize
+ def test_method_supervised_fine_tune_with_all_params(self, client: LlamaStackClient) -> None:
+ post_training = client.alpha.post_training.supervised_fine_tune(
+ hyperparam_search_config={"foo": True},
+ job_uuid="job_uuid",
+ logger_config={"foo": True},
+ training_config={
+ "gradient_accumulation_steps": 0,
+ "max_steps_per_epoch": 0,
+ "n_epochs": 0,
+ "data_config": {
+ "batch_size": 0,
+ "data_format": "instruct",
+ "dataset_id": "dataset_id",
+ "shuffle": True,
+ "packed": True,
+ "train_on_input": True,
+ "validation_dataset_id": "validation_dataset_id",
+ },
+ "dtype": "dtype",
+ "efficiency_config": {
+ "enable_activation_checkpointing": True,
+ "enable_activation_offloading": True,
+ "fsdp_cpu_offload": True,
+ "memory_efficient_fsdp_wrap": True,
+ },
+ "max_validation_steps": 0,
+ "optimizer_config": {
+ "lr": 0,
+ "num_warmup_steps": 0,
+ "optimizer_type": "adam",
+ "weight_decay": 0,
+ },
+ },
+ algorithm_config={
+ "alpha": 0,
+ "apply_lora_to_mlp": True,
+ "apply_lora_to_output": True,
+ "lora_attn_modules": ["string"],
+ "rank": 0,
+ "type": "LoRA",
+ "quantize_base": True,
+ "use_dora": True,
+ },
+ checkpoint_dir="checkpoint_dir",
+ model="model",
+ )
+ assert_matches_type(PostTrainingJob, post_training, path=["response"])
+
+ @parametrize
+ def test_raw_response_supervised_fine_tune(self, client: LlamaStackClient) -> None:
+ response = client.alpha.post_training.with_raw_response.supervised_fine_tune(
+ hyperparam_search_config={"foo": True},
+ job_uuid="job_uuid",
+ logger_config={"foo": True},
+ training_config={
+ "gradient_accumulation_steps": 0,
+ "max_steps_per_epoch": 0,
+ "n_epochs": 0,
+ },
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ post_training = response.parse()
+ assert_matches_type(PostTrainingJob, post_training, path=["response"])
+
+ @parametrize
+ def test_streaming_response_supervised_fine_tune(self, client: LlamaStackClient) -> None:
+ with client.alpha.post_training.with_streaming_response.supervised_fine_tune(
+ hyperparam_search_config={"foo": True},
+ job_uuid="job_uuid",
+ logger_config={"foo": True},
+ training_config={
+ "gradient_accumulation_steps": 0,
+ "max_steps_per_epoch": 0,
+ "n_epochs": 0,
+ },
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ post_training = response.parse()
+ assert_matches_type(PostTrainingJob, post_training, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+
+class TestAsyncPostTraining:
+ parametrize = pytest.mark.parametrize(
+ "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+ )
+
+ @parametrize
+ async def test_method_preference_optimize(self, async_client: AsyncLlamaStackClient) -> None:
+ post_training = await async_client.alpha.post_training.preference_optimize(
+ algorithm_config={
+ "beta": 0,
+ "loss_type": "sigmoid",
+ },
+ finetuned_model="finetuned_model",
+ hyperparam_search_config={"foo": True},
+ job_uuid="job_uuid",
+ logger_config={"foo": True},
+ training_config={
+ "gradient_accumulation_steps": 0,
+ "max_steps_per_epoch": 0,
+ "n_epochs": 0,
+ },
+ )
+ assert_matches_type(PostTrainingJob, post_training, path=["response"])
+
+ @parametrize
+ async def test_method_preference_optimize_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ post_training = await async_client.alpha.post_training.preference_optimize(
+ algorithm_config={
+ "beta": 0,
+ "loss_type": "sigmoid",
+ },
+ finetuned_model="finetuned_model",
+ hyperparam_search_config={"foo": True},
+ job_uuid="job_uuid",
+ logger_config={"foo": True},
+ training_config={
+ "gradient_accumulation_steps": 0,
+ "max_steps_per_epoch": 0,
+ "n_epochs": 0,
+ "data_config": {
+ "batch_size": 0,
+ "data_format": "instruct",
+ "dataset_id": "dataset_id",
+ "shuffle": True,
+ "packed": True,
+ "train_on_input": True,
+ "validation_dataset_id": "validation_dataset_id",
+ },
+ "dtype": "dtype",
+ "efficiency_config": {
+ "enable_activation_checkpointing": True,
+ "enable_activation_offloading": True,
+ "fsdp_cpu_offload": True,
+ "memory_efficient_fsdp_wrap": True,
+ },
+ "max_validation_steps": 0,
+ "optimizer_config": {
+ "lr": 0,
+ "num_warmup_steps": 0,
+ "optimizer_type": "adam",
+ "weight_decay": 0,
+ },
+ },
+ )
+ assert_matches_type(PostTrainingJob, post_training, path=["response"])
+
+ @parametrize
+ async def test_raw_response_preference_optimize(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.post_training.with_raw_response.preference_optimize(
+ algorithm_config={
+ "beta": 0,
+ "loss_type": "sigmoid",
+ },
+ finetuned_model="finetuned_model",
+ hyperparam_search_config={"foo": True},
+ job_uuid="job_uuid",
+ logger_config={"foo": True},
+ training_config={
+ "gradient_accumulation_steps": 0,
+ "max_steps_per_epoch": 0,
+ "n_epochs": 0,
+ },
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ post_training = await response.parse()
+ assert_matches_type(PostTrainingJob, post_training, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_preference_optimize(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.post_training.with_streaming_response.preference_optimize(
+ algorithm_config={
+ "beta": 0,
+ "loss_type": "sigmoid",
+ },
+ finetuned_model="finetuned_model",
+ hyperparam_search_config={"foo": True},
+ job_uuid="job_uuid",
+ logger_config={"foo": True},
+ training_config={
+ "gradient_accumulation_steps": 0,
+ "max_steps_per_epoch": 0,
+ "n_epochs": 0,
+ },
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ post_training = await response.parse()
+ assert_matches_type(PostTrainingJob, post_training, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_supervised_fine_tune(self, async_client: AsyncLlamaStackClient) -> None:
+ post_training = await async_client.alpha.post_training.supervised_fine_tune(
+ hyperparam_search_config={"foo": True},
+ job_uuid="job_uuid",
+ logger_config={"foo": True},
+ training_config={
+ "gradient_accumulation_steps": 0,
+ "max_steps_per_epoch": 0,
+ "n_epochs": 0,
+ },
+ )
+ assert_matches_type(PostTrainingJob, post_training, path=["response"])
+
+ @parametrize
+ async def test_method_supervised_fine_tune_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ post_training = await async_client.alpha.post_training.supervised_fine_tune(
+ hyperparam_search_config={"foo": True},
+ job_uuid="job_uuid",
+ logger_config={"foo": True},
+ training_config={
+ "gradient_accumulation_steps": 0,
+ "max_steps_per_epoch": 0,
+ "n_epochs": 0,
+ "data_config": {
+ "batch_size": 0,
+ "data_format": "instruct",
+ "dataset_id": "dataset_id",
+ "shuffle": True,
+ "packed": True,
+ "train_on_input": True,
+ "validation_dataset_id": "validation_dataset_id",
+ },
+ "dtype": "dtype",
+ "efficiency_config": {
+ "enable_activation_checkpointing": True,
+ "enable_activation_offloading": True,
+ "fsdp_cpu_offload": True,
+ "memory_efficient_fsdp_wrap": True,
+ },
+ "max_validation_steps": 0,
+ "optimizer_config": {
+ "lr": 0,
+ "num_warmup_steps": 0,
+ "optimizer_type": "adam",
+ "weight_decay": 0,
+ },
+ },
+ algorithm_config={
+ "alpha": 0,
+ "apply_lora_to_mlp": True,
+ "apply_lora_to_output": True,
+ "lora_attn_modules": ["string"],
+ "rank": 0,
+ "type": "LoRA",
+ "quantize_base": True,
+ "use_dora": True,
+ },
+ checkpoint_dir="checkpoint_dir",
+ model="model",
+ )
+ assert_matches_type(PostTrainingJob, post_training, path=["response"])
+
+ @parametrize
+ async def test_raw_response_supervised_fine_tune(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.alpha.post_training.with_raw_response.supervised_fine_tune(
+ hyperparam_search_config={"foo": True},
+ job_uuid="job_uuid",
+ logger_config={"foo": True},
+ training_config={
+ "gradient_accumulation_steps": 0,
+ "max_steps_per_epoch": 0,
+ "n_epochs": 0,
+ },
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ post_training = await response.parse()
+ assert_matches_type(PostTrainingJob, post_training, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_supervised_fine_tune(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.alpha.post_training.with_streaming_response.supervised_fine_tune(
+ hyperparam_search_config={"foo": True},
+ job_uuid="job_uuid",
+ logger_config={"foo": True},
+ training_config={
+ "gradient_accumulation_steps": 0,
+ "max_steps_per_epoch": 0,
+ "n_epochs": 0,
+ },
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ post_training = await response.parse()
+ assert_matches_type(PostTrainingJob, post_training, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
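For orientation, a minimal sketch of launching a supervised fine-tuning job as exercised above. Only the required arguments from test_method_supervised_fine_tune are shown, and every value is a placeholder assumption.

    import os

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url=os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010"))

    # Launch a supervised fine-tuning job; configs are placeholders, not tuned values.
    job = client.alpha.post_training.supervised_fine_tune(
        hyperparam_search_config={},  # placeholder (tests pass an arbitrary dict)
        job_uuid="job-0001",  # placeholder job identifier
        logger_config={},  # placeholder (tests pass an arbitrary dict)
        training_config={
            "gradient_accumulation_steps": 1,
            "max_steps_per_epoch": 100,
            "n_epochs": 1,
        },
    )
    print(job)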
diff --git a/tests/api_resources/beta/__init__.py b/tests/api_resources/beta/__init__.py
new file mode 100644
index 00000000..fd8019a9
--- /dev/null
+++ b/tests/api_resources/beta/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/beta/test_datasets.py b/tests/api_resources/beta/test_datasets.py
new file mode 100644
index 00000000..3d035a16
--- /dev/null
+++ b/tests/api_resources/beta/test_datasets.py
@@ -0,0 +1,521 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from tests.utils import assert_matches_type
+from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
+from llama_stack_client.types.beta import (
+ DatasetListResponse,
+ DatasetIterrowsResponse,
+ DatasetRegisterResponse,
+ DatasetRetrieveResponse,
+)
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestDatasets:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_retrieve(self, client: LlamaStackClient) -> None:
+ dataset = client.beta.datasets.retrieve(
+ "dataset_id",
+ )
+ assert_matches_type(DatasetRetrieveResponse, dataset, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
+ response = client.beta.datasets.with_raw_response.retrieve(
+ "dataset_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ dataset = response.parse()
+ assert_matches_type(DatasetRetrieveResponse, dataset, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
+ with client.beta.datasets.with_streaming_response.retrieve(
+ "dataset_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ dataset = response.parse()
+ assert_matches_type(DatasetRetrieveResponse, dataset, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"):
+ client.beta.datasets.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ def test_method_list(self, client: LlamaStackClient) -> None:
+ dataset = client.beta.datasets.list()
+ assert_matches_type(DatasetListResponse, dataset, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: LlamaStackClient) -> None:
+ response = client.beta.datasets.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ dataset = response.parse()
+ assert_matches_type(DatasetListResponse, dataset, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: LlamaStackClient) -> None:
+ with client.beta.datasets.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ dataset = response.parse()
+ assert_matches_type(DatasetListResponse, dataset, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_appendrows(self, client: LlamaStackClient) -> None:
+ dataset = client.beta.datasets.appendrows(
+ dataset_id="dataset_id",
+ rows=[{"foo": True}],
+ )
+ assert dataset is None
+
+ @parametrize
+ def test_raw_response_appendrows(self, client: LlamaStackClient) -> None:
+ response = client.beta.datasets.with_raw_response.appendrows(
+ dataset_id="dataset_id",
+ rows=[{"foo": True}],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ dataset = response.parse()
+ assert dataset is None
+
+ @parametrize
+ def test_streaming_response_appendrows(self, client: LlamaStackClient) -> None:
+ with client.beta.datasets.with_streaming_response.appendrows(
+ dataset_id="dataset_id",
+ rows=[{"foo": True}],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ dataset = response.parse()
+ assert dataset is None
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_appendrows(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"):
+ client.beta.datasets.with_raw_response.appendrows(
+ dataset_id="",
+ rows=[{"foo": True}],
+ )
+
+ @parametrize
+ def test_method_iterrows(self, client: LlamaStackClient) -> None:
+ dataset = client.beta.datasets.iterrows(
+ dataset_id="dataset_id",
+ )
+ assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"])
+
+ @parametrize
+ def test_method_iterrows_with_all_params(self, client: LlamaStackClient) -> None:
+ dataset = client.beta.datasets.iterrows(
+ dataset_id="dataset_id",
+ limit=0,
+ start_index=0,
+ )
+ assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"])
+
+ @parametrize
+ def test_raw_response_iterrows(self, client: LlamaStackClient) -> None:
+ response = client.beta.datasets.with_raw_response.iterrows(
+ dataset_id="dataset_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ dataset = response.parse()
+ assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"])
+
+ @parametrize
+ def test_streaming_response_iterrows(self, client: LlamaStackClient) -> None:
+ with client.beta.datasets.with_streaming_response.iterrows(
+ dataset_id="dataset_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ dataset = response.parse()
+ assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_iterrows(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"):
+ client.beta.datasets.with_raw_response.iterrows(
+ dataset_id="",
+ )
+
+ @parametrize
+ def test_method_register(self, client: LlamaStackClient) -> None:
+ dataset = client.beta.datasets.register(
+ purpose="post-training/messages",
+ source={
+ "type": "uri",
+ "uri": "uri",
+ },
+ )
+ assert_matches_type(DatasetRegisterResponse, dataset, path=["response"])
+
+ @parametrize
+ def test_method_register_with_all_params(self, client: LlamaStackClient) -> None:
+ dataset = client.beta.datasets.register(
+ purpose="post-training/messages",
+ source={
+ "type": "uri",
+ "uri": "uri",
+ },
+ dataset_id="dataset_id",
+ metadata={"foo": True},
+ )
+ assert_matches_type(DatasetRegisterResponse, dataset, path=["response"])
+
+ @parametrize
+ def test_raw_response_register(self, client: LlamaStackClient) -> None:
+ response = client.beta.datasets.with_raw_response.register(
+ purpose="post-training/messages",
+ source={
+ "type": "uri",
+ "uri": "uri",
+ },
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ dataset = response.parse()
+ assert_matches_type(DatasetRegisterResponse, dataset, path=["response"])
+
+ @parametrize
+ def test_streaming_response_register(self, client: LlamaStackClient) -> None:
+ with client.beta.datasets.with_streaming_response.register(
+ purpose="post-training/messages",
+ source={
+ "type": "uri",
+ "uri": "uri",
+ },
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ dataset = response.parse()
+ assert_matches_type(DatasetRegisterResponse, dataset, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_unregister(self, client: LlamaStackClient) -> None:
+ dataset = client.beta.datasets.unregister(
+ "dataset_id",
+ )
+ assert dataset is None
+
+ @parametrize
+ def test_raw_response_unregister(self, client: LlamaStackClient) -> None:
+ response = client.beta.datasets.with_raw_response.unregister(
+ "dataset_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ dataset = response.parse()
+ assert dataset is None
+
+ @parametrize
+ def test_streaming_response_unregister(self, client: LlamaStackClient) -> None:
+ with client.beta.datasets.with_streaming_response.unregister(
+ "dataset_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ dataset = response.parse()
+ assert dataset is None
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_unregister(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"):
+ client.beta.datasets.with_raw_response.unregister(
+ "",
+ )
+
+
+class TestAsyncDatasets:
+ parametrize = pytest.mark.parametrize(
+ "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+ )
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ dataset = await async_client.beta.datasets.retrieve(
+ "dataset_id",
+ )
+ assert_matches_type(DatasetRetrieveResponse, dataset, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.beta.datasets.with_raw_response.retrieve(
+ "dataset_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ dataset = await response.parse()
+ assert_matches_type(DatasetRetrieveResponse, dataset, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.beta.datasets.with_streaming_response.retrieve(
+ "dataset_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ dataset = await response.parse()
+ assert_matches_type(DatasetRetrieveResponse, dataset, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"):
+ await async_client.beta.datasets.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
+ dataset = await async_client.beta.datasets.list()
+ assert_matches_type(DatasetListResponse, dataset, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.beta.datasets.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ dataset = await response.parse()
+ assert_matches_type(DatasetListResponse, dataset, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.beta.datasets.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ dataset = await response.parse()
+ assert_matches_type(DatasetListResponse, dataset, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_appendrows(self, async_client: AsyncLlamaStackClient) -> None:
+ dataset = await async_client.beta.datasets.appendrows(
+ dataset_id="dataset_id",
+ rows=[{"foo": True}],
+ )
+ assert dataset is None
+
+ @parametrize
+ async def test_raw_response_appendrows(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.beta.datasets.with_raw_response.appendrows(
+ dataset_id="dataset_id",
+ rows=[{"foo": True}],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ dataset = await response.parse()
+ assert dataset is None
+
+ @parametrize
+ async def test_streaming_response_appendrows(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.beta.datasets.with_streaming_response.appendrows(
+ dataset_id="dataset_id",
+ rows=[{"foo": True}],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ dataset = await response.parse()
+ assert dataset is None
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_appendrows(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"):
+ await async_client.beta.datasets.with_raw_response.appendrows(
+ dataset_id="",
+ rows=[{"foo": True}],
+ )
+
+ @parametrize
+ async def test_method_iterrows(self, async_client: AsyncLlamaStackClient) -> None:
+ dataset = await async_client.beta.datasets.iterrows(
+ dataset_id="dataset_id",
+ )
+ assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"])
+
+ @parametrize
+ async def test_method_iterrows_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ dataset = await async_client.beta.datasets.iterrows(
+ dataset_id="dataset_id",
+ limit=0,
+ start_index=0,
+ )
+ assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"])
+
+ @parametrize
+ async def test_raw_response_iterrows(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.beta.datasets.with_raw_response.iterrows(
+ dataset_id="dataset_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ dataset = await response.parse()
+ assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_iterrows(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.beta.datasets.with_streaming_response.iterrows(
+ dataset_id="dataset_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ dataset = await response.parse()
+ assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_iterrows(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"):
+ await async_client.beta.datasets.with_raw_response.iterrows(
+ dataset_id="",
+ )
+
+ @parametrize
+ async def test_method_register(self, async_client: AsyncLlamaStackClient) -> None:
+ dataset = await async_client.beta.datasets.register(
+ purpose="post-training/messages",
+ source={
+ "type": "uri",
+ "uri": "uri",
+ },
+ )
+ assert_matches_type(DatasetRegisterResponse, dataset, path=["response"])
+
+ @parametrize
+ async def test_method_register_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ dataset = await async_client.beta.datasets.register(
+ purpose="post-training/messages",
+ source={
+ "type": "uri",
+ "uri": "uri",
+ },
+ dataset_id="dataset_id",
+ metadata={"foo": True},
+ )
+ assert_matches_type(DatasetRegisterResponse, dataset, path=["response"])
+
+ @parametrize
+ async def test_raw_response_register(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.beta.datasets.with_raw_response.register(
+ purpose="post-training/messages",
+ source={
+ "type": "uri",
+ "uri": "uri",
+ },
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ dataset = await response.parse()
+ assert_matches_type(DatasetRegisterResponse, dataset, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_register(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.beta.datasets.with_streaming_response.register(
+ purpose="post-training/messages",
+ source={
+ "type": "uri",
+ "uri": "uri",
+ },
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ dataset = await response.parse()
+ assert_matches_type(DatasetRegisterResponse, dataset, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_unregister(self, async_client: AsyncLlamaStackClient) -> None:
+ dataset = await async_client.beta.datasets.unregister(
+ "dataset_id",
+ )
+ assert dataset is None
+
+ @parametrize
+ async def test_raw_response_unregister(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.beta.datasets.with_raw_response.unregister(
+ "dataset_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ dataset = await response.parse()
+ assert dataset is None
+
+ @parametrize
+ async def test_streaming_response_unregister(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.beta.datasets.with_streaming_response.unregister(
+ "dataset_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ dataset = await response.parse()
+ assert dataset is None
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_unregister(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"):
+ await async_client.beta.datasets.with_raw_response.unregister(
+ "",
+ )
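For orientation, a minimal sketch of registering a dataset and paging through its rows as exercised above. The URI and dataset id are placeholder assumptions; the purpose/source shapes mirror test_method_register and the optional arguments come from the *_with_all_params tests.

    import os

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url=os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010"))

    # Register a dataset backed by a URI source, giving it an explicit id.
    client.beta.datasets.register(
        purpose="post-training/messages",
        source={"type": "uri", "uri": "https://example.com/train.jsonl"},  # placeholder URI
        dataset_id="my-dataset",  # optional explicit id, as in the *_with_all_params test
    )

    # Page through the registered rows.
    rows = client.beta.datasets.iterrows(
        dataset_id="my-dataset",
        limit=10,
    )
    print(rows)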
From d8ab6cb77267af53f3f2e9ff3ebaab9364a754c7 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Wed, 29 Oct 2025 17:47:27 +0000
Subject: [PATCH 3/9] feat(api): manual updates
---
.stats.yml | 6 +-
api.md | 2 +-
.../resources/responses/responses.py | 22 ----
src/llama_stack_client/resources/routes.py | 37 +-----
src/llama_stack_client/types/__init__.py | 1 -
.../types/conversation_create_params.py | 22 ----
.../types/conversations/item_create_params.py | 22 ----
.../conversations/item_create_response.py | 22 ----
.../types/conversations/item_get_response.py | 22 ----
.../types/conversations/item_list_response.py | 22 ----
.../types/response_create_params.py | 93 --------------
.../types/response_list_response.py | 116 ------------------
.../types/response_object.py | 94 --------------
.../types/response_object_stream.py | 48 --------
.../responses/input_item_list_response.py | 22 ----
.../types/route_list_params.py | 17 ---
tests/api_resources/test_responses.py | 40 ------
tests/api_resources/test_routes.py | 14 ---
18 files changed, 5 insertions(+), 617 deletions(-)
delete mode 100644 src/llama_stack_client/types/route_list_params.py
diff --git a/.stats.yml b/.stats.yml
index 12443710..24428280 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 104
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-ab75f403b95703f8fe6c284da9efc1cc09d91cb27a4aa4da8660c825b56ddd02.yml
-openapi_spec_hash: 10f4950f76234968692b748956c83d52
-config_hash: a3829dbdaa491194d01f399784d532cd
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-35c6569e5e9fcc85084c9728eb7fc7c5908297fcc77043d621d25de3c850a990.yml
+openapi_spec_hash: 0f95bbeee16f3205d36ec34cfa62c711
+config_hash: fa14a2107881931b2ddef8c768eeb558
diff --git a/api.md b/api.md
index 9dbfca84..5f7a90a2 100644
--- a/api.md
+++ b/api.md
@@ -316,7 +316,7 @@ from llama_stack_client.types import ListRoutesResponse, RouteListResponse
Methods:
-- client.routes.list(\*\*params) -> RouteListResponse
+- client.routes.list() -> RouteListResponse
# Moderations
diff --git a/src/llama_stack_client/resources/responses/responses.py b/src/llama_stack_client/resources/responses/responses.py
index e0109583..6bc29a62 100644
--- a/src/llama_stack_client/resources/responses/responses.py
+++ b/src/llama_stack_client/resources/responses/responses.py
@@ -78,7 +78,6 @@ def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
- prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
stream: Literal[False] | Omit = omit,
temperature: float | Omit = omit,
@@ -109,8 +108,6 @@ def create(
response. This can be used to easily fork-off new responses from existing
responses.
- prompt: (Optional) Prompt object with ID, version, and variables.
-
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -135,7 +132,6 @@ def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
- prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
temperature: float | Omit = omit,
text: response_create_params.Text | Omit = omit,
@@ -165,8 +161,6 @@ def create(
response. This can be used to easily fork-off new responses from existing
responses.
- prompt: (Optional) Prompt object with ID, version, and variables.
-
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -191,7 +185,6 @@ def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
- prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
temperature: float | Omit = omit,
text: response_create_params.Text | Omit = omit,
@@ -221,8 +214,6 @@ def create(
response. This can be used to easily fork-off new responses from existing
responses.
- prompt: (Optional) Prompt object with ID, version, and variables.
-
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -246,7 +237,6 @@ def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
- prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
stream: Literal[False] | Literal[True] | Omit = omit,
temperature: float | Omit = omit,
@@ -270,7 +260,6 @@ def create(
"instructions": instructions,
"max_infer_iters": max_infer_iters,
"previous_response_id": previous_response_id,
- "prompt": prompt,
"store": store,
"stream": stream,
"temperature": temperature,
@@ -446,7 +435,6 @@ async def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
- prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
stream: Literal[False] | Omit = omit,
temperature: float | Omit = omit,
@@ -477,8 +465,6 @@ async def create(
response. This can be used to easily fork-off new responses from existing
responses.
- prompt: (Optional) Prompt object with ID, version, and variables.
-
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -503,7 +489,6 @@ async def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
- prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
temperature: float | Omit = omit,
text: response_create_params.Text | Omit = omit,
@@ -533,8 +518,6 @@ async def create(
response. This can be used to easily fork-off new responses from existing
responses.
- prompt: (Optional) Prompt object with ID, version, and variables.
-
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -559,7 +542,6 @@ async def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
- prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
temperature: float | Omit = omit,
text: response_create_params.Text | Omit = omit,
@@ -589,8 +571,6 @@ async def create(
response. This can be used to easily fork-off new responses from existing
responses.
- prompt: (Optional) Prompt object with ID, version, and variables.
-
text: Text response configuration for OpenAI responses.
extra_headers: Send extra headers
@@ -614,7 +594,6 @@ async def create(
instructions: str | Omit = omit,
max_infer_iters: int | Omit = omit,
previous_response_id: str | Omit = omit,
- prompt: response_create_params.Prompt | Omit = omit,
store: bool | Omit = omit,
stream: Literal[False] | Literal[True] | Omit = omit,
temperature: float | Omit = omit,
@@ -638,7 +617,6 @@ async def create(
"instructions": instructions,
"max_infer_iters": max_infer_iters,
"previous_response_id": previous_response_id,
- "prompt": prompt,
"store": store,
"stream": stream,
"temperature": temperature,
diff --git a/src/llama_stack_client/resources/routes.py b/src/llama_stack_client/resources/routes.py
index ff9b2c59..0797d00f 100644
--- a/src/llama_stack_client/resources/routes.py
+++ b/src/llama_stack_client/resources/routes.py
@@ -9,13 +9,10 @@
from __future__ import annotations
from typing import Type, cast
-from typing_extensions import Literal
import httpx
-from ..types import route_list_params
-from .._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
-from .._utils import maybe_transform, async_maybe_transform
+from .._types import Body, Query, Headers, NotGiven, not_given
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
@@ -54,7 +51,6 @@ def with_streaming_response(self) -> RoutesResourceWithStreamingResponse:
def list(
self,
*,
- api_filter: Literal["v1", "v1alpha", "v1beta", "deprecated"] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -66,20 +62,6 @@ def list(
List all available API routes with their methods and implementing
providers.
-
- Args:
- api_filter: Optional filter to control which routes are returned. Can be an API level ('v1',
- 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or
- 'deprecated' to show deprecated routes across all levels. If not specified,
- returns only non-deprecated v1 routes.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
"""
return self._get(
"/v1/inspect/routes",
@@ -88,7 +70,6 @@ def list(
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
- query=maybe_transform({"api_filter": api_filter}, route_list_params.RouteListParams),
post_parser=DataWrapper[RouteListResponse]._unwrapper,
),
cast_to=cast(Type[RouteListResponse], DataWrapper[RouteListResponse]),
@@ -118,7 +99,6 @@ def with_streaming_response(self) -> AsyncRoutesResourceWithStreamingResponse:
async def list(
self,
*,
- api_filter: Literal["v1", "v1alpha", "v1beta", "deprecated"] | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -130,20 +110,6 @@ async def list(
List all available API routes with their methods and implementing
providers.
-
- Args:
- api_filter: Optional filter to control which routes are returned. Can be an API level ('v1',
- 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or
- 'deprecated' to show deprecated routes across all levels. If not specified,
- returns only non-deprecated v1 routes.
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
"""
return await self._get(
"/v1/inspect/routes",
@@ -152,7 +118,6 @@ async def list(
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
- query=await async_maybe_transform({"api_filter": api_filter}, route_list_params.RouteListParams),
post_parser=DataWrapper[RouteListResponse]._unwrapper,
),
cast_to=cast(Type[RouteListResponse], DataWrapper[RouteListResponse]),
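
Editor's note: for context on the breaking change above, a minimal usage sketch (not part of the patch; the base URL is illustrative) showing the routes call after `api_filter` is removed from the client:

```python
from llama_stack_client import LlamaStackClient

# Illustrative only: a locally running Llama Stack server.
client = LlamaStackClient(base_url="http://localhost:8321")

# The SDK no longer sends an `api_filter` query parameter; per the removed
# docstring, the endpoint now returns only non-deprecated v1 routes by default.
routes = client.routes.list()
print(routes)
```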
diff --git a/src/llama_stack_client/types/__init__.py b/src/llama_stack_client/types/__init__.py
index cd62db7f..173a1e03 100644
--- a/src/llama_stack_client/types/__init__.py
+++ b/src/llama_stack_client/types/__init__.py
@@ -43,7 +43,6 @@
from .response_object import ResponseObject as ResponseObject
from .file_list_params import FileListParams as FileListParams
from .tool_list_params import ToolListParams as ToolListParams
-from .route_list_params import RouteListParams as RouteListParams
from .scoring_fn_params import ScoringFnParams as ScoringFnParams
from .file_create_params import FileCreateParams as FileCreateParams
from .tool_list_response import ToolListResponse as ToolListResponse
diff --git a/src/llama_stack_client/types/conversation_create_params.py b/src/llama_stack_client/types/conversation_create_params.py
index 96fbb82e..c51245dd 100644
--- a/src/llama_stack_client/types/conversation_create_params.py
+++ b/src/llama_stack_client/types/conversation_create_params.py
@@ -20,7 +20,6 @@
"ItemOpenAIResponseMessageContentUnionMember1",
"ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
- "ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"ItemOpenAIResponseMessageContentUnionMember2",
"ItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"ItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -65,34 +64,13 @@ class ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCont
type: Required[Literal["input_image"]]
"""Content type identifier, always "input_image" """
- file_id: str
- """(Optional) The ID of the file to be sent to the model."""
-
image_url: str
"""(Optional) URL of the image content"""
-class ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(TypedDict, total=False):
- type: Required[Literal["input_file"]]
- """The type of the input item. Always `input_file`."""
-
- file_data: str
- """The data of the file to be sent to the model."""
-
- file_id: str
- """(Optional) The ID of the file to be sent to the model."""
-
- file_url: str
- """The URL of the file to be sent to the model."""
-
- filename: str
- """The name of the file to be sent to the model."""
-
-
ItemOpenAIResponseMessageContentUnionMember1: TypeAlias = Union[
ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
- ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
]
diff --git a/src/llama_stack_client/types/conversations/item_create_params.py b/src/llama_stack_client/types/conversations/item_create_params.py
index 111c39fb..8df31144 100644
--- a/src/llama_stack_client/types/conversations/item_create_params.py
+++ b/src/llama_stack_client/types/conversations/item_create_params.py
@@ -20,7 +20,6 @@
"ItemOpenAIResponseMessageContentUnionMember1",
"ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
- "ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"ItemOpenAIResponseMessageContentUnionMember2",
"ItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"ItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -62,34 +61,13 @@ class ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCont
type: Required[Literal["input_image"]]
"""Content type identifier, always "input_image" """
- file_id: str
- """(Optional) The ID of the file to be sent to the model."""
-
image_url: str
"""(Optional) URL of the image content"""
-class ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(TypedDict, total=False):
- type: Required[Literal["input_file"]]
- """The type of the input item. Always `input_file`."""
-
- file_data: str
- """The data of the file to be sent to the model."""
-
- file_id: str
- """(Optional) The ID of the file to be sent to the model."""
-
- file_url: str
- """The URL of the file to be sent to the model."""
-
- filename: str
- """The name of the file to be sent to the model."""
-
-
ItemOpenAIResponseMessageContentUnionMember1: TypeAlias = Union[
ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
- ItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
]
diff --git a/src/llama_stack_client/types/conversations/item_create_response.py b/src/llama_stack_client/types/conversations/item_create_response.py
index 580aaf23..c382e2b9 100644
--- a/src/llama_stack_client/types/conversations/item_create_response.py
+++ b/src/llama_stack_client/types/conversations/item_create_response.py
@@ -19,7 +19,6 @@
"DataOpenAIResponseMessageContentUnionMember1",
"DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
- "DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"DataOpenAIResponseMessageContentUnionMember2",
"DataOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"DataOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -56,35 +55,14 @@ class DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCont
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
- file_id: Optional[str] = None
- """(Optional) The ID of the file to be sent to the model."""
-
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
-class DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
- type: Literal["input_file"]
- """The type of the input item. Always `input_file`."""
-
- file_data: Optional[str] = None
- """The data of the file to be sent to the model."""
-
- file_id: Optional[str] = None
- """(Optional) The ID of the file to be sent to the model."""
-
- file_url: Optional[str] = None
- """The URL of the file to be sent to the model."""
-
- filename: Optional[str] = None
- """The name of the file to be sent to the model."""
-
-
DataOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
- DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
diff --git a/src/llama_stack_client/types/conversations/item_get_response.py b/src/llama_stack_client/types/conversations/item_get_response.py
index 434e4639..9f8d4bda 100644
--- a/src/llama_stack_client/types/conversations/item_get_response.py
+++ b/src/llama_stack_client/types/conversations/item_get_response.py
@@ -18,7 +18,6 @@
"OpenAIResponseMessageContentUnionMember1",
"OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
- "OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OpenAIResponseMessageContentUnionMember2",
"OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -55,35 +54,14 @@ class OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentI
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
- file_id: Optional[str] = None
- """(Optional) The ID of the file to be sent to the model."""
-
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
-class OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
- type: Literal["input_file"]
- """The type of the input item. Always `input_file`."""
-
- file_data: Optional[str] = None
- """The data of the file to be sent to the model."""
-
- file_id: Optional[str] = None
- """(Optional) The ID of the file to be sent to the model."""
-
- file_url: Optional[str] = None
- """The URL of the file to be sent to the model."""
-
- filename: Optional[str] = None
- """The name of the file to be sent to the model."""
-
-
OpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
- OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
diff --git a/src/llama_stack_client/types/conversations/item_list_response.py b/src/llama_stack_client/types/conversations/item_list_response.py
index d6ba4735..b95f56fb 100644
--- a/src/llama_stack_client/types/conversations/item_list_response.py
+++ b/src/llama_stack_client/types/conversations/item_list_response.py
@@ -18,7 +18,6 @@
"OpenAIResponseMessageContentUnionMember1",
"OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
- "OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OpenAIResponseMessageContentUnionMember2",
"OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -55,35 +54,14 @@ class OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentI
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
- file_id: Optional[str] = None
- """(Optional) The ID of the file to be sent to the model."""
-
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
-class OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
- type: Literal["input_file"]
- """The type of the input item. Always `input_file`."""
-
- file_data: Optional[str] = None
- """The data of the file to be sent to the model."""
-
- file_id: Optional[str] = None
- """(Optional) The ID of the file to be sent to the model."""
-
- file_url: Optional[str] = None
- """The URL of the file to be sent to the model."""
-
- filename: Optional[str] = None
- """The name of the file to be sent to the model."""
-
-
OpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
- OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
diff --git a/src/llama_stack_client/types/response_create_params.py b/src/llama_stack_client/types/response_create_params.py
index f99cd037..c8b48657 100644
--- a/src/llama_stack_client/types/response_create_params.py
+++ b/src/llama_stack_client/types/response_create_params.py
@@ -20,7 +20,6 @@
"InputUnionMember1OpenAIResponseMessageContentUnionMember1",
"InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
- "InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"InputUnionMember1OpenAIResponseMessageContentUnionMember2",
"InputUnionMember1OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"InputUnionMember1OpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -39,11 +38,6 @@
"InputUnionMember1OpenAIResponseMcpApprovalRequest",
"InputUnionMember1OpenAIResponseInputFunctionToolCallOutput",
"InputUnionMember1OpenAIResponseMcpApprovalResponse",
- "Prompt",
- "PromptVariables",
- "PromptVariablesOpenAIResponseInputMessageContentText",
- "PromptVariablesOpenAIResponseInputMessageContentImage",
- "PromptVariablesOpenAIResponseInputMessageContentFile",
"Text",
"TextFormat",
"Tool",
@@ -89,9 +83,6 @@ class ResponseCreateParamsBase(TypedDict, total=False):
responses.
"""
- prompt: Prompt
- """(Optional) Prompt object with ID, version, and variables."""
-
store: bool
temperature: float
@@ -121,36 +112,13 @@ class InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInp
type: Required[Literal["input_image"]]
"""Content type identifier, always "input_image" """
- file_id: str
- """(Optional) The ID of the file to be sent to the model."""
-
image_url: str
"""(Optional) URL of the image content"""
-class InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(
- TypedDict, total=False
-):
- type: Required[Literal["input_file"]]
- """The type of the input item. Always `input_file`."""
-
- file_data: str
- """The data of the file to be sent to the model."""
-
- file_id: str
- """(Optional) The ID of the file to be sent to the model."""
-
- file_url: str
- """The URL of the file to be sent to the model."""
-
- filename: str
- """The name of the file to be sent to the model."""
-
-
InputUnionMember1OpenAIResponseMessageContentUnionMember1: TypeAlias = Union[
InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
- InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
]
@@ -432,67 +400,6 @@ class InputUnionMember1OpenAIResponseMcpApprovalResponse(TypedDict, total=False)
]
-class PromptVariablesOpenAIResponseInputMessageContentText(TypedDict, total=False):
- text: Required[str]
- """The text content of the input message"""
-
- type: Required[Literal["input_text"]]
- """Content type identifier, always "input_text" """
-
-
-class PromptVariablesOpenAIResponseInputMessageContentImage(TypedDict, total=False):
- detail: Required[Literal["low", "high", "auto"]]
- """Level of detail for image processing, can be "low", "high", or "auto" """
-
- type: Required[Literal["input_image"]]
- """Content type identifier, always "input_image" """
-
- file_id: str
- """(Optional) The ID of the file to be sent to the model."""
-
- image_url: str
- """(Optional) URL of the image content"""
-
-
-class PromptVariablesOpenAIResponseInputMessageContentFile(TypedDict, total=False):
- type: Required[Literal["input_file"]]
- """The type of the input item. Always `input_file`."""
-
- file_data: str
- """The data of the file to be sent to the model."""
-
- file_id: str
- """(Optional) The ID of the file to be sent to the model."""
-
- file_url: str
- """The URL of the file to be sent to the model."""
-
- filename: str
- """The name of the file to be sent to the model."""
-
-
-PromptVariables: TypeAlias = Union[
- PromptVariablesOpenAIResponseInputMessageContentText,
- PromptVariablesOpenAIResponseInputMessageContentImage,
- PromptVariablesOpenAIResponseInputMessageContentFile,
-]
-
-
-class Prompt(TypedDict, total=False):
- id: Required[str]
- """Unique identifier of the prompt template"""
-
- variables: Dict[str, PromptVariables]
- """
- Dictionary of variable names to OpenAIResponseInputMessageContent structure for
- template substitution. The substitution values can either be strings, or other
- Response input types like images or files.
- """
-
- version: str
- """Version number of the prompt to use (defaults to latest if not specified)"""
-
-
class TextFormat(TypedDict, total=False):
type: Required[Literal["text", "json_schema", "json_object"]]
"""Must be "text", "json_schema", or "json_object" to identify the format type"""
diff --git a/src/llama_stack_client/types/response_list_response.py b/src/llama_stack_client/types/response_list_response.py
index ccd9a3d7..78c683b4 100644
--- a/src/llama_stack_client/types/response_list_response.py
+++ b/src/llama_stack_client/types/response_list_response.py
@@ -21,7 +21,6 @@
"InputOpenAIResponseMessageContentUnionMember1",
"InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
- "InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"InputOpenAIResponseMessageContentUnionMember2",
"InputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"InputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -45,7 +44,6 @@
"OutputOpenAIResponseMessageContentUnionMember1",
"OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
- "OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OutputOpenAIResponseMessageContentUnionMember2",
"OutputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OutputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -65,11 +63,6 @@
"Text",
"TextFormat",
"Error",
- "Prompt",
- "PromptVariables",
- "PromptVariablesOpenAIResponseInputMessageContentText",
- "PromptVariablesOpenAIResponseInputMessageContentImage",
- "PromptVariablesOpenAIResponseInputMessageContentFile",
"Tool",
"ToolOpenAIResponseInputToolWebSearch",
"ToolOpenAIResponseInputToolFileSearch",
@@ -99,35 +92,14 @@ class InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCon
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
- file_id: Optional[str] = None
- """(Optional) The ID of the file to be sent to the model."""
-
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
-class InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
- type: Literal["input_file"]
- """The type of the input item. Always `input_file`."""
-
- file_data: Optional[str] = None
- """The data of the file to be sent to the model."""
-
- file_id: Optional[str] = None
- """(Optional) The ID of the file to be sent to the model."""
-
- file_url: Optional[str] = None
- """The URL of the file to be sent to the model."""
-
- filename: Optional[str] = None
- """The name of the file to be sent to the model."""
-
-
InputOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
- InputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
@@ -424,35 +396,14 @@ class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCo
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
- file_id: Optional[str] = None
- """(Optional) The ID of the file to be sent to the model."""
-
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
-class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
- type: Literal["input_file"]
- """The type of the input item. Always `input_file`."""
-
- file_data: Optional[str] = None
- """The data of the file to be sent to the model."""
-
- file_id: Optional[str] = None
- """(Optional) The ID of the file to be sent to the model."""
-
- file_url: Optional[str] = None
- """The URL of the file to be sent to the model."""
-
- filename: Optional[str] = None
- """The name of the file to be sent to the model."""
-
-
OutputOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
- OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
@@ -748,70 +699,6 @@ class Error(BaseModel):
"""Human-readable error message describing the failure"""
-class PromptVariablesOpenAIResponseInputMessageContentText(BaseModel):
- text: str
- """The text content of the input message"""
-
- type: Literal["input_text"]
- """Content type identifier, always "input_text" """
-
-
-class PromptVariablesOpenAIResponseInputMessageContentImage(BaseModel):
- detail: Literal["low", "high", "auto"]
- """Level of detail for image processing, can be "low", "high", or "auto" """
-
- type: Literal["input_image"]
- """Content type identifier, always "input_image" """
-
- file_id: Optional[str] = None
- """(Optional) The ID of the file to be sent to the model."""
-
- image_url: Optional[str] = None
- """(Optional) URL of the image content"""
-
-
-class PromptVariablesOpenAIResponseInputMessageContentFile(BaseModel):
- type: Literal["input_file"]
- """The type of the input item. Always `input_file`."""
-
- file_data: Optional[str] = None
- """The data of the file to be sent to the model."""
-
- file_id: Optional[str] = None
- """(Optional) The ID of the file to be sent to the model."""
-
- file_url: Optional[str] = None
- """The URL of the file to be sent to the model."""
-
- filename: Optional[str] = None
- """The name of the file to be sent to the model."""
-
-
-PromptVariables: TypeAlias = Annotated[
- Union[
- PromptVariablesOpenAIResponseInputMessageContentText,
- PromptVariablesOpenAIResponseInputMessageContentImage,
- PromptVariablesOpenAIResponseInputMessageContentFile,
- ],
- PropertyInfo(discriminator="type"),
-]
-
-
-class Prompt(BaseModel):
- id: str
- """Unique identifier of the prompt template"""
-
- variables: Optional[Dict[str, PromptVariables]] = None
- """
- Dictionary of variable names to OpenAIResponseInputMessageContent structure for
- template substitution. The substitution values can either be strings, or other
- Response input types like images or files.
- """
-
- version: Optional[str] = None
- """Version number of the prompt to use (defaults to latest if not specified)"""
-
-
class ToolOpenAIResponseInputToolWebSearch(BaseModel):
type: Literal["web_search", "web_search_preview", "web_search_preview_2025_03_11"]
"""Web search tool type variant to use"""
@@ -955,9 +842,6 @@ class ResponseListResponse(BaseModel):
previous_response_id: Optional[str] = None
"""(Optional) ID of the previous response in a conversation"""
- prompt: Optional[Prompt] = None
- """(Optional) Reference to a prompt template and its variables."""
-
temperature: Optional[float] = None
"""(Optional) Sampling temperature used for generation"""
diff --git a/src/llama_stack_client/types/response_object.py b/src/llama_stack_client/types/response_object.py
index 706f50e2..57f708ce 100644
--- a/src/llama_stack_client/types/response_object.py
+++ b/src/llama_stack_client/types/response_object.py
@@ -21,7 +21,6 @@
"OutputOpenAIResponseMessageContentUnionMember1",
"OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
- "OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OutputOpenAIResponseMessageContentUnionMember2",
"OutputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OutputOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -41,11 +40,6 @@
"Text",
"TextFormat",
"Error",
- "Prompt",
- "PromptVariables",
- "PromptVariablesOpenAIResponseInputMessageContentText",
- "PromptVariablesOpenAIResponseInputMessageContentImage",
- "PromptVariablesOpenAIResponseInputMessageContentFile",
"Tool",
"ToolOpenAIResponseInputToolWebSearch",
"ToolOpenAIResponseInputToolFileSearch",
@@ -75,35 +69,14 @@ class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCo
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
- file_id: Optional[str] = None
- """(Optional) The ID of the file to be sent to the model."""
-
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
-class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
- type: Literal["input_file"]
- """The type of the input item. Always `input_file`."""
-
- file_data: Optional[str] = None
- """The data of the file to be sent to the model."""
-
- file_id: Optional[str] = None
- """(Optional) The ID of the file to be sent to the model."""
-
- file_url: Optional[str] = None
- """The URL of the file to be sent to the model."""
-
- filename: Optional[str] = None
- """The name of the file to be sent to the model."""
-
-
OutputOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
- OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
@@ -399,70 +372,6 @@ class Error(BaseModel):
"""Human-readable error message describing the failure"""
-class PromptVariablesOpenAIResponseInputMessageContentText(BaseModel):
- text: str
- """The text content of the input message"""
-
- type: Literal["input_text"]
- """Content type identifier, always "input_text" """
-
-
-class PromptVariablesOpenAIResponseInputMessageContentImage(BaseModel):
- detail: Literal["low", "high", "auto"]
- """Level of detail for image processing, can be "low", "high", or "auto" """
-
- type: Literal["input_image"]
- """Content type identifier, always "input_image" """
-
- file_id: Optional[str] = None
- """(Optional) The ID of the file to be sent to the model."""
-
- image_url: Optional[str] = None
- """(Optional) URL of the image content"""
-
-
-class PromptVariablesOpenAIResponseInputMessageContentFile(BaseModel):
- type: Literal["input_file"]
- """The type of the input item. Always `input_file`."""
-
- file_data: Optional[str] = None
- """The data of the file to be sent to the model."""
-
- file_id: Optional[str] = None
- """(Optional) The ID of the file to be sent to the model."""
-
- file_url: Optional[str] = None
- """The URL of the file to be sent to the model."""
-
- filename: Optional[str] = None
- """The name of the file to be sent to the model."""
-
-
-PromptVariables: TypeAlias = Annotated[
- Union[
- PromptVariablesOpenAIResponseInputMessageContentText,
- PromptVariablesOpenAIResponseInputMessageContentImage,
- PromptVariablesOpenAIResponseInputMessageContentFile,
- ],
- PropertyInfo(discriminator="type"),
-]
-
-
-class Prompt(BaseModel):
- id: str
- """Unique identifier of the prompt template"""
-
- variables: Optional[Dict[str, PromptVariables]] = None
- """
- Dictionary of variable names to OpenAIResponseInputMessageContent structure for
- template substitution. The substitution values can either be strings, or other
- Response input types like images or files.
- """
-
- version: Optional[str] = None
- """Version number of the prompt to use (defaults to latest if not specified)"""
-
-
class ToolOpenAIResponseInputToolWebSearch(BaseModel):
type: Literal["web_search", "web_search_preview", "web_search_preview_2025_03_11"]
"""Web search tool type variant to use"""
@@ -613,9 +522,6 @@ def output_text(self) -> str:
previous_response_id: Optional[str] = None
"""(Optional) ID of the previous response in a conversation"""
- prompt: Optional[Prompt] = None
- """(Optional) Reference to a prompt template and its variables."""
-
temperature: Optional[float] = None
"""(Optional) Sampling temperature used for generation"""
diff --git a/src/llama_stack_client/types/response_object_stream.py b/src/llama_stack_client/types/response_object_stream.py
index 16fe6c6d..a75ac721 100644
--- a/src/llama_stack_client/types/response_object_stream.py
+++ b/src/llama_stack_client/types/response_object_stream.py
@@ -23,7 +23,6 @@
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1",
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
- "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2",
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -46,7 +45,6 @@
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1",
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
- "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2",
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -161,37 +159,14 @@ class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessage
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
- file_id: Optional[str] = None
- """(Optional) The ID of the file to be sent to the model."""
-
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
-class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(
- BaseModel
-):
- type: Literal["input_file"]
- """The type of the input item. Always `input_file`."""
-
- file_data: Optional[str] = None
- """The data of the file to be sent to the model."""
-
- file_id: Optional[str] = None
- """(Optional) The ID of the file to be sent to the model."""
-
- file_url: Optional[str] = None
- """The URL of the file to be sent to the model."""
-
- filename: Optional[str] = None
- """The name of the file to be sent to the model."""
-
-
OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
- OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
@@ -495,37 +470,14 @@ class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageC
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
- file_id: Optional[str] = None
- """(Optional) The ID of the file to be sent to the model."""
-
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
-class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(
- BaseModel
-):
- type: Literal["input_file"]
- """The type of the input item. Always `input_file`."""
-
- file_data: Optional[str] = None
- """The data of the file to be sent to the model."""
-
- file_id: Optional[str] = None
- """(Optional) The ID of the file to be sent to the model."""
-
- file_url: Optional[str] = None
- """The URL of the file to be sent to the model."""
-
- filename: Optional[str] = None
- """The name of the file to be sent to the model."""
-
-
OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
- OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
diff --git a/src/llama_stack_client/types/responses/input_item_list_response.py b/src/llama_stack_client/types/responses/input_item_list_response.py
index 71a59f50..b812ee62 100644
--- a/src/llama_stack_client/types/responses/input_item_list_response.py
+++ b/src/llama_stack_client/types/responses/input_item_list_response.py
@@ -19,7 +19,6 @@
"DataOpenAIResponseMessageContentUnionMember1",
"DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
- "DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile",
"DataOpenAIResponseMessageContentUnionMember2",
"DataOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputText",
"DataOpenAIResponseMessageContentUnionMember2OpenAIResponseOutputMessageContentOutputTextAnnotation",
@@ -56,35 +55,14 @@ class DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCont
type: Literal["input_image"]
"""Content type identifier, always "input_image" """
- file_id: Optional[str] = None
- """(Optional) The ID of the file to be sent to the model."""
-
image_url: Optional[str] = None
"""(Optional) URL of the image content"""
-class DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile(BaseModel):
- type: Literal["input_file"]
- """The type of the input item. Always `input_file`."""
-
- file_data: Optional[str] = None
- """The data of the file to be sent to the model."""
-
- file_id: Optional[str] = None
- """(Optional) The ID of the file to be sent to the model."""
-
- file_url: Optional[str] = None
- """The URL of the file to be sent to the model."""
-
- filename: Optional[str] = None
- """The name of the file to be sent to the model."""
-
-
DataOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[
Union[
DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText,
DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage,
- DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentFile,
],
PropertyInfo(discriminator="type"),
]
diff --git a/src/llama_stack_client/types/route_list_params.py b/src/llama_stack_client/types/route_list_params.py
deleted file mode 100644
index 764b13c7..00000000
--- a/src/llama_stack_client/types/route_list_params.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Literal, TypedDict
-
-__all__ = ["RouteListParams"]
-
-
-class RouteListParams(TypedDict, total=False):
- api_filter: Literal["v1", "v1alpha", "v1beta", "deprecated"]
- """Optional filter to control which routes are returned.
-
- Can be an API level ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at
- that level, or 'deprecated' to show deprecated routes across all levels. If not
- specified, returns only non-deprecated v1 routes.
- """
diff --git a/tests/api_resources/test_responses.py b/tests/api_resources/test_responses.py
index 3bdafe3c..5ef731fd 100644
--- a/tests/api_resources/test_responses.py
+++ b/tests/api_resources/test_responses.py
@@ -46,16 +46,6 @@ def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient
instructions="instructions",
max_infer_iters=0,
previous_response_id="previous_response_id",
- prompt={
- "id": "id",
- "variables": {
- "foo": {
- "text": "text",
- "type": "input_text",
- }
- },
- "version": "version",
- },
store=True,
stream=False,
temperature=0,
@@ -123,16 +113,6 @@ def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient
instructions="instructions",
max_infer_iters=0,
previous_response_id="previous_response_id",
- prompt={
- "id": "id",
- "variables": {
- "foo": {
- "text": "text",
- "type": "input_text",
- }
- },
- "version": "version",
- },
store=True,
temperature=0,
text={
@@ -315,16 +295,6 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
instructions="instructions",
max_infer_iters=0,
previous_response_id="previous_response_id",
- prompt={
- "id": "id",
- "variables": {
- "foo": {
- "text": "text",
- "type": "input_text",
- }
- },
- "version": "version",
- },
store=True,
stream=False,
temperature=0,
@@ -392,16 +362,6 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
instructions="instructions",
max_infer_iters=0,
previous_response_id="previous_response_id",
- prompt={
- "id": "id",
- "variables": {
- "foo": {
- "text": "text",
- "type": "input_text",
- }
- },
- "version": "version",
- },
store=True,
temperature=0,
text={
diff --git a/tests/api_resources/test_routes.py b/tests/api_resources/test_routes.py
index 58ab8ad9..9c863f26 100644
--- a/tests/api_resources/test_routes.py
+++ b/tests/api_resources/test_routes.py
@@ -28,13 +28,6 @@ def test_method_list(self, client: LlamaStackClient) -> None:
route = client.routes.list()
assert_matches_type(RouteListResponse, route, path=["response"])
- @parametrize
- def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
- route = client.routes.list(
- api_filter="v1",
- )
- assert_matches_type(RouteListResponse, route, path=["response"])
-
@parametrize
def test_raw_response_list(self, client: LlamaStackClient) -> None:
response = client.routes.with_raw_response.list()
@@ -66,13 +59,6 @@ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
route = await async_client.routes.list()
assert_matches_type(RouteListResponse, route, path=["response"])
- @parametrize
- async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- route = await async_client.routes.list(
- api_filter="v1",
- )
- assert_matches_type(RouteListResponse, route, path=["response"])
-
@parametrize
async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.routes.with_raw_response.list()
From d8617084062acbb81c26b6c22ea613e397aa969b Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Thu, 30 Oct 2025 02:13:30 +0000
Subject: [PATCH 4/9] fix(client): close streams without requiring full
consumption
---
src/llama_stack_client/_streaming.py | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/src/llama_stack_client/_streaming.py b/src/llama_stack_client/_streaming.py
index 5f23d84d..e732cb57 100644
--- a/src/llama_stack_client/_streaming.py
+++ b/src/llama_stack_client/_streaming.py
@@ -63,9 +63,8 @@ def __stream__(self) -> Iterator[_T]:
for sse in iterator:
yield process_data(data=sse.json(), cast_to=cast_to, response=response)
- # Ensure the entire stream is consumed
- for _sse in iterator:
- ...
+ # As we might not fully consume the response stream, we need to close it explicitly
+ response.close()
def __enter__(self) -> Self:
return self
@@ -127,9 +126,8 @@ async def __stream__(self) -> AsyncIterator[_T]:
async for sse in iterator:
yield process_data(data=sse.json(), cast_to=cast_to, response=response)
- # Ensure the entire stream is consumed
- async for _sse in iterator:
- ...
+ # As we might not fully consume the response stream, we need to close it explicitly
+ await response.aclose()
async def __aenter__(self) -> Self:
return self
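
Editor's note: the removed loop drained every remaining SSE event before returning; the replacement closes the HTTP response directly, so a caller that stops iterating early no longer has to consume the rest of the stream. A hedged sketch of that usage pattern (server URL, model id, and input are placeholders, not taken from the patch):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# The Stream returned for stream=True is a context manager (see __enter__ above),
# so stopping early still releases the underlying httpx response.
with client.responses.create(
    model="meta-llama/Llama-3.3-70B-Instruct",  # placeholder model id
    input="Say hello",
    stream=True,
) as stream:
    for event in stream:
        print(event)
        break  # early exit; the response is closed when the context manager exits
```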
From 114198bef4244ec27f7e163beb2e554da0dbd213 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Thu, 30 Oct 2025 16:58:34 +0000
Subject: [PATCH 5/9] feat(api): Adding prompts API to stainless config
Adding prompts API to stainless config
---
.stats.yml | 4 +-
api.md | 23 +
src/llama_stack_client/_client.py | 38 +
src/llama_stack_client/resources/__init__.py | 14 +
.../resources/prompts/__init__.py | 33 +
.../resources/prompts/prompts.py | 676 ++++++++++++++++++
.../resources/prompts/versions.py | 176 +++++
src/llama_stack_client/types/__init__.py | 7 +
.../types/list_prompts_response.py | 10 +
src/llama_stack_client/types/prompt.py | 27 +
.../types/prompt_create_params.py | 17 +
.../types/prompt_list_response.py | 10 +
.../types/prompt_retrieve_params.py | 12 +
.../prompt_set_default_version_params.py | 12 +
.../types/prompt_update_params.py | 23 +
.../types/prompts/__init__.py | 3 +
tests/api_resources/prompts/__init__.py | 1 +
tests/api_resources/prompts/test_versions.py | 100 +++
tests/api_resources/test_prompts.py | 529 ++++++++++++++
19 files changed, 1713 insertions(+), 2 deletions(-)
create mode 100644 src/llama_stack_client/resources/prompts/__init__.py
create mode 100644 src/llama_stack_client/resources/prompts/prompts.py
create mode 100644 src/llama_stack_client/resources/prompts/versions.py
create mode 100644 src/llama_stack_client/types/list_prompts_response.py
create mode 100644 src/llama_stack_client/types/prompt.py
create mode 100644 src/llama_stack_client/types/prompt_create_params.py
create mode 100644 src/llama_stack_client/types/prompt_list_response.py
create mode 100644 src/llama_stack_client/types/prompt_retrieve_params.py
create mode 100644 src/llama_stack_client/types/prompt_set_default_version_params.py
create mode 100644 src/llama_stack_client/types/prompt_update_params.py
create mode 100644 src/llama_stack_client/types/prompts/__init__.py
create mode 100644 tests/api_resources/prompts/__init__.py
create mode 100644 tests/api_resources/prompts/test_versions.py
create mode 100644 tests/api_resources/test_prompts.py
diff --git a/.stats.yml b/.stats.yml
index 24428280..60e64c3c 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 104
+configured_endpoints: 111
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-35c6569e5e9fcc85084c9728eb7fc7c5908297fcc77043d621d25de3c850a990.yml
openapi_spec_hash: 0f95bbeee16f3205d36ec34cfa62c711
-config_hash: fa14a2107881931b2ddef8c768eeb558
+config_hash: ef275cc002a89629459fd73d0cf9cba9
diff --git a/api.md b/api.md
index 5f7a90a2..57ecd092 100644
--- a/api.md
+++ b/api.md
@@ -102,6 +102,29 @@ Methods:
- client.responses.input_items.list(response_id, \*\*params) -> InputItemListResponse
+# Prompts
+
+Types:
+
+```python
+from llama_stack_client.types import ListPromptsResponse, Prompt, PromptListResponse
+```
+
+Methods:
+
+- client.prompts.create(\*\*params) -> Prompt
+- client.prompts.retrieve(prompt_id, \*\*params) -> Prompt
+- client.prompts.update(prompt_id, \*\*params) -> Prompt
+- client.prompts.list() -> PromptListResponse
+- client.prompts.delete(prompt_id) -> None
+- client.prompts.set_default_version(prompt_id, \*\*params) -> Prompt
+
+## Versions
+
+Methods:
+
+- client.prompts.versions.list(prompt_id) -> PromptListResponse
+
# Conversations
Types:
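
Editor's note: a hedged usage sketch of the Prompts API added above, based on the method signatures in the new `prompts.py` resource. The server URL, template text, and variable names are placeholders; the `prompt_id` and `version` attribute names on the returned `Prompt` are assumed from the resource parameters, not read from the generated model.

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # illustrative server URL

# Create a prompt template (placeholder text and variables).
created = client.prompts.create(
    prompt="Summarize the following: {{ text }}",
    variables=["text"],
)

# Fetch the latest version, then publish an update; `update` requires the
# current version and can set the new one as the default.
fetched = client.prompts.retrieve(created.prompt_id)
updated = client.prompts.update(
    created.prompt_id,
    prompt="Summarize concisely: {{ text }}",
    version=fetched.version,
    set_as_default=True,
    variables=["text"],
)

# List all stored versions of this prompt.
print(client.prompts.versions.list(created.prompt_id))
```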
diff --git a/src/llama_stack_client/_client.py b/src/llama_stack_client/_client.py
index 34de181a..b1880a7e 100644
--- a/src/llama_stack_client/_client.py
+++ b/src/llama_stack_client/_client.py
@@ -48,6 +48,7 @@
routes,
safety,
inspect,
+ prompts,
scoring,
shields,
providers,
@@ -80,6 +81,7 @@
from .resources.completions import CompletionsResource, AsyncCompletionsResource
from .resources.moderations import ModerationsResource, AsyncModerationsResource
from .resources.models.models import ModelsResource, AsyncModelsResource
+ from .resources.prompts.prompts import PromptsResource, AsyncPromptsResource
from .resources.scoring_functions import ScoringFunctionsResource, AsyncScoringFunctionsResource
from .resources.responses.responses import ResponsesResource, AsyncResponsesResource
from .resources.synthetic_data_generation import (
@@ -183,6 +185,12 @@ def responses(self) -> ResponsesResource:
return ResponsesResource(self)
+ @cached_property
+ def prompts(self) -> PromptsResource:
+ from .resources.prompts import PromptsResource
+
+ return PromptsResource(self)
+
@cached_property
def conversations(self) -> ConversationsResource:
from .resources.conversations import ConversationsResource
@@ -493,6 +501,12 @@ def responses(self) -> AsyncResponsesResource:
return AsyncResponsesResource(self)
+ @cached_property
+ def prompts(self) -> AsyncPromptsResource:
+ from .resources.prompts import AsyncPromptsResource
+
+ return AsyncPromptsResource(self)
+
@cached_property
def conversations(self) -> AsyncConversationsResource:
from .resources.conversations import AsyncConversationsResource
@@ -752,6 +766,12 @@ def responses(self) -> responses.ResponsesResourceWithRawResponse:
return ResponsesResourceWithRawResponse(self._client.responses)
+ @cached_property
+ def prompts(self) -> prompts.PromptsResourceWithRawResponse:
+ from .resources.prompts import PromptsResourceWithRawResponse
+
+ return PromptsResourceWithRawResponse(self._client.prompts)
+
@cached_property
def conversations(self) -> conversations.ConversationsResourceWithRawResponse:
from .resources.conversations import ConversationsResourceWithRawResponse
@@ -897,6 +917,12 @@ def responses(self) -> responses.AsyncResponsesResourceWithRawResponse:
return AsyncResponsesResourceWithRawResponse(self._client.responses)
+ @cached_property
+ def prompts(self) -> prompts.AsyncPromptsResourceWithRawResponse:
+ from .resources.prompts import AsyncPromptsResourceWithRawResponse
+
+ return AsyncPromptsResourceWithRawResponse(self._client.prompts)
+
@cached_property
def conversations(self) -> conversations.AsyncConversationsResourceWithRawResponse:
from .resources.conversations import AsyncConversationsResourceWithRawResponse
@@ -1044,6 +1070,12 @@ def responses(self) -> responses.ResponsesResourceWithStreamingResponse:
return ResponsesResourceWithStreamingResponse(self._client.responses)
+ @cached_property
+ def prompts(self) -> prompts.PromptsResourceWithStreamingResponse:
+ from .resources.prompts import PromptsResourceWithStreamingResponse
+
+ return PromptsResourceWithStreamingResponse(self._client.prompts)
+
@cached_property
def conversations(self) -> conversations.ConversationsResourceWithStreamingResponse:
from .resources.conversations import ConversationsResourceWithStreamingResponse
@@ -1191,6 +1223,12 @@ def responses(self) -> responses.AsyncResponsesResourceWithStreamingResponse:
return AsyncResponsesResourceWithStreamingResponse(self._client.responses)
+ @cached_property
+ def prompts(self) -> prompts.AsyncPromptsResourceWithStreamingResponse:
+ from .resources.prompts import AsyncPromptsResourceWithStreamingResponse
+
+ return AsyncPromptsResourceWithStreamingResponse(self._client.prompts)
+
@cached_property
def conversations(self) -> conversations.AsyncConversationsResourceWithStreamingResponse:
from .resources.conversations import AsyncConversationsResourceWithStreamingResponse
diff --git a/src/llama_stack_client/resources/__init__.py b/src/llama_stack_client/resources/__init__.py
index 60b18979..807ed6c8 100644
--- a/src/llama_stack_client/resources/__init__.py
+++ b/src/llama_stack_client/resources/__init__.py
@@ -78,6 +78,14 @@
InspectResourceWithStreamingResponse,
AsyncInspectResourceWithStreamingResponse,
)
+from .prompts import (
+ PromptsResource,
+ AsyncPromptsResource,
+ PromptsResourceWithRawResponse,
+ AsyncPromptsResourceWithRawResponse,
+ PromptsResourceWithStreamingResponse,
+ AsyncPromptsResourceWithStreamingResponse,
+)
from .scoring import (
ScoringResource,
AsyncScoringResource,
@@ -216,6 +224,12 @@
"AsyncResponsesResourceWithRawResponse",
"ResponsesResourceWithStreamingResponse",
"AsyncResponsesResourceWithStreamingResponse",
+ "PromptsResource",
+ "AsyncPromptsResource",
+ "PromptsResourceWithRawResponse",
+ "AsyncPromptsResourceWithRawResponse",
+ "PromptsResourceWithStreamingResponse",
+ "AsyncPromptsResourceWithStreamingResponse",
"ConversationsResource",
"AsyncConversationsResource",
"ConversationsResourceWithRawResponse",
diff --git a/src/llama_stack_client/resources/prompts/__init__.py b/src/llama_stack_client/resources/prompts/__init__.py
new file mode 100644
index 00000000..d8c5c535
--- /dev/null
+++ b/src/llama_stack_client/resources/prompts/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .prompts import (
+ PromptsResource,
+ AsyncPromptsResource,
+ PromptsResourceWithRawResponse,
+ AsyncPromptsResourceWithRawResponse,
+ PromptsResourceWithStreamingResponse,
+ AsyncPromptsResourceWithStreamingResponse,
+)
+from .versions import (
+ VersionsResource,
+ AsyncVersionsResource,
+ VersionsResourceWithRawResponse,
+ AsyncVersionsResourceWithRawResponse,
+ VersionsResourceWithStreamingResponse,
+ AsyncVersionsResourceWithStreamingResponse,
+)
+
+__all__ = [
+ "VersionsResource",
+ "AsyncVersionsResource",
+ "VersionsResourceWithRawResponse",
+ "AsyncVersionsResourceWithRawResponse",
+ "VersionsResourceWithStreamingResponse",
+ "AsyncVersionsResourceWithStreamingResponse",
+ "PromptsResource",
+ "AsyncPromptsResource",
+ "PromptsResourceWithRawResponse",
+ "AsyncPromptsResourceWithRawResponse",
+ "PromptsResourceWithStreamingResponse",
+ "AsyncPromptsResourceWithStreamingResponse",
+]
diff --git a/src/llama_stack_client/resources/prompts/prompts.py b/src/llama_stack_client/resources/prompts/prompts.py
new file mode 100644
index 00000000..aa14c6f2
--- /dev/null
+++ b/src/llama_stack_client/resources/prompts/prompts.py
@@ -0,0 +1,676 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Type, cast
+
+import httpx
+
+from ...types import (
+ prompt_create_params,
+ prompt_update_params,
+ prompt_retrieve_params,
+ prompt_set_default_version_params,
+)
+from ..._types import Body, Omit, Query, Headers, NoneType, NotGiven, SequenceNotStr, omit, not_given
+from ..._utils import maybe_transform, async_maybe_transform
+from .versions import (
+ VersionsResource,
+ AsyncVersionsResource,
+ VersionsResourceWithRawResponse,
+ AsyncVersionsResourceWithRawResponse,
+ VersionsResourceWithStreamingResponse,
+ AsyncVersionsResourceWithStreamingResponse,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ..._wrappers import DataWrapper
+from ..._base_client import make_request_options
+from ...types.prompt import Prompt
+from ...types.prompt_list_response import PromptListResponse
+
+__all__ = ["PromptsResource", "AsyncPromptsResource"]
+
+
+class PromptsResource(SyncAPIResource):
+ @cached_property
+ def versions(self) -> VersionsResource:
+ return VersionsResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> PromptsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return PromptsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> PromptsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return PromptsResourceWithStreamingResponse(self)
+
+ def create(
+ self,
+ *,
+ prompt: str,
+ variables: SequenceNotStr[str] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Prompt:
+ """Create prompt.
+
+ Create a new prompt.
+
+ Args:
+ prompt: The prompt text content with variable placeholders.
+
+ variables: List of variable names that can be used in the prompt template.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/v1/prompts",
+ body=maybe_transform(
+ {
+ "prompt": prompt,
+ "variables": variables,
+ },
+ prompt_create_params.PromptCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Prompt,
+ )
+
+ def retrieve(
+ self,
+ prompt_id: str,
+ *,
+ version: int | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Prompt:
+ """Get prompt.
+
+ Get a prompt by its identifier and optional version.
+
+ Args:
+ version: The version of the prompt to get (defaults to latest).
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not prompt_id:
+ raise ValueError(f"Expected a non-empty value for `prompt_id` but received {prompt_id!r}")
+ return self._get(
+ f"/v1/prompts/{prompt_id}",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform({"version": version}, prompt_retrieve_params.PromptRetrieveParams),
+ ),
+ cast_to=Prompt,
+ )
+
+ def update(
+ self,
+ prompt_id: str,
+ *,
+ prompt: str,
+ set_as_default: bool,
+ version: int,
+ variables: SequenceNotStr[str] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Prompt:
+ """Update prompt.
+
+ Update an existing prompt (increments version).
+
+ Args:
+ prompt: The updated prompt text content.
+
+ set_as_default: Set the new version as the default (default=True).
+
+ version: The current version of the prompt being updated.
+
+ variables: Updated list of variable names that can be used in the prompt template.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not prompt_id:
+ raise ValueError(f"Expected a non-empty value for `prompt_id` but received {prompt_id!r}")
+ return self._post(
+ f"/v1/prompts/{prompt_id}",
+ body=maybe_transform(
+ {
+ "prompt": prompt,
+ "set_as_default": set_as_default,
+ "version": version,
+ "variables": variables,
+ },
+ prompt_update_params.PromptUpdateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Prompt,
+ )
+
+ def list(
+ self,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> PromptListResponse:
+ """List all prompts."""
+ return self._get(
+ "/v1/prompts",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ post_parser=DataWrapper[PromptListResponse]._unwrapper,
+ ),
+ cast_to=cast(Type[PromptListResponse], DataWrapper[PromptListResponse]),
+ )
+
+ def delete(
+ self,
+ prompt_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> None:
+ """Delete prompt.
+
+ Delete a prompt.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not prompt_id:
+ raise ValueError(f"Expected a non-empty value for `prompt_id` but received {prompt_id!r}")
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+ return self._delete(
+ f"/v1/prompts/{prompt_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=NoneType,
+ )
+
+ def set_default_version(
+ self,
+ prompt_id: str,
+ *,
+ version: int,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Prompt:
+ """Set prompt version.
+
+        Set which version of a prompt should be returned by default from
+        get_prompt (i.e., when no version is specified).
+
+ Args:
+ version: The version to set as default.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not prompt_id:
+ raise ValueError(f"Expected a non-empty value for `prompt_id` but received {prompt_id!r}")
+ return self._post(
+ f"/v1/prompts/{prompt_id}/set-default-version",
+ body=maybe_transform({"version": version}, prompt_set_default_version_params.PromptSetDefaultVersionParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Prompt,
+ )
+
+
+class AsyncPromptsResource(AsyncAPIResource):
+ @cached_property
+ def versions(self) -> AsyncVersionsResource:
+ return AsyncVersionsResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncPromptsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncPromptsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncPromptsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return AsyncPromptsResourceWithStreamingResponse(self)
+
+ async def create(
+ self,
+ *,
+ prompt: str,
+ variables: SequenceNotStr[str] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Prompt:
+ """Create prompt.
+
+ Create a new prompt.
+
+ Args:
+ prompt: The prompt text content with variable placeholders.
+
+ variables: List of variable names that can be used in the prompt template.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/v1/prompts",
+ body=await async_maybe_transform(
+ {
+ "prompt": prompt,
+ "variables": variables,
+ },
+ prompt_create_params.PromptCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Prompt,
+ )
+
+ async def retrieve(
+ self,
+ prompt_id: str,
+ *,
+ version: int | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Prompt:
+ """Get prompt.
+
+ Get a prompt by its identifier and optional version.
+
+ Args:
+ version: The version of the prompt to get (defaults to latest).
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not prompt_id:
+ raise ValueError(f"Expected a non-empty value for `prompt_id` but received {prompt_id!r}")
+ return await self._get(
+ f"/v1/prompts/{prompt_id}",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=await async_maybe_transform({"version": version}, prompt_retrieve_params.PromptRetrieveParams),
+ ),
+ cast_to=Prompt,
+ )
+
+ async def update(
+ self,
+ prompt_id: str,
+ *,
+ prompt: str,
+ set_as_default: bool,
+ version: int,
+ variables: SequenceNotStr[str] | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Prompt:
+ """Update prompt.
+
+ Update an existing prompt (increments version).
+
+ Args:
+ prompt: The updated prompt text content.
+
+ set_as_default: Set the new version as the default (default=True).
+
+ version: The current version of the prompt being updated.
+
+ variables: Updated list of variable names that can be used in the prompt template.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not prompt_id:
+ raise ValueError(f"Expected a non-empty value for `prompt_id` but received {prompt_id!r}")
+ return await self._post(
+ f"/v1/prompts/{prompt_id}",
+ body=await async_maybe_transform(
+ {
+ "prompt": prompt,
+ "set_as_default": set_as_default,
+ "version": version,
+ "variables": variables,
+ },
+ prompt_update_params.PromptUpdateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Prompt,
+ )
+
+ async def list(
+ self,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> PromptListResponse:
+ """List all prompts."""
+ return await self._get(
+ "/v1/prompts",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ post_parser=DataWrapper[PromptListResponse]._unwrapper,
+ ),
+ cast_to=cast(Type[PromptListResponse], DataWrapper[PromptListResponse]),
+ )
+
+ async def delete(
+ self,
+ prompt_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> None:
+ """Delete prompt.
+
+ Delete a prompt.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not prompt_id:
+ raise ValueError(f"Expected a non-empty value for `prompt_id` but received {prompt_id!r}")
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+ return await self._delete(
+ f"/v1/prompts/{prompt_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=NoneType,
+ )
+
+ async def set_default_version(
+ self,
+ prompt_id: str,
+ *,
+ version: int,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> Prompt:
+ """Set prompt version.
+
+        Set which version of a prompt should be returned by default from
+        get_prompt (i.e., when no version is specified).
+
+ Args:
+ version: The version to set as default.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not prompt_id:
+ raise ValueError(f"Expected a non-empty value for `prompt_id` but received {prompt_id!r}")
+ return await self._post(
+ f"/v1/prompts/{prompt_id}/set-default-version",
+ body=await async_maybe_transform(
+ {"version": version}, prompt_set_default_version_params.PromptSetDefaultVersionParams
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=Prompt,
+ )
+
+
+class PromptsResourceWithRawResponse:
+ def __init__(self, prompts: PromptsResource) -> None:
+ self._prompts = prompts
+
+ self.create = to_raw_response_wrapper(
+ prompts.create,
+ )
+ self.retrieve = to_raw_response_wrapper(
+ prompts.retrieve,
+ )
+ self.update = to_raw_response_wrapper(
+ prompts.update,
+ )
+ self.list = to_raw_response_wrapper(
+ prompts.list,
+ )
+ self.delete = to_raw_response_wrapper(
+ prompts.delete,
+ )
+ self.set_default_version = to_raw_response_wrapper(
+ prompts.set_default_version,
+ )
+
+ @cached_property
+ def versions(self) -> VersionsResourceWithRawResponse:
+ return VersionsResourceWithRawResponse(self._prompts.versions)
+
+
+class AsyncPromptsResourceWithRawResponse:
+ def __init__(self, prompts: AsyncPromptsResource) -> None:
+ self._prompts = prompts
+
+ self.create = async_to_raw_response_wrapper(
+ prompts.create,
+ )
+ self.retrieve = async_to_raw_response_wrapper(
+ prompts.retrieve,
+ )
+ self.update = async_to_raw_response_wrapper(
+ prompts.update,
+ )
+ self.list = async_to_raw_response_wrapper(
+ prompts.list,
+ )
+ self.delete = async_to_raw_response_wrapper(
+ prompts.delete,
+ )
+ self.set_default_version = async_to_raw_response_wrapper(
+ prompts.set_default_version,
+ )
+
+ @cached_property
+ def versions(self) -> AsyncVersionsResourceWithRawResponse:
+ return AsyncVersionsResourceWithRawResponse(self._prompts.versions)
+
+
+class PromptsResourceWithStreamingResponse:
+ def __init__(self, prompts: PromptsResource) -> None:
+ self._prompts = prompts
+
+ self.create = to_streamed_response_wrapper(
+ prompts.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ prompts.retrieve,
+ )
+ self.update = to_streamed_response_wrapper(
+ prompts.update,
+ )
+ self.list = to_streamed_response_wrapper(
+ prompts.list,
+ )
+ self.delete = to_streamed_response_wrapper(
+ prompts.delete,
+ )
+ self.set_default_version = to_streamed_response_wrapper(
+ prompts.set_default_version,
+ )
+
+ @cached_property
+ def versions(self) -> VersionsResourceWithStreamingResponse:
+ return VersionsResourceWithStreamingResponse(self._prompts.versions)
+
+
+class AsyncPromptsResourceWithStreamingResponse:
+ def __init__(self, prompts: AsyncPromptsResource) -> None:
+ self._prompts = prompts
+
+ self.create = async_to_streamed_response_wrapper(
+ prompts.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ prompts.retrieve,
+ )
+ self.update = async_to_streamed_response_wrapper(
+ prompts.update,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ prompts.list,
+ )
+ self.delete = async_to_streamed_response_wrapper(
+ prompts.delete,
+ )
+ self.set_default_version = async_to_streamed_response_wrapper(
+ prompts.set_default_version,
+ )
+
+ @cached_property
+ def versions(self) -> AsyncVersionsResourceWithStreamingResponse:
+ return AsyncVersionsResourceWithStreamingResponse(self._prompts.versions)
diff --git a/src/llama_stack_client/resources/prompts/versions.py b/src/llama_stack_client/resources/prompts/versions.py
new file mode 100644
index 00000000..bf074dc7
--- /dev/null
+++ b/src/llama_stack_client/resources/prompts/versions.py
@@ -0,0 +1,176 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Type, cast
+
+import httpx
+
+from ..._types import Body, Query, Headers, NotGiven, not_given
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ..._wrappers import DataWrapper
+from ..._base_client import make_request_options
+from ...types.prompt_list_response import PromptListResponse
+
+__all__ = ["VersionsResource", "AsyncVersionsResource"]
+
+
+class VersionsResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> VersionsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return VersionsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> VersionsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return VersionsResourceWithStreamingResponse(self)
+
+ def list(
+ self,
+ prompt_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> PromptListResponse:
+ """List prompt versions.
+
+ List all versions of a specific prompt.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not prompt_id:
+ raise ValueError(f"Expected a non-empty value for `prompt_id` but received {prompt_id!r}")
+ return self._get(
+ f"/v1/prompts/{prompt_id}/versions",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ post_parser=DataWrapper[PromptListResponse]._unwrapper,
+ ),
+ cast_to=cast(Type[PromptListResponse], DataWrapper[PromptListResponse]),
+ )
+
+
+class AsyncVersionsResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncVersionsResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncVersionsResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncVersionsResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return AsyncVersionsResourceWithStreamingResponse(self)
+
+ async def list(
+ self,
+ prompt_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> PromptListResponse:
+ """List prompt versions.
+
+ List all versions of a specific prompt.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not prompt_id:
+ raise ValueError(f"Expected a non-empty value for `prompt_id` but received {prompt_id!r}")
+ return await self._get(
+ f"/v1/prompts/{prompt_id}/versions",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ post_parser=DataWrapper[PromptListResponse]._unwrapper,
+ ),
+ cast_to=cast(Type[PromptListResponse], DataWrapper[PromptListResponse]),
+ )
+
+
+class VersionsResourceWithRawResponse:
+ def __init__(self, versions: VersionsResource) -> None:
+ self._versions = versions
+
+ self.list = to_raw_response_wrapper(
+ versions.list,
+ )
+
+
+class AsyncVersionsResourceWithRawResponse:
+ def __init__(self, versions: AsyncVersionsResource) -> None:
+ self._versions = versions
+
+ self.list = async_to_raw_response_wrapper(
+ versions.list,
+ )
+
+
+class VersionsResourceWithStreamingResponse:
+ def __init__(self, versions: VersionsResource) -> None:
+ self._versions = versions
+
+ self.list = to_streamed_response_wrapper(
+ versions.list,
+ )
+
+
+class AsyncVersionsResourceWithStreamingResponse:
+ def __init__(self, versions: AsyncVersionsResource) -> None:
+ self._versions = versions
+
+ self.list = async_to_streamed_response_wrapper(
+ versions.list,
+ )
diff --git a/src/llama_stack_client/types/__init__.py b/src/llama_stack_client/types/__init__.py
index 173a1e03..4b6a2b84 100644
--- a/src/llama_stack_client/types/__init__.py
+++ b/src/llama_stack_client/types/__init__.py
@@ -10,6 +10,7 @@
from .file import File as File
from .model import Model as Model
+from .prompt import Prompt as Prompt
from .shared import (
Message as Message,
Document as Document,
@@ -54,13 +55,18 @@
from .delete_file_response import DeleteFileResponse as DeleteFileResponse
from .list_models_response import ListModelsResponse as ListModelsResponse
from .list_routes_response import ListRoutesResponse as ListRoutesResponse
+from .prompt_create_params import PromptCreateParams as PromptCreateParams
+from .prompt_list_response import PromptListResponse as PromptListResponse
+from .prompt_update_params import PromptUpdateParams as PromptUpdateParams
from .response_list_params import ResponseListParams as ResponseListParams
from .scoring_score_params import ScoringScoreParams as ScoringScoreParams
from .shield_list_response import ShieldListResponse as ShieldListResponse
from .chat_completion_chunk import ChatCompletionChunk as ChatCompletionChunk
+from .list_prompts_response import ListPromptsResponse as ListPromptsResponse
from .list_shields_response import ListShieldsResponse as ListShieldsResponse
from .model_register_params import ModelRegisterParams as ModelRegisterParams
from .query_chunks_response import QueryChunksResponse as QueryChunksResponse
+from .prompt_retrieve_params import PromptRetrieveParams as PromptRetrieveParams
from .provider_list_response import ProviderListResponse as ProviderListResponse
from .response_create_params import ResponseCreateParams as ResponseCreateParams
from .response_list_response import ResponseListResponse as ResponseListResponse
@@ -100,6 +106,7 @@
from .tool_runtime_invoke_tool_params import ToolRuntimeInvokeToolParams as ToolRuntimeInvokeToolParams
from .scoring_function_register_params import ScoringFunctionRegisterParams as ScoringFunctionRegisterParams
from .tool_runtime_list_tools_response import ToolRuntimeListToolsResponse as ToolRuntimeListToolsResponse
+from .prompt_set_default_version_params import PromptSetDefaultVersionParams as PromptSetDefaultVersionParams
from .synthetic_data_generation_response import SyntheticDataGenerationResponse as SyntheticDataGenerationResponse
from .synthetic_data_generation_generate_params import (
SyntheticDataGenerationGenerateParams as SyntheticDataGenerationGenerateParams,
diff --git a/src/llama_stack_client/types/list_prompts_response.py b/src/llama_stack_client/types/list_prompts_response.py
new file mode 100644
index 00000000..cc5058d3
--- /dev/null
+++ b/src/llama_stack_client/types/list_prompts_response.py
@@ -0,0 +1,10 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .._models import BaseModel
+from .prompt_list_response import PromptListResponse
+
+__all__ = ["ListPromptsResponse"]
+
+
+class ListPromptsResponse(BaseModel):
+ data: PromptListResponse
diff --git a/src/llama_stack_client/types/prompt.py b/src/llama_stack_client/types/prompt.py
new file mode 100644
index 00000000..f88c74ad
--- /dev/null
+++ b/src/llama_stack_client/types/prompt.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+
+from .._models import BaseModel
+
+__all__ = ["Prompt"]
+
+
+class Prompt(BaseModel):
+ is_default: bool
+ """Boolean indicating whether this version is the default version for this prompt"""
+
+ prompt_id: str
+ """Unique identifier formatted as 'pmpt\\__<48-digit-hash>'"""
+
+ variables: List[str]
+ """List of prompt variable names that can be used in the prompt template"""
+
+ version: int
+ """Version (integer starting at 1, incremented on save)"""
+
+ prompt: Optional[str] = None
+ """The system prompt text with variable placeholders.
+
+ Variables are only supported when using the Responses API.
+ """
diff --git a/src/llama_stack_client/types/prompt_create_params.py b/src/llama_stack_client/types/prompt_create_params.py
new file mode 100644
index 00000000..a1c2b411
--- /dev/null
+++ b/src/llama_stack_client/types/prompt_create_params.py
@@ -0,0 +1,17 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+from .._types import SequenceNotStr
+
+__all__ = ["PromptCreateParams"]
+
+
+class PromptCreateParams(TypedDict, total=False):
+ prompt: Required[str]
+ """The prompt text content with variable placeholders."""
+
+ variables: SequenceNotStr[str]
+ """List of variable names that can be used in the prompt template."""
diff --git a/src/llama_stack_client/types/prompt_list_response.py b/src/llama_stack_client/types/prompt_list_response.py
new file mode 100644
index 00000000..37ac9b55
--- /dev/null
+++ b/src/llama_stack_client/types/prompt_list_response.py
@@ -0,0 +1,10 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List
+from typing_extensions import TypeAlias
+
+from .prompt import Prompt
+
+__all__ = ["PromptListResponse"]
+
+PromptListResponse: TypeAlias = List[Prompt]
diff --git a/src/llama_stack_client/types/prompt_retrieve_params.py b/src/llama_stack_client/types/prompt_retrieve_params.py
new file mode 100644
index 00000000..71674dd2
--- /dev/null
+++ b/src/llama_stack_client/types/prompt_retrieve_params.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import TypedDict
+
+__all__ = ["PromptRetrieveParams"]
+
+
+class PromptRetrieveParams(TypedDict, total=False):
+ version: int
+ """The version of the prompt to get (defaults to latest)."""
diff --git a/src/llama_stack_client/types/prompt_set_default_version_params.py b/src/llama_stack_client/types/prompt_set_default_version_params.py
new file mode 100644
index 00000000..6b5c6130
--- /dev/null
+++ b/src/llama_stack_client/types/prompt_set_default_version_params.py
@@ -0,0 +1,12 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["PromptSetDefaultVersionParams"]
+
+
+class PromptSetDefaultVersionParams(TypedDict, total=False):
+ version: Required[int]
+ """The version to set as default."""
diff --git a/src/llama_stack_client/types/prompt_update_params.py b/src/llama_stack_client/types/prompt_update_params.py
new file mode 100644
index 00000000..9753a07a
--- /dev/null
+++ b/src/llama_stack_client/types/prompt_update_params.py
@@ -0,0 +1,23 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+from .._types import SequenceNotStr
+
+__all__ = ["PromptUpdateParams"]
+
+
+class PromptUpdateParams(TypedDict, total=False):
+ prompt: Required[str]
+ """The updated prompt text content."""
+
+ set_as_default: Required[bool]
+ """Set the new version as the default (default=True)."""
+
+ version: Required[int]
+ """The current version of the prompt being updated."""
+
+ variables: SequenceNotStr[str]
+ """Updated list of variable names that can be used in the prompt template."""
diff --git a/src/llama_stack_client/types/prompts/__init__.py b/src/llama_stack_client/types/prompts/__init__.py
new file mode 100644
index 00000000..f8ee8b14
--- /dev/null
+++ b/src/llama_stack_client/types/prompts/__init__.py
@@ -0,0 +1,3 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
diff --git a/tests/api_resources/prompts/__init__.py b/tests/api_resources/prompts/__init__.py
new file mode 100644
index 00000000..fd8019a9
--- /dev/null
+++ b/tests/api_resources/prompts/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/prompts/test_versions.py b/tests/api_resources/prompts/test_versions.py
new file mode 100644
index 00000000..2955203b
--- /dev/null
+++ b/tests/api_resources/prompts/test_versions.py
@@ -0,0 +1,100 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from tests.utils import assert_matches_type
+from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
+from llama_stack_client.types import PromptListResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestVersions:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_list(self, client: LlamaStackClient) -> None:
+ version = client.prompts.versions.list(
+ "prompt_id",
+ )
+ assert_matches_type(PromptListResponse, version, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: LlamaStackClient) -> None:
+ response = client.prompts.versions.with_raw_response.list(
+ "prompt_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ version = response.parse()
+ assert_matches_type(PromptListResponse, version, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: LlamaStackClient) -> None:
+ with client.prompts.versions.with_streaming_response.list(
+ "prompt_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ version = response.parse()
+ assert_matches_type(PromptListResponse, version, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_list(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `prompt_id` but received ''"):
+ client.prompts.versions.with_raw_response.list(
+ "",
+ )
+
+
+class TestAsyncVersions:
+ parametrize = pytest.mark.parametrize(
+ "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+ )
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
+ version = await async_client.prompts.versions.list(
+ "prompt_id",
+ )
+ assert_matches_type(PromptListResponse, version, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.prompts.versions.with_raw_response.list(
+ "prompt_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ version = await response.parse()
+ assert_matches_type(PromptListResponse, version, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.prompts.versions.with_streaming_response.list(
+ "prompt_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ version = await response.parse()
+ assert_matches_type(PromptListResponse, version, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_list(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `prompt_id` but received ''"):
+ await async_client.prompts.versions.with_raw_response.list(
+ "",
+ )
diff --git a/tests/api_resources/test_prompts.py b/tests/api_resources/test_prompts.py
new file mode 100644
index 00000000..f09af45c
--- /dev/null
+++ b/tests/api_resources/test_prompts.py
@@ -0,0 +1,529 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from tests.utils import assert_matches_type
+from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
+from llama_stack_client.types import (
+ Prompt,
+ PromptListResponse,
+)
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestPrompts:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_create(self, client: LlamaStackClient) -> None:
+ prompt = client.prompts.create(
+ prompt="prompt",
+ )
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ @parametrize
+ def test_method_create_with_all_params(self, client: LlamaStackClient) -> None:
+ prompt = client.prompts.create(
+ prompt="prompt",
+ variables=["string"],
+ )
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ @parametrize
+ def test_raw_response_create(self, client: LlamaStackClient) -> None:
+ response = client.prompts.with_raw_response.create(
+ prompt="prompt",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ prompt = response.parse()
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ @parametrize
+ def test_streaming_response_create(self, client: LlamaStackClient) -> None:
+ with client.prompts.with_streaming_response.create(
+ prompt="prompt",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ prompt = response.parse()
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_retrieve(self, client: LlamaStackClient) -> None:
+ prompt = client.prompts.retrieve(
+ prompt_id="prompt_id",
+ )
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ @parametrize
+ def test_method_retrieve_with_all_params(self, client: LlamaStackClient) -> None:
+ prompt = client.prompts.retrieve(
+ prompt_id="prompt_id",
+ version=0,
+ )
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
+ response = client.prompts.with_raw_response.retrieve(
+ prompt_id="prompt_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ prompt = response.parse()
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
+ with client.prompts.with_streaming_response.retrieve(
+ prompt_id="prompt_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ prompt = response.parse()
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `prompt_id` but received ''"):
+ client.prompts.with_raw_response.retrieve(
+ prompt_id="",
+ )
+
+ @parametrize
+ def test_method_update(self, client: LlamaStackClient) -> None:
+ prompt = client.prompts.update(
+ prompt_id="prompt_id",
+ prompt="prompt",
+ set_as_default=True,
+ version=0,
+ )
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ @parametrize
+ def test_method_update_with_all_params(self, client: LlamaStackClient) -> None:
+ prompt = client.prompts.update(
+ prompt_id="prompt_id",
+ prompt="prompt",
+ set_as_default=True,
+ version=0,
+ variables=["string"],
+ )
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ @parametrize
+ def test_raw_response_update(self, client: LlamaStackClient) -> None:
+ response = client.prompts.with_raw_response.update(
+ prompt_id="prompt_id",
+ prompt="prompt",
+ set_as_default=True,
+ version=0,
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ prompt = response.parse()
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ @parametrize
+ def test_streaming_response_update(self, client: LlamaStackClient) -> None:
+ with client.prompts.with_streaming_response.update(
+ prompt_id="prompt_id",
+ prompt="prompt",
+ set_as_default=True,
+ version=0,
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ prompt = response.parse()
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_update(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `prompt_id` but received ''"):
+ client.prompts.with_raw_response.update(
+ prompt_id="",
+ prompt="prompt",
+ set_as_default=True,
+ version=0,
+ )
+
+ @parametrize
+ def test_method_list(self, client: LlamaStackClient) -> None:
+ prompt = client.prompts.list()
+ assert_matches_type(PromptListResponse, prompt, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: LlamaStackClient) -> None:
+ response = client.prompts.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ prompt = response.parse()
+ assert_matches_type(PromptListResponse, prompt, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: LlamaStackClient) -> None:
+ with client.prompts.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ prompt = response.parse()
+ assert_matches_type(PromptListResponse, prompt, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_delete(self, client: LlamaStackClient) -> None:
+ prompt = client.prompts.delete(
+ "prompt_id",
+ )
+ assert prompt is None
+
+ @parametrize
+ def test_raw_response_delete(self, client: LlamaStackClient) -> None:
+ response = client.prompts.with_raw_response.delete(
+ "prompt_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ prompt = response.parse()
+ assert prompt is None
+
+ @parametrize
+ def test_streaming_response_delete(self, client: LlamaStackClient) -> None:
+ with client.prompts.with_streaming_response.delete(
+ "prompt_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ prompt = response.parse()
+ assert prompt is None
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_delete(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `prompt_id` but received ''"):
+ client.prompts.with_raw_response.delete(
+ "",
+ )
+
+ @parametrize
+ def test_method_set_default_version(self, client: LlamaStackClient) -> None:
+ prompt = client.prompts.set_default_version(
+ prompt_id="prompt_id",
+ version=0,
+ )
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ @parametrize
+ def test_raw_response_set_default_version(self, client: LlamaStackClient) -> None:
+ response = client.prompts.with_raw_response.set_default_version(
+ prompt_id="prompt_id",
+ version=0,
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ prompt = response.parse()
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ @parametrize
+ def test_streaming_response_set_default_version(self, client: LlamaStackClient) -> None:
+ with client.prompts.with_streaming_response.set_default_version(
+ prompt_id="prompt_id",
+ version=0,
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ prompt = response.parse()
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_set_default_version(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `prompt_id` but received ''"):
+ client.prompts.with_raw_response.set_default_version(
+ prompt_id="",
+ version=0,
+ )
+
+
+class TestAsyncPrompts:
+ parametrize = pytest.mark.parametrize(
+ "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+ )
+
+ @parametrize
+ async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None:
+ prompt = await async_client.prompts.create(
+ prompt="prompt",
+ )
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ @parametrize
+ async def test_method_create_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ prompt = await async_client.prompts.create(
+ prompt="prompt",
+ variables=["string"],
+ )
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ @parametrize
+ async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.prompts.with_raw_response.create(
+ prompt="prompt",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ prompt = await response.parse()
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_create(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.prompts.with_streaming_response.create(
+ prompt="prompt",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ prompt = await response.parse()
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ prompt = await async_client.prompts.retrieve(
+ prompt_id="prompt_id",
+ )
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ @parametrize
+ async def test_method_retrieve_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ prompt = await async_client.prompts.retrieve(
+ prompt_id="prompt_id",
+ version=0,
+ )
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.prompts.with_raw_response.retrieve(
+ prompt_id="prompt_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ prompt = await response.parse()
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.prompts.with_streaming_response.retrieve(
+ prompt_id="prompt_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ prompt = await response.parse()
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `prompt_id` but received ''"):
+ await async_client.prompts.with_raw_response.retrieve(
+ prompt_id="",
+ )
+
+ @parametrize
+ async def test_method_update(self, async_client: AsyncLlamaStackClient) -> None:
+ prompt = await async_client.prompts.update(
+ prompt_id="prompt_id",
+ prompt="prompt",
+ set_as_default=True,
+ version=0,
+ )
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ @parametrize
+ async def test_method_update_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ prompt = await async_client.prompts.update(
+ prompt_id="prompt_id",
+ prompt="prompt",
+ set_as_default=True,
+ version=0,
+ variables=["string"],
+ )
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ @parametrize
+ async def test_raw_response_update(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.prompts.with_raw_response.update(
+ prompt_id="prompt_id",
+ prompt="prompt",
+ set_as_default=True,
+ version=0,
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ prompt = await response.parse()
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_update(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.prompts.with_streaming_response.update(
+ prompt_id="prompt_id",
+ prompt="prompt",
+ set_as_default=True,
+ version=0,
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ prompt = await response.parse()
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_update(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `prompt_id` but received ''"):
+ await async_client.prompts.with_raw_response.update(
+ prompt_id="",
+ prompt="prompt",
+ set_as_default=True,
+ version=0,
+ )
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
+ prompt = await async_client.prompts.list()
+ assert_matches_type(PromptListResponse, prompt, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.prompts.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ prompt = await response.parse()
+ assert_matches_type(PromptListResponse, prompt, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.prompts.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ prompt = await response.parse()
+ assert_matches_type(PromptListResponse, prompt, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_delete(self, async_client: AsyncLlamaStackClient) -> None:
+ prompt = await async_client.prompts.delete(
+ "prompt_id",
+ )
+ assert prompt is None
+
+ @parametrize
+ async def test_raw_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.prompts.with_raw_response.delete(
+ "prompt_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ prompt = await response.parse()
+ assert prompt is None
+
+ @parametrize
+ async def test_streaming_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.prompts.with_streaming_response.delete(
+ "prompt_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ prompt = await response.parse()
+ assert prompt is None
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_delete(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `prompt_id` but received ''"):
+ await async_client.prompts.with_raw_response.delete(
+ "",
+ )
+
+ @parametrize
+ async def test_method_set_default_version(self, async_client: AsyncLlamaStackClient) -> None:
+ prompt = await async_client.prompts.set_default_version(
+ prompt_id="prompt_id",
+ version=0,
+ )
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ @parametrize
+ async def test_raw_response_set_default_version(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.prompts.with_raw_response.set_default_version(
+ prompt_id="prompt_id",
+ version=0,
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ prompt = await response.parse()
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_set_default_version(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.prompts.with_streaming_response.set_default_version(
+ prompt_id="prompt_id",
+ version=0,
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ prompt = await response.parse()
+ assert_matches_type(Prompt, prompt, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_set_default_version(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `prompt_id` but received ''"):
+ await async_client.prompts.with_raw_response.set_default_version(
+ prompt_id="",
+ version=0,
+ )
From 64b116d228d4ac7b87409f54d03c2928434fe948 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Thu, 30 Oct 2025 12:21:37 -0700
Subject: [PATCH 6/9] Restore Meta license headers to 93 files
License headers were inadvertently removed during merge. Restored the
Meta copyright and license header to all affected Python files in
alpha/beta resources, types, and tests.
---
src/llama_stack_client/resources/alpha/__init__.py | 7 ++++++-
src/llama_stack_client/resources/alpha/agents/__init__.py | 7 ++++++-
src/llama_stack_client/resources/alpha/agents/agents.py | 7 ++++++-
src/llama_stack_client/resources/alpha/agents/session.py | 7 ++++++-
src/llama_stack_client/resources/alpha/agents/steps.py | 7 ++++++-
src/llama_stack_client/resources/alpha/agents/turn.py | 7 ++++++-
src/llama_stack_client/resources/alpha/alpha.py | 7 ++++++-
src/llama_stack_client/resources/alpha/benchmarks.py | 7 ++++++-
src/llama_stack_client/resources/alpha/eval/__init__.py | 7 ++++++-
src/llama_stack_client/resources/alpha/eval/eval.py | 7 ++++++-
src/llama_stack_client/resources/alpha/eval/jobs.py | 7 ++++++-
src/llama_stack_client/resources/alpha/inference.py | 7 ++++++-
.../resources/alpha/post_training/__init__.py | 7 ++++++-
.../resources/alpha/post_training/job.py | 7 ++++++-
.../resources/alpha/post_training/post_training.py | 7 ++++++-
src/llama_stack_client/resources/beta/datasets.py | 7 ++++++-
src/llama_stack_client/types/alpha/agent_create_params.py | 7 ++++++-
.../types/alpha/agent_create_response.py | 7 ++++++-
src/llama_stack_client/types/alpha/agent_list_params.py | 7 ++++++-
src/llama_stack_client/types/alpha/agent_list_response.py | 7 ++++++-
.../types/alpha/agent_retrieve_response.py | 7 ++++++-
.../types/alpha/agents/agent_turn_response_stream_chunk.py | 7 ++++++-
src/llama_stack_client/types/alpha/agents/session.py | 7 ++++++-
.../types/alpha/agents/session_create_params.py | 7 ++++++-
.../types/alpha/agents/session_create_response.py | 7 ++++++-
.../types/alpha/agents/session_list_params.py | 7 ++++++-
.../types/alpha/agents/session_list_response.py | 7 ++++++-
.../types/alpha/agents/session_retrieve_params.py | 7 ++++++-
.../types/alpha/agents/step_retrieve_response.py | 7 ++++++-
src/llama_stack_client/types/alpha/agents/turn.py | 7 ++++++-
.../types/alpha/agents/turn_create_params.py | 7 ++++++-
.../types/alpha/agents/turn_response_event.py | 7 ++++++-
.../types/alpha/agents/turn_resume_params.py | 7 ++++++-
.../types/alpha/algorithm_config_param.py | 7 ++++++-
src/llama_stack_client/types/alpha/benchmark.py | 7 ++++++-
.../types/alpha/benchmark_config_param.py | 7 ++++++-
.../types/alpha/benchmark_list_response.py | 7 ++++++-
.../types/alpha/benchmark_register_params.py | 7 ++++++-
.../types/alpha/eval_evaluate_rows_alpha_params.py | 7 ++++++-
.../types/alpha/eval_evaluate_rows_params.py | 7 ++++++-
.../types/alpha/eval_run_eval_alpha_params.py | 7 ++++++-
src/llama_stack_client/types/alpha/eval_run_eval_params.py | 7 ++++++-
src/llama_stack_client/types/alpha/evaluate_response.py | 7 ++++++-
.../types/alpha/inference_rerank_params.py | 7 ++++++-
.../types/alpha/inference_rerank_response.py | 7 ++++++-
src/llama_stack_client/types/alpha/inference_step.py | 7 ++++++-
src/llama_stack_client/types/alpha/job.py | 7 ++++++-
.../types/alpha/list_benchmarks_response.py | 7 ++++++-
.../types/alpha/list_post_training_jobs_response.py | 7 ++++++-
.../types/alpha/memory_retrieval_step.py | 7 ++++++-
.../types/alpha/post_training/job_artifacts_params.py | 7 ++++++-
.../types/alpha/post_training/job_artifacts_response.py | 7 ++++++-
.../types/alpha/post_training/job_cancel_params.py | 7 ++++++-
.../types/alpha/post_training/job_list_response.py | 7 ++++++-
.../types/alpha/post_training/job_status_params.py | 7 ++++++-
.../types/alpha/post_training/job_status_response.py | 7 ++++++-
src/llama_stack_client/types/alpha/post_training_job.py | 7 ++++++-
.../alpha/post_training_preference_optimize_params.py | 7 ++++++-
.../alpha/post_training_supervised_fine_tune_params.py | 7 ++++++-
src/llama_stack_client/types/alpha/shield_call_step.py | 7 ++++++-
src/llama_stack_client/types/alpha/tool_execution_step.py | 7 ++++++-
src/llama_stack_client/types/alpha/tool_response.py | 7 ++++++-
src/llama_stack_client/types/alpha/tool_response_param.py | 7 ++++++-
.../types/beta/dataset_appendrows_params.py | 7 ++++++-
.../types/beta/dataset_iterrows_params.py | 7 ++++++-
.../types/beta/dataset_iterrows_response.py | 7 ++++++-
src/llama_stack_client/types/beta/dataset_list_response.py | 7 ++++++-
.../types/beta/dataset_register_params.py | 7 ++++++-
.../types/beta/dataset_register_response.py | 7 ++++++-
.../types/beta/dataset_retrieve_response.py | 7 ++++++-
.../types/beta/list_datasets_response.py | 7 ++++++-
src/llama_stack_client/types/shared/agent_config.py | 7 ++++++-
src/llama_stack_client/types/shared/response_format.py | 7 ++++++-
src/llama_stack_client/types/shared/sampling_params.py | 7 ++++++-
src/llama_stack_client/types/shared_params/agent_config.py | 7 ++++++-
.../types/shared_params/response_format.py | 7 ++++++-
.../types/shared_params/sampling_params.py | 7 ++++++-
src/llama_stack_client/types/tool_def_param.py | 7 ++++++-
tests/api_resources/alpha/__init__.py | 7 +++++++
tests/api_resources/alpha/agents/__init__.py | 7 +++++++
tests/api_resources/alpha/agents/test_session.py | 7 ++++++-
tests/api_resources/alpha/agents/test_steps.py | 7 ++++++-
tests/api_resources/alpha/agents/test_turn.py | 7 ++++++-
tests/api_resources/alpha/eval/__init__.py | 7 +++++++
tests/api_resources/alpha/eval/test_jobs.py | 7 ++++++-
tests/api_resources/alpha/post_training/__init__.py | 7 +++++++
tests/api_resources/alpha/post_training/test_job.py | 7 ++++++-
tests/api_resources/alpha/test_agents.py | 7 ++++++-
tests/api_resources/alpha/test_benchmarks.py | 7 ++++++-
tests/api_resources/alpha/test_eval.py | 7 ++++++-
tests/api_resources/alpha/test_inference.py | 7 ++++++-
tests/api_resources/alpha/test_post_training.py | 7 ++++++-
tests/api_resources/beta/test_datasets.py | 7 ++++++-
93 files changed, 562 insertions(+), 89 deletions(-)
diff --git a/src/llama_stack_client/resources/alpha/__init__.py b/src/llama_stack_client/resources/alpha/__init__.py
index 84d9534a..06686720 100644
--- a/src/llama_stack_client/resources/alpha/__init__.py
+++ b/src/llama_stack_client/resources/alpha/__init__.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from .eval import (
EvalResource,
AsyncEvalResource,
diff --git a/src/llama_stack_client/resources/alpha/agents/__init__.py b/src/llama_stack_client/resources/alpha/agents/__init__.py
index 17f0098f..c7e9db03 100644
--- a/src/llama_stack_client/resources/alpha/agents/__init__.py
+++ b/src/llama_stack_client/resources/alpha/agents/__init__.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from .turn import (
TurnResource,
AsyncTurnResource,
diff --git a/src/llama_stack_client/resources/alpha/agents/agents.py b/src/llama_stack_client/resources/alpha/agents/agents.py
index 0e81cce7..3aacfcec 100644
--- a/src/llama_stack_client/resources/alpha/agents/agents.py
+++ b/src/llama_stack_client/resources/alpha/agents/agents.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
import httpx
diff --git a/src/llama_stack_client/resources/alpha/agents/session.py b/src/llama_stack_client/resources/alpha/agents/session.py
index 2e980add..08aaa938 100644
--- a/src/llama_stack_client/resources/alpha/agents/session.py
+++ b/src/llama_stack_client/resources/alpha/agents/session.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
import httpx
diff --git a/src/llama_stack_client/resources/alpha/agents/steps.py b/src/llama_stack_client/resources/alpha/agents/steps.py
index 838822d0..7b6a9093 100644
--- a/src/llama_stack_client/resources/alpha/agents/steps.py
+++ b/src/llama_stack_client/resources/alpha/agents/steps.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
import httpx
diff --git a/src/llama_stack_client/resources/alpha/agents/turn.py b/src/llama_stack_client/resources/alpha/agents/turn.py
index ffe766b6..4a074fa5 100644
--- a/src/llama_stack_client/resources/alpha/agents/turn.py
+++ b/src/llama_stack_client/resources/alpha/agents/turn.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Iterable
diff --git a/src/llama_stack_client/resources/alpha/alpha.py b/src/llama_stack_client/resources/alpha/alpha.py
index 63ae7e3c..8ab9164b 100644
--- a/src/llama_stack_client/resources/alpha/alpha.py
+++ b/src/llama_stack_client/resources/alpha/alpha.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from ..._compat import cached_property
diff --git a/src/llama_stack_client/resources/alpha/benchmarks.py b/src/llama_stack_client/resources/alpha/benchmarks.py
index 333b9578..760a9f29 100644
--- a/src/llama_stack_client/resources/alpha/benchmarks.py
+++ b/src/llama_stack_client/resources/alpha/benchmarks.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Dict, Type, Union, Iterable, cast
diff --git a/src/llama_stack_client/resources/alpha/eval/__init__.py b/src/llama_stack_client/resources/alpha/eval/__init__.py
index f6473395..fd978681 100644
--- a/src/llama_stack_client/resources/alpha/eval/__init__.py
+++ b/src/llama_stack_client/resources/alpha/eval/__init__.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from .eval import (
EvalResource,
AsyncEvalResource,
diff --git a/src/llama_stack_client/resources/alpha/eval/eval.py b/src/llama_stack_client/resources/alpha/eval/eval.py
index b5347c0b..df84066f 100644
--- a/src/llama_stack_client/resources/alpha/eval/eval.py
+++ b/src/llama_stack_client/resources/alpha/eval/eval.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/resources/alpha/eval/jobs.py b/src/llama_stack_client/resources/alpha/eval/jobs.py
index 8f0fa026..6ead48df 100644
--- a/src/llama_stack_client/resources/alpha/eval/jobs.py
+++ b/src/llama_stack_client/resources/alpha/eval/jobs.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
import httpx
diff --git a/src/llama_stack_client/resources/alpha/inference.py b/src/llama_stack_client/resources/alpha/inference.py
index ca259357..bfa32d14 100644
--- a/src/llama_stack_client/resources/alpha/inference.py
+++ b/src/llama_stack_client/resources/alpha/inference.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Type, cast
diff --git a/src/llama_stack_client/resources/alpha/post_training/__init__.py b/src/llama_stack_client/resources/alpha/post_training/__init__.py
index e1fa2361..65afcf0f 100644
--- a/src/llama_stack_client/resources/alpha/post_training/__init__.py
+++ b/src/llama_stack_client/resources/alpha/post_training/__init__.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from .job import (
JobResource,
AsyncJobResource,
diff --git a/src/llama_stack_client/resources/alpha/post_training/job.py b/src/llama_stack_client/resources/alpha/post_training/job.py
index d9b7173e..b1fb6c3a 100644
--- a/src/llama_stack_client/resources/alpha/post_training/job.py
+++ b/src/llama_stack_client/resources/alpha/post_training/job.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Type, cast
diff --git a/src/llama_stack_client/resources/alpha/post_training/post_training.py b/src/llama_stack_client/resources/alpha/post_training/post_training.py
index a26c813a..b1428210 100644
--- a/src/llama_stack_client/resources/alpha/post_training/post_training.py
+++ b/src/llama_stack_client/resources/alpha/post_training/post_training.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/resources/beta/datasets.py b/src/llama_stack_client/resources/beta/datasets.py
index 1b924b28..1c332f6e 100644
--- a/src/llama_stack_client/resources/beta/datasets.py
+++ b/src/llama_stack_client/resources/beta/datasets.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Dict, Type, Union, Iterable, cast
diff --git a/src/llama_stack_client/types/alpha/agent_create_params.py b/src/llama_stack_client/types/alpha/agent_create_params.py
index 368704b2..43c32d8f 100644
--- a/src/llama_stack_client/types/alpha/agent_create_params.py
+++ b/src/llama_stack_client/types/alpha/agent_create_params.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing_extensions import Required, TypedDict
diff --git a/src/llama_stack_client/types/alpha/agent_create_response.py b/src/llama_stack_client/types/alpha/agent_create_response.py
index 9b155198..45e53b72 100644
--- a/src/llama_stack_client/types/alpha/agent_create_response.py
+++ b/src/llama_stack_client/types/alpha/agent_create_response.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from ..._models import BaseModel
__all__ = ["AgentCreateResponse"]
diff --git a/src/llama_stack_client/types/alpha/agent_list_params.py b/src/llama_stack_client/types/alpha/agent_list_params.py
index 15da545b..e1f5e99d 100644
--- a/src/llama_stack_client/types/alpha/agent_list_params.py
+++ b/src/llama_stack_client/types/alpha/agent_list_params.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing_extensions import TypedDict
diff --git a/src/llama_stack_client/types/alpha/agent_list_response.py b/src/llama_stack_client/types/alpha/agent_list_response.py
index 69de5001..993de5da 100644
--- a/src/llama_stack_client/types/alpha/agent_list_response.py
+++ b/src/llama_stack_client/types/alpha/agent_list_response.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import Dict, List, Union, Optional
from ..._models import BaseModel
diff --git a/src/llama_stack_client/types/alpha/agent_retrieve_response.py b/src/llama_stack_client/types/alpha/agent_retrieve_response.py
index 87d79b7b..3a597e7e 100644
--- a/src/llama_stack_client/types/alpha/agent_retrieve_response.py
+++ b/src/llama_stack_client/types/alpha/agent_retrieve_response.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from datetime import datetime
from ..._models import BaseModel
diff --git a/src/llama_stack_client/types/alpha/agents/agent_turn_response_stream_chunk.py b/src/llama_stack_client/types/alpha/agents/agent_turn_response_stream_chunk.py
index c45bf756..13b12877 100644
--- a/src/llama_stack_client/types/alpha/agents/agent_turn_response_stream_chunk.py
+++ b/src/llama_stack_client/types/alpha/agents/agent_turn_response_stream_chunk.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from ...._models import BaseModel
from .turn_response_event import TurnResponseEvent
diff --git a/src/llama_stack_client/types/alpha/agents/session.py b/src/llama_stack_client/types/alpha/agents/session.py
index 9b60853a..865e419a 100644
--- a/src/llama_stack_client/types/alpha/agents/session.py
+++ b/src/llama_stack_client/types/alpha/agents/session.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import List
from datetime import datetime
diff --git a/src/llama_stack_client/types/alpha/agents/session_create_params.py b/src/llama_stack_client/types/alpha/agents/session_create_params.py
index 5f421ae9..f0009504 100644
--- a/src/llama_stack_client/types/alpha/agents/session_create_params.py
+++ b/src/llama_stack_client/types/alpha/agents/session_create_params.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing_extensions import Required, TypedDict
diff --git a/src/llama_stack_client/types/alpha/agents/session_create_response.py b/src/llama_stack_client/types/alpha/agents/session_create_response.py
index 7d30c61a..f35becd4 100644
--- a/src/llama_stack_client/types/alpha/agents/session_create_response.py
+++ b/src/llama_stack_client/types/alpha/agents/session_create_response.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from ...._models import BaseModel
__all__ = ["SessionCreateResponse"]
diff --git a/src/llama_stack_client/types/alpha/agents/session_list_params.py b/src/llama_stack_client/types/alpha/agents/session_list_params.py
index 0644d1ae..d1546116 100644
--- a/src/llama_stack_client/types/alpha/agents/session_list_params.py
+++ b/src/llama_stack_client/types/alpha/agents/session_list_params.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing_extensions import TypedDict
diff --git a/src/llama_stack_client/types/alpha/agents/session_list_response.py b/src/llama_stack_client/types/alpha/agents/session_list_response.py
index 23a51baf..09e59c52 100644
--- a/src/llama_stack_client/types/alpha/agents/session_list_response.py
+++ b/src/llama_stack_client/types/alpha/agents/session_list_response.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import Dict, List, Union, Optional
from ...._models import BaseModel
diff --git a/src/llama_stack_client/types/alpha/agents/session_retrieve_params.py b/src/llama_stack_client/types/alpha/agents/session_retrieve_params.py
index 116190cc..a2cb53c0 100644
--- a/src/llama_stack_client/types/alpha/agents/session_retrieve_params.py
+++ b/src/llama_stack_client/types/alpha/agents/session_retrieve_params.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing_extensions import Required, TypedDict
diff --git a/src/llama_stack_client/types/alpha/agents/step_retrieve_response.py b/src/llama_stack_client/types/alpha/agents/step_retrieve_response.py
index 55b64355..ab18c945 100644
--- a/src/llama_stack_client/types/alpha/agents/step_retrieve_response.py
+++ b/src/llama_stack_client/types/alpha/agents/step_retrieve_response.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import Union
from typing_extensions import Annotated, TypeAlias
diff --git a/src/llama_stack_client/types/alpha/agents/turn.py b/src/llama_stack_client/types/alpha/agents/turn.py
index 74ef22aa..b258dbca 100644
--- a/src/llama_stack_client/types/alpha/agents/turn.py
+++ b/src/llama_stack_client/types/alpha/agents/turn.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import List, Union, Optional
from datetime import datetime
from typing_extensions import Literal, Annotated, TypeAlias
diff --git a/src/llama_stack_client/types/alpha/agents/turn_create_params.py b/src/llama_stack_client/types/alpha/agents/turn_create_params.py
index 7225959a..c4e810fa 100644
--- a/src/llama_stack_client/types/alpha/agents/turn_create_params.py
+++ b/src/llama_stack_client/types/alpha/agents/turn_create_params.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/types/alpha/agents/turn_response_event.py b/src/llama_stack_client/types/alpha/agents/turn_response_event.py
index c162135d..70f92073 100644
--- a/src/llama_stack_client/types/alpha/agents/turn_response_event.py
+++ b/src/llama_stack_client/types/alpha/agents/turn_response_event.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import Dict, List, Union, Optional
from typing_extensions import Literal, Annotated, TypeAlias
diff --git a/src/llama_stack_client/types/alpha/agents/turn_resume_params.py b/src/llama_stack_client/types/alpha/agents/turn_resume_params.py
index 554e3578..7f4d205a 100644
--- a/src/llama_stack_client/types/alpha/agents/turn_resume_params.py
+++ b/src/llama_stack_client/types/alpha/agents/turn_resume_params.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Union, Iterable
diff --git a/src/llama_stack_client/types/alpha/algorithm_config_param.py b/src/llama_stack_client/types/alpha/algorithm_config_param.py
index d6da8130..df015181 100644
--- a/src/llama_stack_client/types/alpha/algorithm_config_param.py
+++ b/src/llama_stack_client/types/alpha/algorithm_config_param.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Union
diff --git a/src/llama_stack_client/types/alpha/benchmark.py b/src/llama_stack_client/types/alpha/benchmark.py
index 4313a7af..9a0a10ce 100644
--- a/src/llama_stack_client/types/alpha/benchmark.py
+++ b/src/llama_stack_client/types/alpha/benchmark.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import Dict, List, Union, Optional
from typing_extensions import Literal
diff --git a/src/llama_stack_client/types/alpha/benchmark_config_param.py b/src/llama_stack_client/types/alpha/benchmark_config_param.py
index 4a3ea512..c5f88ef4 100644
--- a/src/llama_stack_client/types/alpha/benchmark_config_param.py
+++ b/src/llama_stack_client/types/alpha/benchmark_config_param.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Dict, Union
diff --git a/src/llama_stack_client/types/alpha/benchmark_list_response.py b/src/llama_stack_client/types/alpha/benchmark_list_response.py
index b2e8ad2b..8bc89245 100644
--- a/src/llama_stack_client/types/alpha/benchmark_list_response.py
+++ b/src/llama_stack_client/types/alpha/benchmark_list_response.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import List
from typing_extensions import TypeAlias
diff --git a/src/llama_stack_client/types/alpha/benchmark_register_params.py b/src/llama_stack_client/types/alpha/benchmark_register_params.py
index c8cb02ff..cc8fdd93 100644
--- a/src/llama_stack_client/types/alpha/benchmark_register_params.py
+++ b/src/llama_stack_client/types/alpha/benchmark_register_params.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/types/alpha/eval_evaluate_rows_alpha_params.py b/src/llama_stack_client/types/alpha/eval_evaluate_rows_alpha_params.py
index 0422e224..7d5ffe14 100644
--- a/src/llama_stack_client/types/alpha/eval_evaluate_rows_alpha_params.py
+++ b/src/llama_stack_client/types/alpha/eval_evaluate_rows_alpha_params.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/types/alpha/eval_evaluate_rows_params.py b/src/llama_stack_client/types/alpha/eval_evaluate_rows_params.py
index 4ff9bd5b..fe284e4d 100644
--- a/src/llama_stack_client/types/alpha/eval_evaluate_rows_params.py
+++ b/src/llama_stack_client/types/alpha/eval_evaluate_rows_params.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py b/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py
index e07393b3..8dc839cb 100644
--- a/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py
+++ b/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing_extensions import Required, TypedDict
diff --git a/src/llama_stack_client/types/alpha/eval_run_eval_params.py b/src/llama_stack_client/types/alpha/eval_run_eval_params.py
index 33596fc2..d65931b4 100644
--- a/src/llama_stack_client/types/alpha/eval_run_eval_params.py
+++ b/src/llama_stack_client/types/alpha/eval_run_eval_params.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing_extensions import Required, TypedDict
diff --git a/src/llama_stack_client/types/alpha/evaluate_response.py b/src/llama_stack_client/types/alpha/evaluate_response.py
index 4cd2e0f7..18141364 100644
--- a/src/llama_stack_client/types/alpha/evaluate_response.py
+++ b/src/llama_stack_client/types/alpha/evaluate_response.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import Dict, List, Union
from ..._models import BaseModel
diff --git a/src/llama_stack_client/types/alpha/inference_rerank_params.py b/src/llama_stack_client/types/alpha/inference_rerank_params.py
index 4c506240..5f12dfe9 100644
--- a/src/llama_stack_client/types/alpha/inference_rerank_params.py
+++ b/src/llama_stack_client/types/alpha/inference_rerank_params.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Union
diff --git a/src/llama_stack_client/types/alpha/inference_rerank_response.py b/src/llama_stack_client/types/alpha/inference_rerank_response.py
index 391f8a3b..ef158300 100644
--- a/src/llama_stack_client/types/alpha/inference_rerank_response.py
+++ b/src/llama_stack_client/types/alpha/inference_rerank_response.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import List
from typing_extensions import TypeAlias
diff --git a/src/llama_stack_client/types/alpha/inference_step.py b/src/llama_stack_client/types/alpha/inference_step.py
index a7e446d1..67299802 100644
--- a/src/llama_stack_client/types/alpha/inference_step.py
+++ b/src/llama_stack_client/types/alpha/inference_step.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import Optional
from datetime import datetime
from typing_extensions import Literal
diff --git a/src/llama_stack_client/types/alpha/job.py b/src/llama_stack_client/types/alpha/job.py
index 23506692..57e6b399 100644
--- a/src/llama_stack_client/types/alpha/job.py
+++ b/src/llama_stack_client/types/alpha/job.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing_extensions import Literal
from ..._models import BaseModel
diff --git a/src/llama_stack_client/types/alpha/list_benchmarks_response.py b/src/llama_stack_client/types/alpha/list_benchmarks_response.py
index accaf36c..ca3d5462 100644
--- a/src/llama_stack_client/types/alpha/list_benchmarks_response.py
+++ b/src/llama_stack_client/types/alpha/list_benchmarks_response.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from ..._models import BaseModel
from .benchmark_list_response import BenchmarkListResponse
diff --git a/src/llama_stack_client/types/alpha/list_post_training_jobs_response.py b/src/llama_stack_client/types/alpha/list_post_training_jobs_response.py
index 7af3bd96..d369c7d4 100644
--- a/src/llama_stack_client/types/alpha/list_post_training_jobs_response.py
+++ b/src/llama_stack_client/types/alpha/list_post_training_jobs_response.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from ..._models import BaseModel
from .post_training.job_list_response import JobListResponse
diff --git a/src/llama_stack_client/types/alpha/memory_retrieval_step.py b/src/llama_stack_client/types/alpha/memory_retrieval_step.py
index 787453af..9929137c 100644
--- a/src/llama_stack_client/types/alpha/memory_retrieval_step.py
+++ b/src/llama_stack_client/types/alpha/memory_retrieval_step.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import Optional
from datetime import datetime
from typing_extensions import Literal
diff --git a/src/llama_stack_client/types/alpha/post_training/job_artifacts_params.py b/src/llama_stack_client/types/alpha/post_training/job_artifacts_params.py
index 851ebf5f..537f2326 100644
--- a/src/llama_stack_client/types/alpha/post_training/job_artifacts_params.py
+++ b/src/llama_stack_client/types/alpha/post_training/job_artifacts_params.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing_extensions import Required, TypedDict
diff --git a/src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py b/src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py
index 74edff26..07b813d2 100644
--- a/src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py
+++ b/src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import List, Optional
from datetime import datetime
diff --git a/src/llama_stack_client/types/alpha/post_training/job_cancel_params.py b/src/llama_stack_client/types/alpha/post_training/job_cancel_params.py
index 3a976e87..d110a44b 100644
--- a/src/llama_stack_client/types/alpha/post_training/job_cancel_params.py
+++ b/src/llama_stack_client/types/alpha/post_training/job_cancel_params.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing_extensions import Required, TypedDict
diff --git a/src/llama_stack_client/types/alpha/post_training/job_list_response.py b/src/llama_stack_client/types/alpha/post_training/job_list_response.py
index 33bd89f1..2af5f14b 100644
--- a/src/llama_stack_client/types/alpha/post_training/job_list_response.py
+++ b/src/llama_stack_client/types/alpha/post_training/job_list_response.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import List
from typing_extensions import TypeAlias
diff --git a/src/llama_stack_client/types/alpha/post_training/job_status_params.py b/src/llama_stack_client/types/alpha/post_training/job_status_params.py
index d5e040e0..6ae01343 100644
--- a/src/llama_stack_client/types/alpha/post_training/job_status_params.py
+++ b/src/llama_stack_client/types/alpha/post_training/job_status_params.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing_extensions import Required, TypedDict
diff --git a/src/llama_stack_client/types/alpha/post_training/job_status_response.py b/src/llama_stack_client/types/alpha/post_training/job_status_response.py
index 1ccc9ca2..ebb135d7 100644
--- a/src/llama_stack_client/types/alpha/post_training/job_status_response.py
+++ b/src/llama_stack_client/types/alpha/post_training/job_status_response.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import Dict, List, Union, Optional
from datetime import datetime
from typing_extensions import Literal
diff --git a/src/llama_stack_client/types/alpha/post_training_job.py b/src/llama_stack_client/types/alpha/post_training_job.py
index 7d9417db..3446fa70 100644
--- a/src/llama_stack_client/types/alpha/post_training_job.py
+++ b/src/llama_stack_client/types/alpha/post_training_job.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from ..._models import BaseModel
__all__ = ["PostTrainingJob"]
diff --git a/src/llama_stack_client/types/alpha/post_training_preference_optimize_params.py b/src/llama_stack_client/types/alpha/post_training_preference_optimize_params.py
index 2dcd294d..b1555e62 100644
--- a/src/llama_stack_client/types/alpha/post_training_preference_optimize_params.py
+++ b/src/llama_stack_client/types/alpha/post_training_preference_optimize_params.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/types/alpha/post_training_supervised_fine_tune_params.py b/src/llama_stack_client/types/alpha/post_training_supervised_fine_tune_params.py
index c23796f0..730f2460 100644
--- a/src/llama_stack_client/types/alpha/post_training_supervised_fine_tune_params.py
+++ b/src/llama_stack_client/types/alpha/post_training_supervised_fine_tune_params.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/types/alpha/shield_call_step.py b/src/llama_stack_client/types/alpha/shield_call_step.py
index 80176555..f1fe1804 100644
--- a/src/llama_stack_client/types/alpha/shield_call_step.py
+++ b/src/llama_stack_client/types/alpha/shield_call_step.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import Optional
from datetime import datetime
from typing_extensions import Literal
diff --git a/src/llama_stack_client/types/alpha/tool_execution_step.py b/src/llama_stack_client/types/alpha/tool_execution_step.py
index 1761e889..b6f638b1 100644
--- a/src/llama_stack_client/types/alpha/tool_execution_step.py
+++ b/src/llama_stack_client/types/alpha/tool_execution_step.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import List, Optional
from datetime import datetime
from typing_extensions import Literal
diff --git a/src/llama_stack_client/types/alpha/tool_response.py b/src/llama_stack_client/types/alpha/tool_response.py
index fb749f75..56b8ce46 100644
--- a/src/llama_stack_client/types/alpha/tool_response.py
+++ b/src/llama_stack_client/types/alpha/tool_response.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import Dict, List, Union, Optional
from typing_extensions import Literal
diff --git a/src/llama_stack_client/types/alpha/tool_response_param.py b/src/llama_stack_client/types/alpha/tool_response_param.py
index e833211f..4c49f3c9 100644
--- a/src/llama_stack_client/types/alpha/tool_response_param.py
+++ b/src/llama_stack_client/types/alpha/tool_response_param.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/types/beta/dataset_appendrows_params.py b/src/llama_stack_client/types/beta/dataset_appendrows_params.py
index 2e96e124..4a1cd708 100644
--- a/src/llama_stack_client/types/beta/dataset_appendrows_params.py
+++ b/src/llama_stack_client/types/beta/dataset_appendrows_params.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/types/beta/dataset_iterrows_params.py b/src/llama_stack_client/types/beta/dataset_iterrows_params.py
index 99065312..e3c0feae 100644
--- a/src/llama_stack_client/types/beta/dataset_iterrows_params.py
+++ b/src/llama_stack_client/types/beta/dataset_iterrows_params.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing_extensions import TypedDict
diff --git a/src/llama_stack_client/types/beta/dataset_iterrows_response.py b/src/llama_stack_client/types/beta/dataset_iterrows_response.py
index ec7d06b4..03344c76 100644
--- a/src/llama_stack_client/types/beta/dataset_iterrows_response.py
+++ b/src/llama_stack_client/types/beta/dataset_iterrows_response.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import Dict, List, Union, Optional
from ..._models import BaseModel
diff --git a/src/llama_stack_client/types/beta/dataset_list_response.py b/src/llama_stack_client/types/beta/dataset_list_response.py
index 2553a1a3..11ef892d 100644
--- a/src/llama_stack_client/types/beta/dataset_list_response.py
+++ b/src/llama_stack_client/types/beta/dataset_list_response.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import Dict, List, Union, Optional
from typing_extensions import Literal, Annotated, TypeAlias
diff --git a/src/llama_stack_client/types/beta/dataset_register_params.py b/src/llama_stack_client/types/beta/dataset_register_params.py
index 6fd5db3f..21979b13 100644
--- a/src/llama_stack_client/types/beta/dataset_register_params.py
+++ b/src/llama_stack_client/types/beta/dataset_register_params.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/types/beta/dataset_register_response.py b/src/llama_stack_client/types/beta/dataset_register_response.py
index ee12b860..398c91cd 100644
--- a/src/llama_stack_client/types/beta/dataset_register_response.py
+++ b/src/llama_stack_client/types/beta/dataset_register_response.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import Dict, List, Union, Optional
from typing_extensions import Literal, Annotated, TypeAlias
diff --git a/src/llama_stack_client/types/beta/dataset_retrieve_response.py b/src/llama_stack_client/types/beta/dataset_retrieve_response.py
index 5e2cc0ca..e37b166e 100644
--- a/src/llama_stack_client/types/beta/dataset_retrieve_response.py
+++ b/src/llama_stack_client/types/beta/dataset_retrieve_response.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import Dict, List, Union, Optional
from typing_extensions import Literal, Annotated, TypeAlias
diff --git a/src/llama_stack_client/types/beta/list_datasets_response.py b/src/llama_stack_client/types/beta/list_datasets_response.py
index 7caa3220..2ecaba4e 100644
--- a/src/llama_stack_client/types/beta/list_datasets_response.py
+++ b/src/llama_stack_client/types/beta/list_datasets_response.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from ..._models import BaseModel
from .dataset_list_response import DatasetListResponse
diff --git a/src/llama_stack_client/types/shared/agent_config.py b/src/llama_stack_client/types/shared/agent_config.py
index eb116159..28b26397 100644
--- a/src/llama_stack_client/types/shared/agent_config.py
+++ b/src/llama_stack_client/types/shared/agent_config.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import Dict, List, Union, Optional
from typing_extensions import Literal, TypeAlias
diff --git a/src/llama_stack_client/types/shared/response_format.py b/src/llama_stack_client/types/shared/response_format.py
index 537df8d5..e996d9ff 100644
--- a/src/llama_stack_client/types/shared/response_format.py
+++ b/src/llama_stack_client/types/shared/response_format.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import Dict, List, Union
from typing_extensions import Literal, Annotated, TypeAlias
diff --git a/src/llama_stack_client/types/shared/sampling_params.py b/src/llama_stack_client/types/shared/sampling_params.py
index 6823aee7..622687a0 100644
--- a/src/llama_stack_client/types/shared/sampling_params.py
+++ b/src/llama_stack_client/types/shared/sampling_params.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import List, Union, Optional
from typing_extensions import Literal, Annotated, TypeAlias
diff --git a/src/llama_stack_client/types/shared_params/agent_config.py b/src/llama_stack_client/types/shared_params/agent_config.py
index c1206bd5..de04c83d 100644
--- a/src/llama_stack_client/types/shared_params/agent_config.py
+++ b/src/llama_stack_client/types/shared_params/agent_config.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/types/shared_params/response_format.py b/src/llama_stack_client/types/shared_params/response_format.py
index 53411700..b82680ed 100644
--- a/src/llama_stack_client/types/shared_params/response_format.py
+++ b/src/llama_stack_client/types/shared_params/response_format.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/types/shared_params/sampling_params.py b/src/llama_stack_client/types/shared_params/sampling_params.py
index e5eebddd..5e762498 100644
--- a/src/llama_stack_client/types/shared_params/sampling_params.py
+++ b/src/llama_stack_client/types/shared_params/sampling_params.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Union
diff --git a/src/llama_stack_client/types/tool_def_param.py b/src/llama_stack_client/types/tool_def_param.py
index d14ef6cc..8e41b121 100644
--- a/src/llama_stack_client/types/tool_def_param.py
+++ b/src/llama_stack_client/types/tool_def_param.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/tests/api_resources/alpha/__init__.py b/tests/api_resources/alpha/__init__.py
index fd8019a9..fe2f0b1c 100644
--- a/tests/api_resources/alpha/__init__.py
+++ b/tests/api_resources/alpha/__init__.py
@@ -1 +1,8 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
diff --git a/tests/api_resources/alpha/agents/__init__.py b/tests/api_resources/alpha/agents/__init__.py
index fd8019a9..fe2f0b1c 100644
--- a/tests/api_resources/alpha/agents/__init__.py
+++ b/tests/api_resources/alpha/agents/__init__.py
@@ -1 +1,8 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
diff --git a/tests/api_resources/alpha/agents/test_session.py b/tests/api_resources/alpha/agents/test_session.py
index 9c49e6bc..8a3fad4f 100644
--- a/tests/api_resources/alpha/agents/test_session.py
+++ b/tests/api_resources/alpha/agents/test_session.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
import os
diff --git a/tests/api_resources/alpha/agents/test_steps.py b/tests/api_resources/alpha/agents/test_steps.py
index 5bf35fc3..f099b2d6 100644
--- a/tests/api_resources/alpha/agents/test_steps.py
+++ b/tests/api_resources/alpha/agents/test_steps.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
import os
diff --git a/tests/api_resources/alpha/agents/test_turn.py b/tests/api_resources/alpha/agents/test_turn.py
index 9a2a500f..0f56eef1 100644
--- a/tests/api_resources/alpha/agents/test_turn.py
+++ b/tests/api_resources/alpha/agents/test_turn.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
import os
diff --git a/tests/api_resources/alpha/eval/__init__.py b/tests/api_resources/alpha/eval/__init__.py
index fd8019a9..fe2f0b1c 100644
--- a/tests/api_resources/alpha/eval/__init__.py
+++ b/tests/api_resources/alpha/eval/__init__.py
@@ -1 +1,8 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
diff --git a/tests/api_resources/alpha/eval/test_jobs.py b/tests/api_resources/alpha/eval/test_jobs.py
index f4ea9ce1..c521d9ee 100644
--- a/tests/api_resources/alpha/eval/test_jobs.py
+++ b/tests/api_resources/alpha/eval/test_jobs.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
import os
diff --git a/tests/api_resources/alpha/post_training/__init__.py b/tests/api_resources/alpha/post_training/__init__.py
index fd8019a9..fe2f0b1c 100644
--- a/tests/api_resources/alpha/post_training/__init__.py
+++ b/tests/api_resources/alpha/post_training/__init__.py
@@ -1 +1,8 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
diff --git a/tests/api_resources/alpha/post_training/test_job.py b/tests/api_resources/alpha/post_training/test_job.py
index bec18796..568c175b 100644
--- a/tests/api_resources/alpha/post_training/test_job.py
+++ b/tests/api_resources/alpha/post_training/test_job.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
import os
diff --git a/tests/api_resources/alpha/test_agents.py b/tests/api_resources/alpha/test_agents.py
index 075bd478..09a6ce17 100644
--- a/tests/api_resources/alpha/test_agents.py
+++ b/tests/api_resources/alpha/test_agents.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
import os
diff --git a/tests/api_resources/alpha/test_benchmarks.py b/tests/api_resources/alpha/test_benchmarks.py
index 98652091..a00a6ab1 100644
--- a/tests/api_resources/alpha/test_benchmarks.py
+++ b/tests/api_resources/alpha/test_benchmarks.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
import os
diff --git a/tests/api_resources/alpha/test_eval.py b/tests/api_resources/alpha/test_eval.py
index 88bd0c0c..3e8f0110 100644
--- a/tests/api_resources/alpha/test_eval.py
+++ b/tests/api_resources/alpha/test_eval.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
import os
diff --git a/tests/api_resources/alpha/test_inference.py b/tests/api_resources/alpha/test_inference.py
index 551e2213..98565983 100644
--- a/tests/api_resources/alpha/test_inference.py
+++ b/tests/api_resources/alpha/test_inference.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
import os
diff --git a/tests/api_resources/alpha/test_post_training.py b/tests/api_resources/alpha/test_post_training.py
index 14229811..c9827a1a 100644
--- a/tests/api_resources/alpha/test_post_training.py
+++ b/tests/api_resources/alpha/test_post_training.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
import os
diff --git a/tests/api_resources/beta/test_datasets.py b/tests/api_resources/beta/test_datasets.py
index 3d035a16..23798e8d 100644
--- a/tests/api_resources/beta/test_datasets.py
+++ b/tests/api_resources/beta/test_datasets.py
@@ -1,5 +1,10 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
import os
From 55a8efc0a60f44c8c93e18b2b60215f051405be4 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Thu, 30 Oct 2025 13:26:22 -0700
Subject: [PATCH 7/9] fix(headers): add a newline
---
src/llama_stack_client/resources/alpha/__init__.py | 1 +
src/llama_stack_client/resources/alpha/agents/__init__.py | 1 +
src/llama_stack_client/resources/alpha/agents/agents.py | 1 +
src/llama_stack_client/resources/alpha/agents/session.py | 1 +
src/llama_stack_client/resources/alpha/agents/steps.py | 1 +
src/llama_stack_client/resources/alpha/agents/turn.py | 1 +
src/llama_stack_client/resources/alpha/alpha.py | 1 +
src/llama_stack_client/resources/alpha/benchmarks.py | 1 +
src/llama_stack_client/resources/alpha/eval/__init__.py | 1 +
src/llama_stack_client/resources/alpha/eval/eval.py | 1 +
src/llama_stack_client/resources/alpha/eval/jobs.py | 1 +
src/llama_stack_client/resources/alpha/inference.py | 1 +
src/llama_stack_client/resources/alpha/post_training/__init__.py | 1 +
src/llama_stack_client/resources/alpha/post_training/job.py | 1 +
.../resources/alpha/post_training/post_training.py | 1 +
src/llama_stack_client/resources/beta/datasets.py | 1 +
src/llama_stack_client/types/alpha/agent_create_params.py | 1 +
src/llama_stack_client/types/alpha/agent_create_response.py | 1 +
src/llama_stack_client/types/alpha/agent_list_params.py | 1 +
src/llama_stack_client/types/alpha/agent_list_response.py | 1 +
src/llama_stack_client/types/alpha/agent_retrieve_response.py | 1 +
.../types/alpha/agents/agent_turn_response_stream_chunk.py | 1 +
src/llama_stack_client/types/alpha/agents/session.py | 1 +
.../types/alpha/agents/session_create_params.py | 1 +
.../types/alpha/agents/session_create_response.py | 1 +
src/llama_stack_client/types/alpha/agents/session_list_params.py | 1 +
.../types/alpha/agents/session_list_response.py | 1 +
.../types/alpha/agents/session_retrieve_params.py | 1 +
.../types/alpha/agents/step_retrieve_response.py | 1 +
src/llama_stack_client/types/alpha/agents/turn.py | 1 +
src/llama_stack_client/types/alpha/agents/turn_create_params.py | 1 +
src/llama_stack_client/types/alpha/agents/turn_response_event.py | 1 +
src/llama_stack_client/types/alpha/agents/turn_resume_params.py | 1 +
src/llama_stack_client/types/alpha/algorithm_config_param.py | 1 +
src/llama_stack_client/types/alpha/benchmark.py | 1 +
src/llama_stack_client/types/alpha/benchmark_config_param.py | 1 +
src/llama_stack_client/types/alpha/benchmark_list_response.py | 1 +
src/llama_stack_client/types/alpha/benchmark_register_params.py | 1 +
.../types/alpha/eval_evaluate_rows_alpha_params.py | 1 +
src/llama_stack_client/types/alpha/eval_evaluate_rows_params.py | 1 +
src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py | 1 +
src/llama_stack_client/types/alpha/eval_run_eval_params.py | 1 +
src/llama_stack_client/types/alpha/evaluate_response.py | 1 +
src/llama_stack_client/types/alpha/inference_rerank_params.py | 1 +
src/llama_stack_client/types/alpha/inference_rerank_response.py | 1 +
src/llama_stack_client/types/alpha/inference_step.py | 1 +
src/llama_stack_client/types/alpha/job.py | 1 +
src/llama_stack_client/types/alpha/list_benchmarks_response.py | 1 +
.../types/alpha/list_post_training_jobs_response.py | 1 +
src/llama_stack_client/types/alpha/memory_retrieval_step.py | 1 +
.../types/alpha/post_training/job_artifacts_params.py | 1 +
.../types/alpha/post_training/job_artifacts_response.py | 1 +
.../types/alpha/post_training/job_cancel_params.py | 1 +
.../types/alpha/post_training/job_list_response.py | 1 +
.../types/alpha/post_training/job_status_params.py | 1 +
.../types/alpha/post_training/job_status_response.py | 1 +
src/llama_stack_client/types/alpha/post_training_job.py | 1 +
.../types/alpha/post_training_preference_optimize_params.py | 1 +
.../types/alpha/post_training_supervised_fine_tune_params.py | 1 +
src/llama_stack_client/types/alpha/shield_call_step.py | 1 +
src/llama_stack_client/types/alpha/tool_execution_step.py | 1 +
src/llama_stack_client/types/alpha/tool_response.py | 1 +
src/llama_stack_client/types/alpha/tool_response_param.py | 1 +
src/llama_stack_client/types/beta/dataset_appendrows_params.py | 1 +
src/llama_stack_client/types/beta/dataset_iterrows_params.py | 1 +
src/llama_stack_client/types/beta/dataset_iterrows_response.py | 1 +
src/llama_stack_client/types/beta/dataset_list_response.py | 1 +
src/llama_stack_client/types/beta/dataset_register_params.py | 1 +
src/llama_stack_client/types/beta/dataset_register_response.py | 1 +
src/llama_stack_client/types/beta/dataset_retrieve_response.py | 1 +
src/llama_stack_client/types/beta/list_datasets_response.py | 1 +
src/llama_stack_client/types/shared/agent_config.py | 1 +
src/llama_stack_client/types/shared/response_format.py | 1 +
src/llama_stack_client/types/shared/sampling_params.py | 1 +
src/llama_stack_client/types/shared_params/agent_config.py | 1 +
src/llama_stack_client/types/shared_params/response_format.py | 1 +
src/llama_stack_client/types/shared_params/sampling_params.py | 1 +
src/llama_stack_client/types/tool_def_param.py | 1 +
tests/api_resources/alpha/agents/test_session.py | 1 +
tests/api_resources/alpha/agents/test_steps.py | 1 +
tests/api_resources/alpha/agents/test_turn.py | 1 +
tests/api_resources/alpha/eval/test_jobs.py | 1 +
tests/api_resources/alpha/post_training/test_job.py | 1 +
tests/api_resources/alpha/test_agents.py | 1 +
tests/api_resources/alpha/test_benchmarks.py | 1 +
tests/api_resources/alpha/test_eval.py | 1 +
tests/api_resources/alpha/test_inference.py | 1 +
tests/api_resources/alpha/test_post_training.py | 1 +
tests/api_resources/beta/test_datasets.py | 1 +
89 files changed, 89 insertions(+)
diff --git a/src/llama_stack_client/resources/alpha/__init__.py b/src/llama_stack_client/resources/alpha/__init__.py
index 06686720..ae13bed1 100644
--- a/src/llama_stack_client/resources/alpha/__init__.py
+++ b/src/llama_stack_client/resources/alpha/__init__.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from .eval import (
EvalResource,
AsyncEvalResource,
diff --git a/src/llama_stack_client/resources/alpha/agents/__init__.py b/src/llama_stack_client/resources/alpha/agents/__init__.py
index c7e9db03..6502dfa1 100644
--- a/src/llama_stack_client/resources/alpha/agents/__init__.py
+++ b/src/llama_stack_client/resources/alpha/agents/__init__.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from .turn import (
TurnResource,
AsyncTurnResource,
diff --git a/src/llama_stack_client/resources/alpha/agents/agents.py b/src/llama_stack_client/resources/alpha/agents/agents.py
index 3aacfcec..ac5f58e4 100644
--- a/src/llama_stack_client/resources/alpha/agents/agents.py
+++ b/src/llama_stack_client/resources/alpha/agents/agents.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
import httpx
diff --git a/src/llama_stack_client/resources/alpha/agents/session.py b/src/llama_stack_client/resources/alpha/agents/session.py
index 08aaa938..ae2b5af6 100644
--- a/src/llama_stack_client/resources/alpha/agents/session.py
+++ b/src/llama_stack_client/resources/alpha/agents/session.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
import httpx
diff --git a/src/llama_stack_client/resources/alpha/agents/steps.py b/src/llama_stack_client/resources/alpha/agents/steps.py
index 7b6a9093..83624bef 100644
--- a/src/llama_stack_client/resources/alpha/agents/steps.py
+++ b/src/llama_stack_client/resources/alpha/agents/steps.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
import httpx
diff --git a/src/llama_stack_client/resources/alpha/agents/turn.py b/src/llama_stack_client/resources/alpha/agents/turn.py
index 4a074fa5..85e3d1e5 100644
--- a/src/llama_stack_client/resources/alpha/agents/turn.py
+++ b/src/llama_stack_client/resources/alpha/agents/turn.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Iterable
diff --git a/src/llama_stack_client/resources/alpha/alpha.py b/src/llama_stack_client/resources/alpha/alpha.py
index 8ab9164b..9ba65570 100644
--- a/src/llama_stack_client/resources/alpha/alpha.py
+++ b/src/llama_stack_client/resources/alpha/alpha.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from ..._compat import cached_property
diff --git a/src/llama_stack_client/resources/alpha/benchmarks.py b/src/llama_stack_client/resources/alpha/benchmarks.py
index 760a9f29..dc74cc85 100644
--- a/src/llama_stack_client/resources/alpha/benchmarks.py
+++ b/src/llama_stack_client/resources/alpha/benchmarks.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Dict, Type, Union, Iterable, cast
diff --git a/src/llama_stack_client/resources/alpha/eval/__init__.py b/src/llama_stack_client/resources/alpha/eval/__init__.py
index fd978681..3aa93594 100644
--- a/src/llama_stack_client/resources/alpha/eval/__init__.py
+++ b/src/llama_stack_client/resources/alpha/eval/__init__.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from .eval import (
EvalResource,
AsyncEvalResource,
diff --git a/src/llama_stack_client/resources/alpha/eval/eval.py b/src/llama_stack_client/resources/alpha/eval/eval.py
index df84066f..89101510 100644
--- a/src/llama_stack_client/resources/alpha/eval/eval.py
+++ b/src/llama_stack_client/resources/alpha/eval/eval.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/resources/alpha/eval/jobs.py b/src/llama_stack_client/resources/alpha/eval/jobs.py
index 6ead48df..94eed41e 100644
--- a/src/llama_stack_client/resources/alpha/eval/jobs.py
+++ b/src/llama_stack_client/resources/alpha/eval/jobs.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
import httpx
diff --git a/src/llama_stack_client/resources/alpha/inference.py b/src/llama_stack_client/resources/alpha/inference.py
index bfa32d14..9db21d26 100644
--- a/src/llama_stack_client/resources/alpha/inference.py
+++ b/src/llama_stack_client/resources/alpha/inference.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Type, cast
diff --git a/src/llama_stack_client/resources/alpha/post_training/__init__.py b/src/llama_stack_client/resources/alpha/post_training/__init__.py
index 65afcf0f..81a6a807 100644
--- a/src/llama_stack_client/resources/alpha/post_training/__init__.py
+++ b/src/llama_stack_client/resources/alpha/post_training/__init__.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from .job import (
JobResource,
AsyncJobResource,
diff --git a/src/llama_stack_client/resources/alpha/post_training/job.py b/src/llama_stack_client/resources/alpha/post_training/job.py
index b1fb6c3a..8e09f335 100644
--- a/src/llama_stack_client/resources/alpha/post_training/job.py
+++ b/src/llama_stack_client/resources/alpha/post_training/job.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Type, cast
diff --git a/src/llama_stack_client/resources/alpha/post_training/post_training.py b/src/llama_stack_client/resources/alpha/post_training/post_training.py
index b1428210..9b1fe87a 100644
--- a/src/llama_stack_client/resources/alpha/post_training/post_training.py
+++ b/src/llama_stack_client/resources/alpha/post_training/post_training.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/resources/beta/datasets.py b/src/llama_stack_client/resources/beta/datasets.py
index 1c332f6e..03321e48 100644
--- a/src/llama_stack_client/resources/beta/datasets.py
+++ b/src/llama_stack_client/resources/beta/datasets.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Dict, Type, Union, Iterable, cast
diff --git a/src/llama_stack_client/types/alpha/agent_create_params.py b/src/llama_stack_client/types/alpha/agent_create_params.py
index 43c32d8f..9c420379 100644
--- a/src/llama_stack_client/types/alpha/agent_create_params.py
+++ b/src/llama_stack_client/types/alpha/agent_create_params.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing_extensions import Required, TypedDict
diff --git a/src/llama_stack_client/types/alpha/agent_create_response.py b/src/llama_stack_client/types/alpha/agent_create_response.py
index 45e53b72..70e7d98b 100644
--- a/src/llama_stack_client/types/alpha/agent_create_response.py
+++ b/src/llama_stack_client/types/alpha/agent_create_response.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from ..._models import BaseModel
__all__ = ["AgentCreateResponse"]
diff --git a/src/llama_stack_client/types/alpha/agent_list_params.py b/src/llama_stack_client/types/alpha/agent_list_params.py
index e1f5e99d..0b50ef24 100644
--- a/src/llama_stack_client/types/alpha/agent_list_params.py
+++ b/src/llama_stack_client/types/alpha/agent_list_params.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing_extensions import TypedDict
diff --git a/src/llama_stack_client/types/alpha/agent_list_response.py b/src/llama_stack_client/types/alpha/agent_list_response.py
index 993de5da..212a4a9f 100644
--- a/src/llama_stack_client/types/alpha/agent_list_response.py
+++ b/src/llama_stack_client/types/alpha/agent_list_response.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import Dict, List, Union, Optional
from ..._models import BaseModel
diff --git a/src/llama_stack_client/types/alpha/agent_retrieve_response.py b/src/llama_stack_client/types/alpha/agent_retrieve_response.py
index 3a597e7e..bcf40e21 100644
--- a/src/llama_stack_client/types/alpha/agent_retrieve_response.py
+++ b/src/llama_stack_client/types/alpha/agent_retrieve_response.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from datetime import datetime
from ..._models import BaseModel
diff --git a/src/llama_stack_client/types/alpha/agents/agent_turn_response_stream_chunk.py b/src/llama_stack_client/types/alpha/agents/agent_turn_response_stream_chunk.py
index 13b12877..5a518938 100644
--- a/src/llama_stack_client/types/alpha/agents/agent_turn_response_stream_chunk.py
+++ b/src/llama_stack_client/types/alpha/agents/agent_turn_response_stream_chunk.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from ...._models import BaseModel
from .turn_response_event import TurnResponseEvent
diff --git a/src/llama_stack_client/types/alpha/agents/session.py b/src/llama_stack_client/types/alpha/agents/session.py
index 865e419a..c2b3571d 100644
--- a/src/llama_stack_client/types/alpha/agents/session.py
+++ b/src/llama_stack_client/types/alpha/agents/session.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import List
from datetime import datetime
diff --git a/src/llama_stack_client/types/alpha/agents/session_create_params.py b/src/llama_stack_client/types/alpha/agents/session_create_params.py
index f0009504..e8fb03fa 100644
--- a/src/llama_stack_client/types/alpha/agents/session_create_params.py
+++ b/src/llama_stack_client/types/alpha/agents/session_create_params.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing_extensions import Required, TypedDict
diff --git a/src/llama_stack_client/types/alpha/agents/session_create_response.py b/src/llama_stack_client/types/alpha/agents/session_create_response.py
index f35becd4..dd8b1eba 100644
--- a/src/llama_stack_client/types/alpha/agents/session_create_response.py
+++ b/src/llama_stack_client/types/alpha/agents/session_create_response.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from ...._models import BaseModel
__all__ = ["SessionCreateResponse"]
diff --git a/src/llama_stack_client/types/alpha/agents/session_list_params.py b/src/llama_stack_client/types/alpha/agents/session_list_params.py
index d1546116..0ff7609b 100644
--- a/src/llama_stack_client/types/alpha/agents/session_list_params.py
+++ b/src/llama_stack_client/types/alpha/agents/session_list_params.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing_extensions import TypedDict
diff --git a/src/llama_stack_client/types/alpha/agents/session_list_response.py b/src/llama_stack_client/types/alpha/agents/session_list_response.py
index 09e59c52..ad686bd3 100644
--- a/src/llama_stack_client/types/alpha/agents/session_list_response.py
+++ b/src/llama_stack_client/types/alpha/agents/session_list_response.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import Dict, List, Union, Optional
from ...._models import BaseModel
diff --git a/src/llama_stack_client/types/alpha/agents/session_retrieve_params.py b/src/llama_stack_client/types/alpha/agents/session_retrieve_params.py
index a2cb53c0..27bc0761 100644
--- a/src/llama_stack_client/types/alpha/agents/session_retrieve_params.py
+++ b/src/llama_stack_client/types/alpha/agents/session_retrieve_params.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing_extensions import Required, TypedDict
diff --git a/src/llama_stack_client/types/alpha/agents/step_retrieve_response.py b/src/llama_stack_client/types/alpha/agents/step_retrieve_response.py
index ab18c945..300c6ffb 100644
--- a/src/llama_stack_client/types/alpha/agents/step_retrieve_response.py
+++ b/src/llama_stack_client/types/alpha/agents/step_retrieve_response.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import Union
from typing_extensions import Annotated, TypeAlias
diff --git a/src/llama_stack_client/types/alpha/agents/turn.py b/src/llama_stack_client/types/alpha/agents/turn.py
index b258dbca..51ec9ddf 100644
--- a/src/llama_stack_client/types/alpha/agents/turn.py
+++ b/src/llama_stack_client/types/alpha/agents/turn.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import List, Union, Optional
from datetime import datetime
from typing_extensions import Literal, Annotated, TypeAlias
diff --git a/src/llama_stack_client/types/alpha/agents/turn_create_params.py b/src/llama_stack_client/types/alpha/agents/turn_create_params.py
index c4e810fa..79ee42be 100644
--- a/src/llama_stack_client/types/alpha/agents/turn_create_params.py
+++ b/src/llama_stack_client/types/alpha/agents/turn_create_params.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/types/alpha/agents/turn_response_event.py b/src/llama_stack_client/types/alpha/agents/turn_response_event.py
index 70f92073..3088e623 100644
--- a/src/llama_stack_client/types/alpha/agents/turn_response_event.py
+++ b/src/llama_stack_client/types/alpha/agents/turn_response_event.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import Dict, List, Union, Optional
from typing_extensions import Literal, Annotated, TypeAlias
diff --git a/src/llama_stack_client/types/alpha/agents/turn_resume_params.py b/src/llama_stack_client/types/alpha/agents/turn_resume_params.py
index 7f4d205a..23fda973 100644
--- a/src/llama_stack_client/types/alpha/agents/turn_resume_params.py
+++ b/src/llama_stack_client/types/alpha/agents/turn_resume_params.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Union, Iterable
diff --git a/src/llama_stack_client/types/alpha/algorithm_config_param.py b/src/llama_stack_client/types/alpha/algorithm_config_param.py
index df015181..bf3b7d0b 100644
--- a/src/llama_stack_client/types/alpha/algorithm_config_param.py
+++ b/src/llama_stack_client/types/alpha/algorithm_config_param.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Union
diff --git a/src/llama_stack_client/types/alpha/benchmark.py b/src/llama_stack_client/types/alpha/benchmark.py
index 9a0a10ce..b70c8f28 100644
--- a/src/llama_stack_client/types/alpha/benchmark.py
+++ b/src/llama_stack_client/types/alpha/benchmark.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import Dict, List, Union, Optional
from typing_extensions import Literal
diff --git a/src/llama_stack_client/types/alpha/benchmark_config_param.py b/src/llama_stack_client/types/alpha/benchmark_config_param.py
index c5f88ef4..e32cd187 100644
--- a/src/llama_stack_client/types/alpha/benchmark_config_param.py
+++ b/src/llama_stack_client/types/alpha/benchmark_config_param.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Dict, Union
diff --git a/src/llama_stack_client/types/alpha/benchmark_list_response.py b/src/llama_stack_client/types/alpha/benchmark_list_response.py
index 8bc89245..56d7d8ba 100644
--- a/src/llama_stack_client/types/alpha/benchmark_list_response.py
+++ b/src/llama_stack_client/types/alpha/benchmark_list_response.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import List
from typing_extensions import TypeAlias
diff --git a/src/llama_stack_client/types/alpha/benchmark_register_params.py b/src/llama_stack_client/types/alpha/benchmark_register_params.py
index cc8fdd93..84be3786 100644
--- a/src/llama_stack_client/types/alpha/benchmark_register_params.py
+++ b/src/llama_stack_client/types/alpha/benchmark_register_params.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/types/alpha/eval_evaluate_rows_alpha_params.py b/src/llama_stack_client/types/alpha/eval_evaluate_rows_alpha_params.py
index 7d5ffe14..36036ff9 100644
--- a/src/llama_stack_client/types/alpha/eval_evaluate_rows_alpha_params.py
+++ b/src/llama_stack_client/types/alpha/eval_evaluate_rows_alpha_params.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/types/alpha/eval_evaluate_rows_params.py b/src/llama_stack_client/types/alpha/eval_evaluate_rows_params.py
index fe284e4d..3aba96a2 100644
--- a/src/llama_stack_client/types/alpha/eval_evaluate_rows_params.py
+++ b/src/llama_stack_client/types/alpha/eval_evaluate_rows_params.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py b/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py
index 8dc839cb..760f9dc6 100644
--- a/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py
+++ b/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing_extensions import Required, TypedDict
diff --git a/src/llama_stack_client/types/alpha/eval_run_eval_params.py b/src/llama_stack_client/types/alpha/eval_run_eval_params.py
index d65931b4..bb166ba3 100644
--- a/src/llama_stack_client/types/alpha/eval_run_eval_params.py
+++ b/src/llama_stack_client/types/alpha/eval_run_eval_params.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing_extensions import Required, TypedDict
diff --git a/src/llama_stack_client/types/alpha/evaluate_response.py b/src/llama_stack_client/types/alpha/evaluate_response.py
index 18141364..69d310ef 100644
--- a/src/llama_stack_client/types/alpha/evaluate_response.py
+++ b/src/llama_stack_client/types/alpha/evaluate_response.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import Dict, List, Union
from ..._models import BaseModel
diff --git a/src/llama_stack_client/types/alpha/inference_rerank_params.py b/src/llama_stack_client/types/alpha/inference_rerank_params.py
index 5f12dfe9..6502c3d4 100644
--- a/src/llama_stack_client/types/alpha/inference_rerank_params.py
+++ b/src/llama_stack_client/types/alpha/inference_rerank_params.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Union
diff --git a/src/llama_stack_client/types/alpha/inference_rerank_response.py b/src/llama_stack_client/types/alpha/inference_rerank_response.py
index ef158300..f2cd133c 100644
--- a/src/llama_stack_client/types/alpha/inference_rerank_response.py
+++ b/src/llama_stack_client/types/alpha/inference_rerank_response.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import List
from typing_extensions import TypeAlias
diff --git a/src/llama_stack_client/types/alpha/inference_step.py b/src/llama_stack_client/types/alpha/inference_step.py
index 67299802..a4dfa054 100644
--- a/src/llama_stack_client/types/alpha/inference_step.py
+++ b/src/llama_stack_client/types/alpha/inference_step.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import Optional
from datetime import datetime
from typing_extensions import Literal
diff --git a/src/llama_stack_client/types/alpha/job.py b/src/llama_stack_client/types/alpha/job.py
index 57e6b399..696eba85 100644
--- a/src/llama_stack_client/types/alpha/job.py
+++ b/src/llama_stack_client/types/alpha/job.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing_extensions import Literal
from ..._models import BaseModel
diff --git a/src/llama_stack_client/types/alpha/list_benchmarks_response.py b/src/llama_stack_client/types/alpha/list_benchmarks_response.py
index ca3d5462..8ea3b963 100644
--- a/src/llama_stack_client/types/alpha/list_benchmarks_response.py
+++ b/src/llama_stack_client/types/alpha/list_benchmarks_response.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from ..._models import BaseModel
from .benchmark_list_response import BenchmarkListResponse
diff --git a/src/llama_stack_client/types/alpha/list_post_training_jobs_response.py b/src/llama_stack_client/types/alpha/list_post_training_jobs_response.py
index d369c7d4..6c87bcd7 100644
--- a/src/llama_stack_client/types/alpha/list_post_training_jobs_response.py
+++ b/src/llama_stack_client/types/alpha/list_post_training_jobs_response.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from ..._models import BaseModel
from .post_training.job_list_response import JobListResponse
diff --git a/src/llama_stack_client/types/alpha/memory_retrieval_step.py b/src/llama_stack_client/types/alpha/memory_retrieval_step.py
index 9929137c..1b5708ce 100644
--- a/src/llama_stack_client/types/alpha/memory_retrieval_step.py
+++ b/src/llama_stack_client/types/alpha/memory_retrieval_step.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import Optional
from datetime import datetime
from typing_extensions import Literal
diff --git a/src/llama_stack_client/types/alpha/post_training/job_artifacts_params.py b/src/llama_stack_client/types/alpha/post_training/job_artifacts_params.py
index 537f2326..e18e76e0 100644
--- a/src/llama_stack_client/types/alpha/post_training/job_artifacts_params.py
+++ b/src/llama_stack_client/types/alpha/post_training/job_artifacts_params.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing_extensions import Required, TypedDict
diff --git a/src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py b/src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py
index 07b813d2..508ba75d 100644
--- a/src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py
+++ b/src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import List, Optional
from datetime import datetime
diff --git a/src/llama_stack_client/types/alpha/post_training/job_cancel_params.py b/src/llama_stack_client/types/alpha/post_training/job_cancel_params.py
index d110a44b..fc1f9a32 100644
--- a/src/llama_stack_client/types/alpha/post_training/job_cancel_params.py
+++ b/src/llama_stack_client/types/alpha/post_training/job_cancel_params.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing_extensions import Required, TypedDict
diff --git a/src/llama_stack_client/types/alpha/post_training/job_list_response.py b/src/llama_stack_client/types/alpha/post_training/job_list_response.py
index 2af5f14b..95b5d7c5 100644
--- a/src/llama_stack_client/types/alpha/post_training/job_list_response.py
+++ b/src/llama_stack_client/types/alpha/post_training/job_list_response.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import List
from typing_extensions import TypeAlias
diff --git a/src/llama_stack_client/types/alpha/post_training/job_status_params.py b/src/llama_stack_client/types/alpha/post_training/job_status_params.py
index 6ae01343..5b832347 100644
--- a/src/llama_stack_client/types/alpha/post_training/job_status_params.py
+++ b/src/llama_stack_client/types/alpha/post_training/job_status_params.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing_extensions import Required, TypedDict
diff --git a/src/llama_stack_client/types/alpha/post_training/job_status_response.py b/src/llama_stack_client/types/alpha/post_training/job_status_response.py
index ebb135d7..cfe9d54f 100644
--- a/src/llama_stack_client/types/alpha/post_training/job_status_response.py
+++ b/src/llama_stack_client/types/alpha/post_training/job_status_response.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import Dict, List, Union, Optional
from datetime import datetime
from typing_extensions import Literal
diff --git a/src/llama_stack_client/types/alpha/post_training_job.py b/src/llama_stack_client/types/alpha/post_training_job.py
index 3446fa70..5d3a5391 100644
--- a/src/llama_stack_client/types/alpha/post_training_job.py
+++ b/src/llama_stack_client/types/alpha/post_training_job.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from ..._models import BaseModel
__all__ = ["PostTrainingJob"]
diff --git a/src/llama_stack_client/types/alpha/post_training_preference_optimize_params.py b/src/llama_stack_client/types/alpha/post_training_preference_optimize_params.py
index b1555e62..35c9e023 100644
--- a/src/llama_stack_client/types/alpha/post_training_preference_optimize_params.py
+++ b/src/llama_stack_client/types/alpha/post_training_preference_optimize_params.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/types/alpha/post_training_supervised_fine_tune_params.py b/src/llama_stack_client/types/alpha/post_training_supervised_fine_tune_params.py
index 730f2460..dfdc68e8 100644
--- a/src/llama_stack_client/types/alpha/post_training_supervised_fine_tune_params.py
+++ b/src/llama_stack_client/types/alpha/post_training_supervised_fine_tune_params.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/types/alpha/shield_call_step.py b/src/llama_stack_client/types/alpha/shield_call_step.py
index f1fe1804..f332a4d5 100644
--- a/src/llama_stack_client/types/alpha/shield_call_step.py
+++ b/src/llama_stack_client/types/alpha/shield_call_step.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import Optional
from datetime import datetime
from typing_extensions import Literal
diff --git a/src/llama_stack_client/types/alpha/tool_execution_step.py b/src/llama_stack_client/types/alpha/tool_execution_step.py
index b6f638b1..04259318 100644
--- a/src/llama_stack_client/types/alpha/tool_execution_step.py
+++ b/src/llama_stack_client/types/alpha/tool_execution_step.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import List, Optional
from datetime import datetime
from typing_extensions import Literal
diff --git a/src/llama_stack_client/types/alpha/tool_response.py b/src/llama_stack_client/types/alpha/tool_response.py
index 56b8ce46..250ae9de 100644
--- a/src/llama_stack_client/types/alpha/tool_response.py
+++ b/src/llama_stack_client/types/alpha/tool_response.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import Dict, List, Union, Optional
from typing_extensions import Literal
diff --git a/src/llama_stack_client/types/alpha/tool_response_param.py b/src/llama_stack_client/types/alpha/tool_response_param.py
index 4c49f3c9..9d745da5 100644
--- a/src/llama_stack_client/types/alpha/tool_response_param.py
+++ b/src/llama_stack_client/types/alpha/tool_response_param.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/types/beta/dataset_appendrows_params.py b/src/llama_stack_client/types/beta/dataset_appendrows_params.py
index 4a1cd708..b929d790 100644
--- a/src/llama_stack_client/types/beta/dataset_appendrows_params.py
+++ b/src/llama_stack_client/types/beta/dataset_appendrows_params.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/types/beta/dataset_iterrows_params.py b/src/llama_stack_client/types/beta/dataset_iterrows_params.py
index e3c0feae..262e0e3f 100644
--- a/src/llama_stack_client/types/beta/dataset_iterrows_params.py
+++ b/src/llama_stack_client/types/beta/dataset_iterrows_params.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing_extensions import TypedDict
diff --git a/src/llama_stack_client/types/beta/dataset_iterrows_response.py b/src/llama_stack_client/types/beta/dataset_iterrows_response.py
index 03344c76..5b23d46d 100644
--- a/src/llama_stack_client/types/beta/dataset_iterrows_response.py
+++ b/src/llama_stack_client/types/beta/dataset_iterrows_response.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import Dict, List, Union, Optional
from ..._models import BaseModel
diff --git a/src/llama_stack_client/types/beta/dataset_list_response.py b/src/llama_stack_client/types/beta/dataset_list_response.py
index 11ef892d..7e6c1141 100644
--- a/src/llama_stack_client/types/beta/dataset_list_response.py
+++ b/src/llama_stack_client/types/beta/dataset_list_response.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import Dict, List, Union, Optional
from typing_extensions import Literal, Annotated, TypeAlias
diff --git a/src/llama_stack_client/types/beta/dataset_register_params.py b/src/llama_stack_client/types/beta/dataset_register_params.py
index 21979b13..75803a8a 100644
--- a/src/llama_stack_client/types/beta/dataset_register_params.py
+++ b/src/llama_stack_client/types/beta/dataset_register_params.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/types/beta/dataset_register_response.py b/src/llama_stack_client/types/beta/dataset_register_response.py
index 398c91cd..e9bb82d2 100644
--- a/src/llama_stack_client/types/beta/dataset_register_response.py
+++ b/src/llama_stack_client/types/beta/dataset_register_response.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import Dict, List, Union, Optional
from typing_extensions import Literal, Annotated, TypeAlias
diff --git a/src/llama_stack_client/types/beta/dataset_retrieve_response.py b/src/llama_stack_client/types/beta/dataset_retrieve_response.py
index e37b166e..3358288d 100644
--- a/src/llama_stack_client/types/beta/dataset_retrieve_response.py
+++ b/src/llama_stack_client/types/beta/dataset_retrieve_response.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import Dict, List, Union, Optional
from typing_extensions import Literal, Annotated, TypeAlias
diff --git a/src/llama_stack_client/types/beta/list_datasets_response.py b/src/llama_stack_client/types/beta/list_datasets_response.py
index 2ecaba4e..4f71ae16 100644
--- a/src/llama_stack_client/types/beta/list_datasets_response.py
+++ b/src/llama_stack_client/types/beta/list_datasets_response.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from ..._models import BaseModel
from .dataset_list_response import DatasetListResponse
diff --git a/src/llama_stack_client/types/shared/agent_config.py b/src/llama_stack_client/types/shared/agent_config.py
index 28b26397..71d53ca5 100644
--- a/src/llama_stack_client/types/shared/agent_config.py
+++ b/src/llama_stack_client/types/shared/agent_config.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import Dict, List, Union, Optional
from typing_extensions import Literal, TypeAlias
diff --git a/src/llama_stack_client/types/shared/response_format.py b/src/llama_stack_client/types/shared/response_format.py
index e996d9ff..0d601a23 100644
--- a/src/llama_stack_client/types/shared/response_format.py
+++ b/src/llama_stack_client/types/shared/response_format.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import Dict, List, Union
from typing_extensions import Literal, Annotated, TypeAlias
diff --git a/src/llama_stack_client/types/shared/sampling_params.py b/src/llama_stack_client/types/shared/sampling_params.py
index 622687a0..f34ed6f5 100644
--- a/src/llama_stack_client/types/shared/sampling_params.py
+++ b/src/llama_stack_client/types/shared/sampling_params.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from typing import List, Union, Optional
from typing_extensions import Literal, Annotated, TypeAlias
diff --git a/src/llama_stack_client/types/shared_params/agent_config.py b/src/llama_stack_client/types/shared_params/agent_config.py
index de04c83d..d444e5da 100644
--- a/src/llama_stack_client/types/shared_params/agent_config.py
+++ b/src/llama_stack_client/types/shared_params/agent_config.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/types/shared_params/response_format.py b/src/llama_stack_client/types/shared_params/response_format.py
index b82680ed..c3146dfc 100644
--- a/src/llama_stack_client/types/shared_params/response_format.py
+++ b/src/llama_stack_client/types/shared_params/response_format.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/src/llama_stack_client/types/shared_params/sampling_params.py b/src/llama_stack_client/types/shared_params/sampling_params.py
index 5e762498..9be15690 100644
--- a/src/llama_stack_client/types/shared_params/sampling_params.py
+++ b/src/llama_stack_client/types/shared_params/sampling_params.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Union
diff --git a/src/llama_stack_client/types/tool_def_param.py b/src/llama_stack_client/types/tool_def_param.py
index 8e41b121..99e7def1 100644
--- a/src/llama_stack_client/types/tool_def_param.py
+++ b/src/llama_stack_client/types/tool_def_param.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
from typing import Dict, Union, Iterable
diff --git a/tests/api_resources/alpha/agents/test_session.py b/tests/api_resources/alpha/agents/test_session.py
index 8a3fad4f..554c2d4e 100644
--- a/tests/api_resources/alpha/agents/test_session.py
+++ b/tests/api_resources/alpha/agents/test_session.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
import os
diff --git a/tests/api_resources/alpha/agents/test_steps.py b/tests/api_resources/alpha/agents/test_steps.py
index f099b2d6..c001dd23 100644
--- a/tests/api_resources/alpha/agents/test_steps.py
+++ b/tests/api_resources/alpha/agents/test_steps.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
import os
diff --git a/tests/api_resources/alpha/agents/test_turn.py b/tests/api_resources/alpha/agents/test_turn.py
index 0f56eef1..26f4a7b7 100644
--- a/tests/api_resources/alpha/agents/test_turn.py
+++ b/tests/api_resources/alpha/agents/test_turn.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
import os
diff --git a/tests/api_resources/alpha/eval/test_jobs.py b/tests/api_resources/alpha/eval/test_jobs.py
index c521d9ee..42844d80 100644
--- a/tests/api_resources/alpha/eval/test_jobs.py
+++ b/tests/api_resources/alpha/eval/test_jobs.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
import os
diff --git a/tests/api_resources/alpha/post_training/test_job.py b/tests/api_resources/alpha/post_training/test_job.py
index 568c175b..611bf4b6 100644
--- a/tests/api_resources/alpha/post_training/test_job.py
+++ b/tests/api_resources/alpha/post_training/test_job.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
import os
diff --git a/tests/api_resources/alpha/test_agents.py b/tests/api_resources/alpha/test_agents.py
index 09a6ce17..3324871c 100644
--- a/tests/api_resources/alpha/test_agents.py
+++ b/tests/api_resources/alpha/test_agents.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
import os
diff --git a/tests/api_resources/alpha/test_benchmarks.py b/tests/api_resources/alpha/test_benchmarks.py
index a00a6ab1..71ad6bc2 100644
--- a/tests/api_resources/alpha/test_benchmarks.py
+++ b/tests/api_resources/alpha/test_benchmarks.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
import os
diff --git a/tests/api_resources/alpha/test_eval.py b/tests/api_resources/alpha/test_eval.py
index 3e8f0110..1ee20010 100644
--- a/tests/api_resources/alpha/test_eval.py
+++ b/tests/api_resources/alpha/test_eval.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
import os
diff --git a/tests/api_resources/alpha/test_inference.py b/tests/api_resources/alpha/test_inference.py
index 98565983..d1308222 100644
--- a/tests/api_resources/alpha/test_inference.py
+++ b/tests/api_resources/alpha/test_inference.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
import os
diff --git a/tests/api_resources/alpha/test_post_training.py b/tests/api_resources/alpha/test_post_training.py
index c9827a1a..92f45593 100644
--- a/tests/api_resources/alpha/test_post_training.py
+++ b/tests/api_resources/alpha/test_post_training.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
import os
diff --git a/tests/api_resources/beta/test_datasets.py b/tests/api_resources/beta/test_datasets.py
index 23798e8d..7a6fc7c9 100644
--- a/tests/api_resources/beta/test_datasets.py
+++ b/tests/api_resources/beta/test_datasets.py
@@ -5,6 +5,7 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
from __future__ import annotations
import os
From 799b9084266c390604829dd1eef483bf3b941134 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Thu, 30 Oct 2025 13:30:41 -0700
Subject: [PATCH 8/9] fix: clean pre-commit
---
tests/api_resources/alpha/__init__.py | 1 -
tests/api_resources/alpha/agents/__init__.py | 1 -
tests/api_resources/alpha/eval/__init__.py | 1 -
tests/api_resources/alpha/post_training/__init__.py | 1 -
4 files changed, 4 deletions(-)
diff --git a/tests/api_resources/alpha/__init__.py b/tests/api_resources/alpha/__init__.py
index fe2f0b1c..6a8e62e9 100644
--- a/tests/api_resources/alpha/__init__.py
+++ b/tests/api_resources/alpha/__init__.py
@@ -5,4 +5,3 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
diff --git a/tests/api_resources/alpha/agents/__init__.py b/tests/api_resources/alpha/agents/__init__.py
index fe2f0b1c..6a8e62e9 100644
--- a/tests/api_resources/alpha/agents/__init__.py
+++ b/tests/api_resources/alpha/agents/__init__.py
@@ -5,4 +5,3 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
diff --git a/tests/api_resources/alpha/eval/__init__.py b/tests/api_resources/alpha/eval/__init__.py
index fe2f0b1c..6a8e62e9 100644
--- a/tests/api_resources/alpha/eval/__init__.py
+++ b/tests/api_resources/alpha/eval/__init__.py
@@ -5,4 +5,3 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
diff --git a/tests/api_resources/alpha/post_training/__init__.py b/tests/api_resources/alpha/post_training/__init__.py
index fe2f0b1c..6a8e62e9 100644
--- a/tests/api_resources/alpha/post_training/__init__.py
+++ b/tests/api_resources/alpha/post_training/__init__.py
@@ -5,4 +5,3 @@
# the root directory of this source tree.
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
From 1f1f00f5dbba1e617ae258bc34074df9b9cb2ebc Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Thu, 30 Oct 2025 20:31:09 +0000
Subject: [PATCH 9/9] release: 0.4.0-alpha.1
---
.release-please-manifest.json | 2 +-
CHANGELOG.md | 27 +++++++++++++++++++++++++++
pyproject.toml | 2 +-
3 files changed, 29 insertions(+), 2 deletions(-)
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index f57aac48..a1e0736b 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "0.3.1-alpha.2"
+ ".": "0.4.0-alpha.1"
}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c2ebd74f..ab7d3936 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,32 @@
# Changelog
+## 0.4.0-alpha.1 (2025-10-30)
+
+Full Changelog: [v0.3.1-alpha.2...v0.4.0-alpha.1](https://github.com/llamastack/llama-stack-client-python/compare/v0.3.1-alpha.2...v0.4.0-alpha.1)
+
+### ⚠ BREAKING CHANGES
+
+* **api:** /v1/inspect only lists v1 apis by default
+* **api:** /v1/inspect only lists v1 apis by default
+
+### Features
+
+* **api:** Adding prompts API to stainless config ([114198b](https://github.com/llamastack/llama-stack-client-python/commit/114198bef4244ec27f7e163beb2e554da0dbd213))
+* **api:** manual updates??! ([d8ab6cb](https://github.com/llamastack/llama-stack-client-python/commit/d8ab6cb77267af53f3f2e9ff3ebaab9364a754c7))
+
+
+### Bug Fixes
+
+* clean pre-commit ([799b908](https://github.com/llamastack/llama-stack-client-python/commit/799b9084266c390604829dd1eef483bf3b941134))
+* **client:** close streams without requiring full consumption ([d861708](https://github.com/llamastack/llama-stack-client-python/commit/d8617084062acbb81c26b6c22ea613e397aa969b))
+* **headers:** add a newline ([55a8efc](https://github.com/llamastack/llama-stack-client-python/commit/55a8efc0a60f44c8c93e18b2b60215f051405be4))
+
+
+### Chores
+
+* **api:** /v1/inspect only lists v1 apis by default ([209de45](https://github.com/llamastack/llama-stack-client-python/commit/209de45599de19183a1cd14bc3567e34d2374184))
+* **api:** /v1/inspect only lists v1 apis by default ([b36e2ab](https://github.com/llamastack/llama-stack-client-python/commit/b36e2ab8661e4913838c2cb4501156b290876da0))
+
## 0.3.1-alpha.2 (2025-10-27)
Full Changelog: [v0.3.1-alpha.1...v0.3.1-alpha.2](https://github.com/llamastack/llama-stack-client-python/compare/v0.3.1-alpha.1...v0.3.1-alpha.2)
diff --git a/pyproject.toml b/pyproject.toml
index 3e7e62aa..1b1f5563 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "llama_stack_client"
-version = "0.3.1-alpha.2"
+version = "0.4.0-alpha.1"
description = "The official Python library for the llama-stack-client API"
dynamic = ["readme"]
license = "MIT"