diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 1ae25264..fe81c38e 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "0.3.0-alpha.1"
+ ".": "0.3.0-alpha.2"
}
diff --git a/.stats.yml b/.stats.yml
index 755df453..448f9057 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 105
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-d7bea816190382a93511491e33d1f37f707620926ab133ae8ce0883d763df741.yml
-openapi_spec_hash: f73b3af77108625edae3f25972b9e665
-config_hash: 548f336ac1b68ab1dfe385b79df764dd
+configured_endpoints: 109
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-4337a6181c2db17737133e944b4b660a5e00ea10dce6be3252918e39451e9b5f.yml
+openapi_spec_hash: a0bc8f4b5f45bc5741fed8eaa61171c3
+config_hash: d8706905bf16d9e4141e88d5a778263b
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 93d68692..57ceb8eb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,22 @@
# Changelog
+## 0.3.0-alpha.2 (2025-09-30)
+
+Full Changelog: [v0.3.0-alpha.1...v0.3.0-alpha.2](https://github.com/llamastack/llama-stack-client-python/compare/v0.3.0-alpha.1...v0.3.0-alpha.2)
+
+### Features
+
+* **api:** move post_training and eval under alpha namespace ([51a54d4](https://github.com/llamastack/llama-stack-client-python/commit/51a54d458f950da36bccab067a46f255a49933a1))
+* **api:** moving { rerank, agents } to `client.alpha.` ([04caaad](https://github.com/llamastack/llama-stack-client-python/commit/04caaad5e2631d66f5a0d30c07e1872d3aded894))
+* **api:** SDKs for vector store file batches ([ee323a0](https://github.com/llamastack/llama-stack-client-python/commit/ee323a0e29892ef53b7d98bf8ad943cd865fb785))
+* **api:** SDKs for vector store file batches apis ([6e0dcff](https://github.com/llamastack/llama-stack-client-python/commit/6e0dcff322ca2a62e027b0089c3bd6fdfec4f27a))
+
+
+### Bug Fixes
+
+* fix stream event model reference ([0208f2a](https://github.com/llamastack/llama-stack-client-python/commit/0208f2aa0153d65ed4d46123d4509c66d2730e3c))
+* **manual:** update lib/ references to use the alpha namespace ([08670f1](https://github.com/llamastack/llama-stack-client-python/commit/08670f1023a73edea559bd896a59cbbd127c5a6b))
+
## 0.3.0-alpha.1 (2025-09-30)
Full Changelog: [v0.2.23-alpha.1...v0.3.0-alpha.1](https://github.com/llamastack/llama-stack-client-python/compare/v0.2.23-alpha.1...v0.3.0-alpha.1)
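For readers tracking the rename, the feature entries above amount to a client-side namespace move; a minimal before/after sketch (the `base_url` is a placeholder, and only resources named in this diff are shown):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

# old (<= v0.3.0-alpha.1)        ->  new (>= v0.3.0-alpha.2)
# client.agents.*                ->  client.alpha.agents.*
# client.eval.*                  ->  client.alpha.eval.*
# client.post_training.*         ->  client.alpha.post_training.*
# client.inference.rerank(...)   ->  client.alpha.inference.rerank(...)

agents = client.alpha.agents.list()  # e.g., listing agents via the new namespace
```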
diff --git a/api.md b/api.md
index c246f4c1..15e91db6 100644
--- a/api.md
+++ b/api.md
@@ -3,13 +3,11 @@
```python
from llama_stack_client.types import (
AgentConfig,
- ChatCompletionResponse,
CompletionMessage,
Document,
InterleavedContent,
InterleavedContentItem,
Message,
- Metric,
ParamType,
QueryConfig,
QueryResult,
@@ -19,7 +17,6 @@ from llama_stack_client.types import (
ScoringResult,
SystemMessage,
ToolCall,
- ToolParamDefinition,
ToolResponseMessage,
UserMessage,
)
@@ -105,71 +102,6 @@ Methods:
- client.responses.input_items.list(response_id, \*\*params) -> InputItemListResponse
-# Agents
-
-Types:
-
-```python
-from llama_stack_client.types import (
- InferenceStep,
- MemoryRetrievalStep,
- ShieldCallStep,
- ToolExecutionStep,
- ToolResponse,
- AgentCreateResponse,
- AgentRetrieveResponse,
- AgentListResponse,
-)
-```
-
-Methods:
-
-- client.agents.create(\*\*params) -> AgentCreateResponse
-- client.agents.retrieve(agent_id) -> AgentRetrieveResponse
-- client.agents.list(\*\*params) -> AgentListResponse
-- client.agents.delete(agent_id) -> None
-
-## Session
-
-Types:
-
-```python
-from llama_stack_client.types.agents import Session, SessionCreateResponse, SessionListResponse
-```
-
-Methods:
-
-- client.agents.session.create(agent_id, \*\*params) -> SessionCreateResponse
-- client.agents.session.retrieve(session_id, \*, agent_id, \*\*params) -> Session
-- client.agents.session.list(agent_id, \*\*params) -> SessionListResponse
-- client.agents.session.delete(session_id, \*, agent_id) -> None
-
-## Steps
-
-Types:
-
-```python
-from llama_stack_client.types.agents import StepRetrieveResponse
-```
-
-Methods:
-
-- client.agents.steps.retrieve(step_id, \*, agent_id, session_id, turn_id) -> StepRetrieveResponse
-
-## Turn
-
-Types:
-
-```python
-from llama_stack_client.types.agents import AgentTurnResponseStreamChunk, Turn, TurnResponseEvent
-```
-
-Methods:
-
-- client.agents.turn.create(session_id, \*, agent_id, \*\*params) -> Turn
-- client.agents.turn.retrieve(turn_id, \*, agent_id, session_id) -> Turn
-- client.agents.turn.resume(turn_id, \*, agent_id, session_id, \*\*params) -> Turn
-
# Datasets
Types:
@@ -193,29 +125,6 @@ Methods:
- client.datasets.register(\*\*params) -> DatasetRegisterResponse
- client.datasets.unregister(dataset_id) -> None
-# Eval
-
-Types:
-
-```python
-from llama_stack_client.types import BenchmarkConfig, EvaluateResponse, Job
-```
-
-Methods:
-
-- client.eval.evaluate_rows(benchmark_id, \*\*params) -> EvaluateResponse
-- client.eval.evaluate_rows_alpha(benchmark_id, \*\*params) -> EvaluateResponse
-- client.eval.run_eval(benchmark_id, \*\*params) -> Job
-- client.eval.run_eval_alpha(benchmark_id, \*\*params) -> Job
-
-## Jobs
-
-Methods:
-
-- client.eval.jobs.retrieve(job_id, \*, benchmark_id) -> EvaluateResponse
-- client.eval.jobs.cancel(job_id, \*, benchmark_id) -> None
-- client.eval.jobs.status(job_id, \*, benchmark_id) -> Job
-
# Inspect
Types:
@@ -229,18 +138,6 @@ Methods:
- client.inspect.health() -> HealthInfo
- client.inspect.version() -> VersionInfo
-# Inference
-
-Types:
-
-```python
-from llama_stack_client.types import InferenceRerankResponse
-```
-
-Methods:
-
-- client.inference.rerank(\*\*params) -> InferenceRerankResponse
-
# Embeddings
Types:
@@ -367,64 +264,50 @@ Methods:
- client.vector_stores.files.delete(file_id, \*, vector_store_id) -> FileDeleteResponse
- client.vector_stores.files.content(file_id, \*, vector_store_id) -> FileContentResponse
-# Models
-
-Types:
-
-```python
-from llama_stack_client.types import ListModelsResponse, Model, ModelListResponse
-```
-
-Methods:
-
-- client.models.retrieve(model_id) -> Model
-- client.models.list() -> ModelListResponse
-- client.models.register(\*\*params) -> Model
-- client.models.unregister(model_id) -> None
-
-## OpenAI
+## FileBatches
Types:
```python
-from llama_stack_client.types.models import OpenAIListResponse
+from llama_stack_client.types.vector_stores import (
+ ListVectorStoreFilesInBatchResponse,
+ VectorStoreFileBatches,
+)
```
Methods:
-- client.models.openai.list() -> ModelListResponse
+- client.vector_stores.file_batches.create(vector_store_id, \*\*params) -> VectorStoreFileBatches
+- client.vector_stores.file_batches.retrieve(batch_id, \*, vector_store_id) -> VectorStoreFileBatches
+- client.vector_stores.file_batches.list(batch_id, \*, vector_store_id, \*\*params) -> SyncOpenAICursorPage[VectorStoreFile]
+- client.vector_stores.file_batches.cancel(batch_id, \*, vector_store_id) -> VectorStoreFileBatches
-# PostTraining
+# Models
Types:
```python
-from llama_stack_client.types import AlgorithmConfig, ListPostTrainingJobsResponse, PostTrainingJob
+from llama_stack_client.types import ListModelsResponse, Model, ModelListResponse
```
Methods:
-- client.post_training.preference_optimize(\*\*params) -> PostTrainingJob
-- client.post_training.supervised_fine_tune(\*\*params) -> PostTrainingJob
+- client.models.retrieve(model_id) -> Model
+- client.models.list() -> ModelListResponse
+- client.models.register(\*\*params) -> Model
+- client.models.unregister(model_id) -> None
-## Job
+## OpenAI
Types:
```python
-from llama_stack_client.types.post_training import (
- JobListResponse,
- JobArtifactsResponse,
- JobStatusResponse,
-)
+from llama_stack_client.types.models import OpenAIListResponse
```
Methods:
-- client.post_training.job.list() -> List[Data]
-- client.post_training.job.artifacts(\*\*params) -> JobArtifactsResponse
-- client.post_training.job.cancel(\*\*params) -> None
-- client.post_training.job.status(\*\*params) -> JobStatusResponse
+- client.models.openai.list() -> ModelListResponse
# Providers
@@ -593,3 +476,149 @@ Methods:
- client.files.list(\*\*params) -> SyncOpenAICursorPage[File]
- client.files.delete(file_id) -> DeleteFileResponse
- client.files.content(file_id) -> object
+
+# Alpha
+
+## Inference
+
+Types:
+
+```python
+from llama_stack_client.types.alpha import InferenceRerankResponse
+```
+
+Methods:
+
+- client.alpha.inference.rerank(\*\*params) -> InferenceRerankResponse
+
+## PostTraining
+
+Types:
+
+```python
+from llama_stack_client.types.alpha import (
+ AlgorithmConfig,
+ ListPostTrainingJobsResponse,
+ PostTrainingJob,
+)
+```
+
+Methods:
+
+- client.alpha.post_training.preference_optimize(\*\*params) -> PostTrainingJob
+- client.alpha.post_training.supervised_fine_tune(\*\*params) -> PostTrainingJob
+
+### Job
+
+Types:
+
+```python
+from llama_stack_client.types.alpha.post_training import (
+ JobListResponse,
+ JobArtifactsResponse,
+ JobStatusResponse,
+)
+```
+
+Methods:
+
+- client.alpha.post_training.job.list() -> List[Data]
+- client.alpha.post_training.job.artifacts(\*\*params) -> JobArtifactsResponse
+- client.alpha.post_training.job.cancel(\*\*params) -> None
+- client.alpha.post_training.job.status(\*\*params) -> JobStatusResponse
+
+## Eval
+
+Types:
+
+```python
+from llama_stack_client.types.alpha import BenchmarkConfig, EvaluateResponse, Job
+```
+
+Methods:
+
+- client.alpha.eval.evaluate_rows(benchmark_id, \*\*params) -> EvaluateResponse
+- client.alpha.eval.evaluate_rows_alpha(benchmark_id, \*\*params) -> EvaluateResponse
+- client.alpha.eval.run_eval(benchmark_id, \*\*params) -> Job
+- client.alpha.eval.run_eval_alpha(benchmark_id, \*\*params) -> Job
+
+### Jobs
+
+Methods:
+
+- client.alpha.eval.jobs.retrieve(job_id, \*, benchmark_id) -> EvaluateResponse
+- client.alpha.eval.jobs.cancel(job_id, \*, benchmark_id) -> None
+- client.alpha.eval.jobs.status(job_id, \*, benchmark_id) -> Job
+
+## Agents
+
+Types:
+
+```python
+from llama_stack_client.types.alpha import (
+ InferenceStep,
+ MemoryRetrievalStep,
+ ShieldCallStep,
+ ToolExecutionStep,
+ ToolResponse,
+ AgentCreateResponse,
+ AgentRetrieveResponse,
+ AgentListResponse,
+)
+```
+
+Methods:
+
+- client.alpha.agents.create(\*\*params) -> AgentCreateResponse
+- client.alpha.agents.retrieve(agent_id) -> AgentRetrieveResponse
+- client.alpha.agents.list(\*\*params) -> AgentListResponse
+- client.alpha.agents.delete(agent_id) -> None
+
+### Session
+
+Types:
+
+```python
+from llama_stack_client.types.alpha.agents import (
+ Session,
+ SessionCreateResponse,
+ SessionListResponse,
+)
+```
+
+Methods:
+
+- client.alpha.agents.session.create(agent_id, \*\*params) -> SessionCreateResponse
+- client.alpha.agents.session.retrieve(session_id, \*, agent_id, \*\*params) -> Session
+- client.alpha.agents.session.list(agent_id, \*\*params) -> SessionListResponse
+- client.alpha.agents.session.delete(session_id, \*, agent_id) -> None
+
+### Steps
+
+Types:
+
+```python
+from llama_stack_client.types.alpha.agents import StepRetrieveResponse
+```
+
+Methods:
+
+- client.alpha.agents.steps.retrieve(step_id, \*, agent_id, session_id, turn_id) -> StepRetrieveResponse
+
+### Turn
+
+Types:
+
+```python
+from llama_stack_client.types.alpha.agents import (
+ AgentTurnResponseStreamChunk,
+ Turn,
+ TurnResponseEvent,
+)
+```
+
+Methods:
+
+- client.alpha.agents.turn.create(session_id, \*, agent_id, \*\*params) -> Turn
+- client.alpha.agents.turn.retrieve(turn_id, \*, agent_id, session_id) -> Turn
+- client.alpha.agents.turn.resume(turn_id, \*, agent_id, session_id, \*\*params) -> Turn
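The `FileBatches` section added above is the other headline change; a hedged usage sketch of its four methods (IDs are placeholders, and the `file_ids` parameter plus the `id` attribute on the returned `VectorStoreFileBatches` object are assumptions based on the OpenAI-compatible shape, not this diff):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

batch = client.vector_stores.file_batches.create(
    vector_store_id="vs_123",            # placeholder ID
    file_ids=["file_abc", "file_def"],   # assumed parameter name
)
batch = client.vector_stores.file_batches.retrieve(batch.id, vector_store_id="vs_123")

# list() pages through the batch as a SyncOpenAICursorPage[VectorStoreFile]
for vs_file in client.vector_stores.file_batches.list(batch.id, vector_store_id="vs_123"):
    print(vs_file.id)

client.vector_stores.file_batches.cancel(batch.id, vector_store_id="vs_123")
```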
diff --git a/pyproject.toml b/pyproject.toml
index 3b50518e..ba98bc1a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "llama_stack_client"
-version = "0.3.0-alpha.1"
+version = "0.3.0-alpha.2"
description = "The official Python library for the llama-stack-client API"
dynamic = ["readme"]
license = "MIT"
diff --git a/src/llama_stack_client/__init__.py b/src/llama_stack_client/__init__.py
index dea90c02..cc2fcb9b 100644
--- a/src/llama_stack_client/__init__.py
+++ b/src/llama_stack_client/__init__.py
@@ -41,8 +41,8 @@
from .lib.agents.agent import Agent
from .lib.agents.event_logger import EventLogger as AgentEventLogger
from .lib.inference.event_logger import EventLogger as InferenceEventLogger
+from .types.alpha.agents.turn_create_params import Document
from .types.shared_params.document import Document as RAGDocument
-from .types.agents.turn_create_params import Document
__all__ = [
"types",
diff --git a/src/llama_stack_client/_client.py b/src/llama_stack_client/_client.py
index 3feccb40..6b8f11b2 100644
--- a/src/llama_stack_client/_client.py
+++ b/src/llama_stack_client/_client.py
@@ -34,10 +34,9 @@
if TYPE_CHECKING:
from .resources import (
chat,
- eval,
+ alpha,
files,
tools,
- agents,
models,
routes,
safety,
@@ -45,7 +44,6 @@
scoring,
shields,
datasets,
- inference,
providers,
responses,
telemetry,
@@ -57,7 +55,6 @@
completions,
moderations,
tool_runtime,
- post_training,
vector_stores,
scoring_functions,
synthetic_data_generation,
@@ -71,8 +68,6 @@
from .resources.shields import ShieldsResource, AsyncShieldsResource
from .resources.datasets import DatasetsResource, AsyncDatasetsResource
from .resources.chat.chat import ChatResource, AsyncChatResource
- from .resources.eval.eval import EvalResource, AsyncEvalResource
- from .resources.inference import InferenceResource, AsyncInferenceResource
from .resources.providers import ProvidersResource, AsyncProvidersResource
from .resources.telemetry import TelemetryResource, AsyncTelemetryResource
from .resources.vector_io import VectorIoResource, AsyncVectorIoResource
@@ -80,9 +75,9 @@
from .resources.embeddings import EmbeddingsResource, AsyncEmbeddingsResource
from .resources.toolgroups import ToolgroupsResource, AsyncToolgroupsResource
from .resources.vector_dbs import VectorDBsResource, AsyncVectorDBsResource
+ from .resources.alpha.alpha import AlphaResource, AsyncAlphaResource
from .resources.completions import CompletionsResource, AsyncCompletionsResource
from .resources.moderations import ModerationsResource, AsyncModerationsResource
- from .resources.agents.agents import AgentsResource, AsyncAgentsResource
from .resources.models.models import ModelsResource, AsyncModelsResource
from .resources.scoring_functions import ScoringFunctionsResource, AsyncScoringFunctionsResource
from .resources.responses.responses import ResponsesResource, AsyncResponsesResource
@@ -91,7 +86,6 @@
AsyncSyntheticDataGenerationResource,
)
from .resources.tool_runtime.tool_runtime import ToolRuntimeResource, AsyncToolRuntimeResource
- from .resources.post_training.post_training import PostTrainingResource, AsyncPostTrainingResource
from .resources.vector_stores.vector_stores import VectorStoresResource, AsyncVectorStoresResource
__all__ = [
@@ -187,36 +181,18 @@ def responses(self) -> ResponsesResource:
return ResponsesResource(self)
- @cached_property
- def agents(self) -> AgentsResource:
- from .resources.agents import AgentsResource
-
- return AgentsResource(self)
-
@cached_property
def datasets(self) -> DatasetsResource:
from .resources.datasets import DatasetsResource
return DatasetsResource(self)
- @cached_property
- def eval(self) -> EvalResource:
- from .resources.eval import EvalResource
-
- return EvalResource(self)
-
@cached_property
def inspect(self) -> InspectResource:
from .resources.inspect import InspectResource
return InspectResource(self)
- @cached_property
- def inference(self) -> InferenceResource:
- from .resources.inference import InferenceResource
-
- return InferenceResource(self)
-
@cached_property
def embeddings(self) -> EmbeddingsResource:
from .resources.embeddings import EmbeddingsResource
@@ -259,12 +235,6 @@ def models(self) -> ModelsResource:
return ModelsResource(self)
- @cached_property
- def post_training(self) -> PostTrainingResource:
- from .resources.post_training import PostTrainingResource
-
- return PostTrainingResource(self)
-
@cached_property
def providers(self) -> ProvidersResource:
from .resources.providers import ProvidersResource
@@ -331,6 +301,12 @@ def files(self) -> FilesResource:
return FilesResource(self)
+ @cached_property
+ def alpha(self) -> AlphaResource:
+ from .resources.alpha import AlphaResource
+
+ return AlphaResource(self)
+
@cached_property
def with_raw_response(self) -> LlamaStackClientWithRawResponse:
return LlamaStackClientWithRawResponse(self)
@@ -527,36 +503,18 @@ def responses(self) -> AsyncResponsesResource:
return AsyncResponsesResource(self)
- @cached_property
- def agents(self) -> AsyncAgentsResource:
- from .resources.agents import AsyncAgentsResource
-
- return AsyncAgentsResource(self)
-
@cached_property
def datasets(self) -> AsyncDatasetsResource:
from .resources.datasets import AsyncDatasetsResource
return AsyncDatasetsResource(self)
- @cached_property
- def eval(self) -> AsyncEvalResource:
- from .resources.eval import AsyncEvalResource
-
- return AsyncEvalResource(self)
-
@cached_property
def inspect(self) -> AsyncInspectResource:
from .resources.inspect import AsyncInspectResource
return AsyncInspectResource(self)
- @cached_property
- def inference(self) -> AsyncInferenceResource:
- from .resources.inference import AsyncInferenceResource
-
- return AsyncInferenceResource(self)
-
@cached_property
def embeddings(self) -> AsyncEmbeddingsResource:
from .resources.embeddings import AsyncEmbeddingsResource
@@ -599,12 +557,6 @@ def models(self) -> AsyncModelsResource:
return AsyncModelsResource(self)
- @cached_property
- def post_training(self) -> AsyncPostTrainingResource:
- from .resources.post_training import AsyncPostTrainingResource
-
- return AsyncPostTrainingResource(self)
-
@cached_property
def providers(self) -> AsyncProvidersResource:
from .resources.providers import AsyncProvidersResource
@@ -671,6 +623,12 @@ def files(self) -> AsyncFilesResource:
return AsyncFilesResource(self)
+ @cached_property
+ def alpha(self) -> AsyncAlphaResource:
+ from .resources.alpha import AsyncAlphaResource
+
+ return AsyncAlphaResource(self)
+
@cached_property
def with_raw_response(self) -> AsyncLlamaStackClientWithRawResponse:
return AsyncLlamaStackClientWithRawResponse(self)
@@ -816,36 +774,18 @@ def responses(self) -> responses.ResponsesResourceWithRawResponse:
return ResponsesResourceWithRawResponse(self._client.responses)
- @cached_property
- def agents(self) -> agents.AgentsResourceWithRawResponse:
- from .resources.agents import AgentsResourceWithRawResponse
-
- return AgentsResourceWithRawResponse(self._client.agents)
-
@cached_property
def datasets(self) -> datasets.DatasetsResourceWithRawResponse:
from .resources.datasets import DatasetsResourceWithRawResponse
return DatasetsResourceWithRawResponse(self._client.datasets)
- @cached_property
- def eval(self) -> eval.EvalResourceWithRawResponse:
- from .resources.eval import EvalResourceWithRawResponse
-
- return EvalResourceWithRawResponse(self._client.eval)
-
@cached_property
def inspect(self) -> inspect.InspectResourceWithRawResponse:
from .resources.inspect import InspectResourceWithRawResponse
return InspectResourceWithRawResponse(self._client.inspect)
- @cached_property
- def inference(self) -> inference.InferenceResourceWithRawResponse:
- from .resources.inference import InferenceResourceWithRawResponse
-
- return InferenceResourceWithRawResponse(self._client.inference)
-
@cached_property
def embeddings(self) -> embeddings.EmbeddingsResourceWithRawResponse:
from .resources.embeddings import EmbeddingsResourceWithRawResponse
@@ -888,12 +828,6 @@ def models(self) -> models.ModelsResourceWithRawResponse:
return ModelsResourceWithRawResponse(self._client.models)
- @cached_property
- def post_training(self) -> post_training.PostTrainingResourceWithRawResponse:
- from .resources.post_training import PostTrainingResourceWithRawResponse
-
- return PostTrainingResourceWithRawResponse(self._client.post_training)
-
@cached_property
def providers(self) -> providers.ProvidersResourceWithRawResponse:
from .resources.providers import ProvidersResourceWithRawResponse
@@ -960,6 +894,12 @@ def files(self) -> files.FilesResourceWithRawResponse:
return FilesResourceWithRawResponse(self._client.files)
+ @cached_property
+ def alpha(self) -> alpha.AlphaResourceWithRawResponse:
+ from .resources.alpha import AlphaResourceWithRawResponse
+
+ return AlphaResourceWithRawResponse(self._client.alpha)
+
class AsyncLlamaStackClientWithRawResponse:
_client: AsyncLlamaStackClient
@@ -991,36 +931,18 @@ def responses(self) -> responses.AsyncResponsesResourceWithRawResponse:
return AsyncResponsesResourceWithRawResponse(self._client.responses)
- @cached_property
- def agents(self) -> agents.AsyncAgentsResourceWithRawResponse:
- from .resources.agents import AsyncAgentsResourceWithRawResponse
-
- return AsyncAgentsResourceWithRawResponse(self._client.agents)
-
@cached_property
def datasets(self) -> datasets.AsyncDatasetsResourceWithRawResponse:
from .resources.datasets import AsyncDatasetsResourceWithRawResponse
return AsyncDatasetsResourceWithRawResponse(self._client.datasets)
- @cached_property
- def eval(self) -> eval.AsyncEvalResourceWithRawResponse:
- from .resources.eval import AsyncEvalResourceWithRawResponse
-
- return AsyncEvalResourceWithRawResponse(self._client.eval)
-
@cached_property
def inspect(self) -> inspect.AsyncInspectResourceWithRawResponse:
from .resources.inspect import AsyncInspectResourceWithRawResponse
return AsyncInspectResourceWithRawResponse(self._client.inspect)
- @cached_property
- def inference(self) -> inference.AsyncInferenceResourceWithRawResponse:
- from .resources.inference import AsyncInferenceResourceWithRawResponse
-
- return AsyncInferenceResourceWithRawResponse(self._client.inference)
-
@cached_property
def embeddings(self) -> embeddings.AsyncEmbeddingsResourceWithRawResponse:
from .resources.embeddings import AsyncEmbeddingsResourceWithRawResponse
@@ -1063,12 +985,6 @@ def models(self) -> models.AsyncModelsResourceWithRawResponse:
return AsyncModelsResourceWithRawResponse(self._client.models)
- @cached_property
- def post_training(self) -> post_training.AsyncPostTrainingResourceWithRawResponse:
- from .resources.post_training import AsyncPostTrainingResourceWithRawResponse
-
- return AsyncPostTrainingResourceWithRawResponse(self._client.post_training)
-
@cached_property
def providers(self) -> providers.AsyncProvidersResourceWithRawResponse:
from .resources.providers import AsyncProvidersResourceWithRawResponse
@@ -1137,6 +1053,12 @@ def files(self) -> files.AsyncFilesResourceWithRawResponse:
return AsyncFilesResourceWithRawResponse(self._client.files)
+ @cached_property
+ def alpha(self) -> alpha.AsyncAlphaResourceWithRawResponse:
+ from .resources.alpha import AsyncAlphaResourceWithRawResponse
+
+ return AsyncAlphaResourceWithRawResponse(self._client.alpha)
+
class LlamaStackClientWithStreamedResponse:
_client: LlamaStackClient
@@ -1168,36 +1090,18 @@ def responses(self) -> responses.ResponsesResourceWithStreamingResponse:
return ResponsesResourceWithStreamingResponse(self._client.responses)
- @cached_property
- def agents(self) -> agents.AgentsResourceWithStreamingResponse:
- from .resources.agents import AgentsResourceWithStreamingResponse
-
- return AgentsResourceWithStreamingResponse(self._client.agents)
-
@cached_property
def datasets(self) -> datasets.DatasetsResourceWithStreamingResponse:
from .resources.datasets import DatasetsResourceWithStreamingResponse
return DatasetsResourceWithStreamingResponse(self._client.datasets)
- @cached_property
- def eval(self) -> eval.EvalResourceWithStreamingResponse:
- from .resources.eval import EvalResourceWithStreamingResponse
-
- return EvalResourceWithStreamingResponse(self._client.eval)
-
@cached_property
def inspect(self) -> inspect.InspectResourceWithStreamingResponse:
from .resources.inspect import InspectResourceWithStreamingResponse
return InspectResourceWithStreamingResponse(self._client.inspect)
- @cached_property
- def inference(self) -> inference.InferenceResourceWithStreamingResponse:
- from .resources.inference import InferenceResourceWithStreamingResponse
-
- return InferenceResourceWithStreamingResponse(self._client.inference)
-
@cached_property
def embeddings(self) -> embeddings.EmbeddingsResourceWithStreamingResponse:
from .resources.embeddings import EmbeddingsResourceWithStreamingResponse
@@ -1240,12 +1144,6 @@ def models(self) -> models.ModelsResourceWithStreamingResponse:
return ModelsResourceWithStreamingResponse(self._client.models)
- @cached_property
- def post_training(self) -> post_training.PostTrainingResourceWithStreamingResponse:
- from .resources.post_training import PostTrainingResourceWithStreamingResponse
-
- return PostTrainingResourceWithStreamingResponse(self._client.post_training)
-
@cached_property
def providers(self) -> providers.ProvidersResourceWithStreamingResponse:
from .resources.providers import ProvidersResourceWithStreamingResponse
@@ -1314,6 +1212,12 @@ def files(self) -> files.FilesResourceWithStreamingResponse:
return FilesResourceWithStreamingResponse(self._client.files)
+ @cached_property
+ def alpha(self) -> alpha.AlphaResourceWithStreamingResponse:
+ from .resources.alpha import AlphaResourceWithStreamingResponse
+
+ return AlphaResourceWithStreamingResponse(self._client.alpha)
+
class AsyncLlamaStackClientWithStreamedResponse:
_client: AsyncLlamaStackClient
@@ -1345,36 +1249,18 @@ def responses(self) -> responses.AsyncResponsesResourceWithStreamingResponse:
return AsyncResponsesResourceWithStreamingResponse(self._client.responses)
- @cached_property
- def agents(self) -> agents.AsyncAgentsResourceWithStreamingResponse:
- from .resources.agents import AsyncAgentsResourceWithStreamingResponse
-
- return AsyncAgentsResourceWithStreamingResponse(self._client.agents)
-
@cached_property
def datasets(self) -> datasets.AsyncDatasetsResourceWithStreamingResponse:
from .resources.datasets import AsyncDatasetsResourceWithStreamingResponse
return AsyncDatasetsResourceWithStreamingResponse(self._client.datasets)
- @cached_property
- def eval(self) -> eval.AsyncEvalResourceWithStreamingResponse:
- from .resources.eval import AsyncEvalResourceWithStreamingResponse
-
- return AsyncEvalResourceWithStreamingResponse(self._client.eval)
-
@cached_property
def inspect(self) -> inspect.AsyncInspectResourceWithStreamingResponse:
from .resources.inspect import AsyncInspectResourceWithStreamingResponse
return AsyncInspectResourceWithStreamingResponse(self._client.inspect)
- @cached_property
- def inference(self) -> inference.AsyncInferenceResourceWithStreamingResponse:
- from .resources.inference import AsyncInferenceResourceWithStreamingResponse
-
- return AsyncInferenceResourceWithStreamingResponse(self._client.inference)
-
@cached_property
def embeddings(self) -> embeddings.AsyncEmbeddingsResourceWithStreamingResponse:
from .resources.embeddings import AsyncEmbeddingsResourceWithStreamingResponse
@@ -1417,12 +1303,6 @@ def models(self) -> models.AsyncModelsResourceWithStreamingResponse:
return AsyncModelsResourceWithStreamingResponse(self._client.models)
- @cached_property
- def post_training(self) -> post_training.AsyncPostTrainingResourceWithStreamingResponse:
- from .resources.post_training import AsyncPostTrainingResourceWithStreamingResponse
-
- return AsyncPostTrainingResourceWithStreamingResponse(self._client.post_training)
-
@cached_property
def providers(self) -> providers.AsyncProvidersResourceWithStreamingResponse:
from .resources.providers import AsyncProvidersResourceWithStreamingResponse
@@ -1491,6 +1371,12 @@ def files(self) -> files.AsyncFilesResourceWithStreamingResponse:
return AsyncFilesResourceWithStreamingResponse(self._client.files)
+ @cached_property
+ def alpha(self) -> alpha.AsyncAlphaResourceWithStreamingResponse:
+ from .resources.alpha import AsyncAlphaResourceWithStreamingResponse
+
+ return AsyncAlphaResourceWithStreamingResponse(self._client.alpha)
+
Client = LlamaStackClient
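Structurally, every removed accessor and the new `alpha` one follow the same lazy pattern: the resource module is imported inside a `cached_property`, so client construction stays cheap and each resource is built exactly once, on first access. A standalone sketch of that pattern, with stdlib `functools.cached_property` standing in for the SDK's `_compat` version:

```python
from functools import cached_property


class AlphaResource:
    def __init__(self, client: "Client") -> None:
        self._client = client


class Client:
    @cached_property
    def alpha(self) -> AlphaResource:
        # in the SDK this body is a deferred `from .resources.alpha import AlphaResource`,
        # so the submodule is only imported when the attribute is first touched
        return AlphaResource(self)


client = Client()
assert client.alpha is client.alpha  # cached_property memoizes the built resource
```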
diff --git a/src/llama_stack_client/lib/agents/agent.py b/src/llama_stack_client/lib/agents/agent.py
index 5dc2f236..779c44c2 100644
--- a/src/llama_stack_client/lib/agents/agent.py
+++ b/src/llama_stack_client/lib/agents/agent.py
@@ -7,13 +7,14 @@
from typing import Any, AsyncIterator, Callable, Iterator, List, Optional, Tuple, Union
from llama_stack_client import LlamaStackClient
-from llama_stack_client.types import ToolResponseMessage, ToolResponseParam, UserMessage
-from llama_stack_client.types.agent_create_params import AgentConfig
-from llama_stack_client.types.agents.agent_turn_response_stream_chunk import (
+from llama_stack_client.types import ToolResponseMessage, UserMessage
+from llama_stack_client.types.alpha import ToolResponseParam
+from llama_stack_client.types.alpha.agent_create_params import AgentConfig
+from llama_stack_client.types.alpha.agents.agent_turn_response_stream_chunk import (
AgentTurnResponseStreamChunk,
)
-from llama_stack_client.types.agents.turn import CompletionMessage, Turn
-from llama_stack_client.types.agents.turn_create_params import Document, Toolgroup
+from llama_stack_client.types.alpha.agents.turn import CompletionMessage, Turn
+from llama_stack_client.types.alpha.agents.turn_create_params import Document, Toolgroup
from llama_stack_client.types.shared.tool_call import ToolCall
from llama_stack_client.types.shared_params.agent_config import ToolConfig
from llama_stack_client.types.shared_params.response_format import ResponseFormat
@@ -203,7 +204,7 @@ def __init__(
self.initialize()
def initialize(self) -> None:
- agentic_system_create_response = self.client.agents.create(
+ agentic_system_create_response = self.client.alpha.agents.create(
agent_config=self.agent_config,
extra_headers=self.extra_headers,
)
@@ -214,7 +215,7 @@ def initialize(self) -> None:
self.builtin_tools[tool.identifier] = tg.get("args", {}) if isinstance(tg, dict) else {}
def create_session(self, session_name: str) -> str:
- agentic_system_create_session_response = self.client.agents.session.create(
+ agentic_system_create_session_response = self.client.alpha.agents.session.create(
agent_id=self.agent_id,
session_name=session_name,
extra_headers=self.extra_headers,
@@ -322,7 +323,7 @@ def _create_turn_streaming(
n_iter = 0
# 1. create an agent turn
- turn_response = self.client.agents.turn.create(
+ turn_response = self.client.alpha.agents.turn.create(
agent_id=self.agent_id,
# use specified session_id or last session created
session_id=session_id or self.session_id[-1],
@@ -361,7 +362,7 @@ def _create_turn_streaming(
tool_responses = self._run_tool_calls(tool_calls)
# pass it to next iteration
- turn_response = self.client.agents.turn.resume(
+ turn_response = self.client.alpha.agents.turn.resume(
agent_id=self.agent_id,
session_id=session_id or self.session_id[-1],
turn_id=turn_id,
@@ -468,7 +469,7 @@ async def initialize(self) -> None:
if self._agent_id:
return
- agentic_system_create_response = await self.client.agents.create(
+ agentic_system_create_response = await self.client.alpha.agents.create(
agent_config=self.agent_config,
)
self._agent_id = agentic_system_create_response.agent_id
@@ -478,7 +479,7 @@ async def initialize(self) -> None:
async def create_session(self, session_name: str) -> str:
await self.initialize()
- agentic_system_create_session_response = await self.client.agents.session.create(
+ agentic_system_create_session_response = await self.client.alpha.agents.session.create(
agent_id=self.agent_id,
session_name=session_name,
extra_headers=self.extra_headers,
@@ -558,7 +559,7 @@ async def _create_turn_streaming(
n_iter = 0
# 1. create an agent turn
- turn_response = await self.client.agents.turn.create(
+ turn_response = await self.client.alpha.agents.turn.create(
agent_id=self.agent_id,
# use specified session_id or last session created
session_id=session_id or self.session_id[-1],
@@ -596,7 +597,7 @@ async def _create_turn_streaming(
tool_responses = await self._run_tool_calls(tool_calls)
# pass it to next iteration
- turn_response = await self.client.agents.turn.resume(
+ turn_response = await self.client.alpha.agents.turn.resume(
agent_id=self.agent_id,
session_id=session_id or self.session_id[-1],
turn_id=turn_id,
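The wrapper edits above boil down to three namespaced calls; a hedged sketch of that flow against a live client (the `agent_config` fields, message shape, and the `session_id` attribute name are assumptions where this diff does not show them):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

created = client.alpha.agents.create(
    agent_config={
        "model": "llama3.2:3b",                      # assumed config fields
        "instructions": "You are a helpful assistant.",
    },
)
session = client.alpha.agents.session.create(
    agent_id=created.agent_id,
    session_name="demo",
)
turn = client.alpha.agents.turn.create(
    session_id=session.session_id,                   # assumed attribute name
    agent_id=created.agent_id,
    messages=[{"role": "user", "content": "Hello!"}],
    stream=False,  # stream=True yields AgentTurnResponseStreamChunk events instead
)
```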
diff --git a/src/llama_stack_client/lib/agents/client_tool.py b/src/llama_stack_client/lib/agents/client_tool.py
index c199b211..f017d651 100644
--- a/src/llama_stack_client/lib/agents/client_tool.py
+++ b/src/llama_stack_client/lib/agents/client_tool.py
@@ -19,7 +19,8 @@
Union,
)
-from llama_stack_client.types import CompletionMessage, Message, ToolResponse
+from llama_stack_client.types import CompletionMessage, Message
+from llama_stack_client.types.alpha import ToolResponse
from llama_stack_client.types.tool_def_param import Parameter, ToolDefParam
diff --git a/src/llama_stack_client/lib/agents/tool_parser.py b/src/llama_stack_client/lib/agents/tool_parser.py
index dc0c5ba4..ca8d28ea 100644
--- a/src/llama_stack_client/lib/agents/tool_parser.py
+++ b/src/llama_stack_client/lib/agents/tool_parser.py
@@ -7,7 +7,7 @@
from abc import abstractmethod
from typing import List
-from llama_stack_client.types.agents.turn import CompletionMessage
+from llama_stack_client.types.alpha.agents.turn import CompletionMessage
from llama_stack_client.types.shared.tool_call import ToolCall
diff --git a/src/llama_stack_client/resources/__init__.py b/src/llama_stack_client/resources/__init__.py
index 01e17f57..3089ae21 100644
--- a/src/llama_stack_client/resources/__init__.py
+++ b/src/llama_stack_client/resources/__init__.py
@@ -8,13 +8,13 @@
ChatResourceWithStreamingResponse,
AsyncChatResourceWithStreamingResponse,
)
-from .eval import (
- EvalResource,
- AsyncEvalResource,
- EvalResourceWithRawResponse,
- AsyncEvalResourceWithRawResponse,
- EvalResourceWithStreamingResponse,
- AsyncEvalResourceWithStreamingResponse,
+from .alpha import (
+ AlphaResource,
+ AsyncAlphaResource,
+ AlphaResourceWithRawResponse,
+ AsyncAlphaResourceWithRawResponse,
+ AlphaResourceWithStreamingResponse,
+ AsyncAlphaResourceWithStreamingResponse,
)
from .files import (
FilesResource,
@@ -32,14 +32,6 @@
ToolsResourceWithStreamingResponse,
AsyncToolsResourceWithStreamingResponse,
)
-from .agents import (
- AgentsResource,
- AsyncAgentsResource,
- AgentsResourceWithRawResponse,
- AsyncAgentsResourceWithRawResponse,
- AgentsResourceWithStreamingResponse,
- AsyncAgentsResourceWithStreamingResponse,
-)
from .models import (
ModelsResource,
AsyncModelsResource,
@@ -96,14 +88,6 @@
DatasetsResourceWithStreamingResponse,
AsyncDatasetsResourceWithStreamingResponse,
)
-from .inference import (
- InferenceResource,
- AsyncInferenceResource,
- InferenceResourceWithRawResponse,
- AsyncInferenceResourceWithRawResponse,
- InferenceResourceWithStreamingResponse,
- AsyncInferenceResourceWithStreamingResponse,
-)
from .providers import (
ProvidersResource,
AsyncProvidersResource,
@@ -192,14 +176,6 @@
ToolRuntimeResourceWithStreamingResponse,
AsyncToolRuntimeResourceWithStreamingResponse,
)
-from .post_training import (
- PostTrainingResource,
- AsyncPostTrainingResource,
- PostTrainingResourceWithRawResponse,
- AsyncPostTrainingResourceWithRawResponse,
- PostTrainingResourceWithStreamingResponse,
- AsyncPostTrainingResourceWithStreamingResponse,
-)
from .vector_stores import (
VectorStoresResource,
AsyncVectorStoresResource,
@@ -250,36 +226,18 @@
"AsyncResponsesResourceWithRawResponse",
"ResponsesResourceWithStreamingResponse",
"AsyncResponsesResourceWithStreamingResponse",
- "AgentsResource",
- "AsyncAgentsResource",
- "AgentsResourceWithRawResponse",
- "AsyncAgentsResourceWithRawResponse",
- "AgentsResourceWithStreamingResponse",
- "AsyncAgentsResourceWithStreamingResponse",
"DatasetsResource",
"AsyncDatasetsResource",
"DatasetsResourceWithRawResponse",
"AsyncDatasetsResourceWithRawResponse",
"DatasetsResourceWithStreamingResponse",
"AsyncDatasetsResourceWithStreamingResponse",
- "EvalResource",
- "AsyncEvalResource",
- "EvalResourceWithRawResponse",
- "AsyncEvalResourceWithRawResponse",
- "EvalResourceWithStreamingResponse",
- "AsyncEvalResourceWithStreamingResponse",
"InspectResource",
"AsyncInspectResource",
"InspectResourceWithRawResponse",
"AsyncInspectResourceWithRawResponse",
"InspectResourceWithStreamingResponse",
"AsyncInspectResourceWithStreamingResponse",
- "InferenceResource",
- "AsyncInferenceResource",
- "InferenceResourceWithRawResponse",
- "AsyncInferenceResourceWithRawResponse",
- "InferenceResourceWithStreamingResponse",
- "AsyncInferenceResourceWithStreamingResponse",
"EmbeddingsResource",
"AsyncEmbeddingsResource",
"EmbeddingsResourceWithRawResponse",
@@ -322,12 +280,6 @@
"AsyncModelsResourceWithRawResponse",
"ModelsResourceWithStreamingResponse",
"AsyncModelsResourceWithStreamingResponse",
- "PostTrainingResource",
- "AsyncPostTrainingResource",
- "PostTrainingResourceWithRawResponse",
- "AsyncPostTrainingResourceWithRawResponse",
- "PostTrainingResourceWithStreamingResponse",
- "AsyncPostTrainingResourceWithStreamingResponse",
"ProvidersResource",
"AsyncProvidersResource",
"ProvidersResourceWithRawResponse",
@@ -394,4 +346,10 @@
"AsyncFilesResourceWithRawResponse",
"FilesResourceWithStreamingResponse",
"AsyncFilesResourceWithStreamingResponse",
+ "AlphaResource",
+ "AsyncAlphaResource",
+ "AlphaResourceWithRawResponse",
+ "AsyncAlphaResourceWithRawResponse",
+ "AlphaResourceWithStreamingResponse",
+ "AsyncAlphaResourceWithStreamingResponse",
]
diff --git a/src/llama_stack_client/resources/alpha/__init__.py b/src/llama_stack_client/resources/alpha/__init__.py
new file mode 100644
index 00000000..c3c4b0d9
--- /dev/null
+++ b/src/llama_stack_client/resources/alpha/__init__.py
@@ -0,0 +1,75 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .eval import (
+ EvalResource,
+ AsyncEvalResource,
+ EvalResourceWithRawResponse,
+ AsyncEvalResourceWithRawResponse,
+ EvalResourceWithStreamingResponse,
+ AsyncEvalResourceWithStreamingResponse,
+)
+from .alpha import (
+ AlphaResource,
+ AsyncAlphaResource,
+ AlphaResourceWithRawResponse,
+ AsyncAlphaResourceWithRawResponse,
+ AlphaResourceWithStreamingResponse,
+ AsyncAlphaResourceWithStreamingResponse,
+)
+from .agents import (
+ AgentsResource,
+ AsyncAgentsResource,
+ AgentsResourceWithRawResponse,
+ AsyncAgentsResourceWithRawResponse,
+ AgentsResourceWithStreamingResponse,
+ AsyncAgentsResourceWithStreamingResponse,
+)
+from .inference import (
+ InferenceResource,
+ AsyncInferenceResource,
+ InferenceResourceWithRawResponse,
+ AsyncInferenceResourceWithRawResponse,
+ InferenceResourceWithStreamingResponse,
+ AsyncInferenceResourceWithStreamingResponse,
+)
+from .post_training import (
+ PostTrainingResource,
+ AsyncPostTrainingResource,
+ PostTrainingResourceWithRawResponse,
+ AsyncPostTrainingResourceWithRawResponse,
+ PostTrainingResourceWithStreamingResponse,
+ AsyncPostTrainingResourceWithStreamingResponse,
+)
+
+__all__ = [
+ "InferenceResource",
+ "AsyncInferenceResource",
+ "InferenceResourceWithRawResponse",
+ "AsyncInferenceResourceWithRawResponse",
+ "InferenceResourceWithStreamingResponse",
+ "AsyncInferenceResourceWithStreamingResponse",
+ "PostTrainingResource",
+ "AsyncPostTrainingResource",
+ "PostTrainingResourceWithRawResponse",
+ "AsyncPostTrainingResourceWithRawResponse",
+ "PostTrainingResourceWithStreamingResponse",
+ "AsyncPostTrainingResourceWithStreamingResponse",
+ "EvalResource",
+ "AsyncEvalResource",
+ "EvalResourceWithRawResponse",
+ "AsyncEvalResourceWithRawResponse",
+ "EvalResourceWithStreamingResponse",
+ "AsyncEvalResourceWithStreamingResponse",
+ "AgentsResource",
+ "AsyncAgentsResource",
+ "AgentsResourceWithRawResponse",
+ "AsyncAgentsResourceWithRawResponse",
+ "AgentsResourceWithStreamingResponse",
+ "AsyncAgentsResourceWithStreamingResponse",
+ "AlphaResource",
+ "AsyncAlphaResource",
+ "AlphaResourceWithRawResponse",
+ "AsyncAlphaResourceWithRawResponse",
+ "AlphaResourceWithStreamingResponse",
+ "AsyncAlphaResourceWithStreamingResponse",
+]
diff --git a/src/llama_stack_client/resources/agents/__init__.py b/src/llama_stack_client/resources/alpha/agents/__init__.py
similarity index 100%
rename from src/llama_stack_client/resources/agents/__init__.py
rename to src/llama_stack_client/resources/alpha/agents/__init__.py
diff --git a/src/llama_stack_client/resources/agents/agents.py b/src/llama_stack_client/resources/alpha/agents/agents.py
similarity index 94%
rename from src/llama_stack_client/resources/agents/agents.py
rename to src/llama_stack_client/resources/alpha/agents/agents.py
index 50d65a60..0e81cce7 100644
--- a/src/llama_stack_client/resources/agents/agents.py
+++ b/src/llama_stack_client/resources/alpha/agents/agents.py
@@ -20,7 +20,6 @@
StepsResourceWithStreamingResponse,
AsyncStepsResourceWithStreamingResponse,
)
-from ...types import agent_list_params, agent_create_params
from .session import (
SessionResource,
AsyncSessionResource,
@@ -29,21 +28,22 @@
SessionResourceWithStreamingResponse,
AsyncSessionResourceWithStreamingResponse,
)
-from ..._types import Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given
-from ..._utils import maybe_transform, async_maybe_transform
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import (
+from ...._types import Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given
+from ...._utils import maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
to_raw_response_wrapper,
to_streamed_response_wrapper,
async_to_raw_response_wrapper,
async_to_streamed_response_wrapper,
)
-from ..._base_client import make_request_options
-from ...types.agent_list_response import AgentListResponse
-from ...types.agent_create_response import AgentCreateResponse
-from ...types.agent_retrieve_response import AgentRetrieveResponse
-from ...types.shared_params.agent_config import AgentConfig
+from ....types.alpha import agent_list_params, agent_create_params
+from ...._base_client import make_request_options
+from ....types.alpha.agent_list_response import AgentListResponse
+from ....types.shared_params.agent_config import AgentConfig
+from ....types.alpha.agent_create_response import AgentCreateResponse
+from ....types.alpha.agent_retrieve_response import AgentRetrieveResponse
__all__ = ["AgentsResource", "AsyncAgentsResource"]
@@ -106,7 +106,7 @@ def create(
timeout: Override the client-level default timeout for this request, in seconds
"""
return self._post(
- "/v1/agents",
+ "/v1alpha/agents",
body=maybe_transform({"agent_config": agent_config}, agent_create_params.AgentCreateParams),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -140,7 +140,7 @@ def retrieve(
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
return self._get(
- f"/v1/agents/{agent_id}",
+ f"/v1alpha/agents/{agent_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -176,7 +176,7 @@ def list(
timeout: Override the client-level default timeout for this request, in seconds
"""
return self._get(
- "/v1/agents",
+ "/v1alpha/agents",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -220,7 +220,7 @@ def delete(
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
return self._delete(
- f"/v1/agents/{agent_id}",
+ f"/v1alpha/agents/{agent_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -286,7 +286,7 @@ async def create(
timeout: Override the client-level default timeout for this request, in seconds
"""
return await self._post(
- "/v1/agents",
+ "/v1alpha/agents",
body=await async_maybe_transform({"agent_config": agent_config}, agent_create_params.AgentCreateParams),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -320,7 +320,7 @@ async def retrieve(
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
return await self._get(
- f"/v1/agents/{agent_id}",
+ f"/v1alpha/agents/{agent_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -356,7 +356,7 @@ async def list(
timeout: Override the client-level default timeout for this request, in seconds
"""
return await self._get(
- "/v1/agents",
+ "/v1alpha/agents",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -400,7 +400,7 @@ async def delete(
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
return await self._delete(
- f"/v1/agents/{agent_id}",
+ f"/v1alpha/agents/{agent_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
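Every route in this file moves from `/v1/...` to `/v1alpha/...`, so anything calling the server directly must move too; a raw-HTTP sketch (the server address is a placeholder):

```python
import httpx

base = "http://localhost:8321"  # placeholder server address
resp = httpx.get(f"{base}/v1alpha/agents")  # was: /v1/agents
resp.raise_for_status()
print(resp.json())
```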
diff --git a/src/llama_stack_client/resources/agents/session.py b/src/llama_stack_client/resources/alpha/agents/session.py
similarity index 93%
rename from src/llama_stack_client/resources/agents/session.py
rename to src/llama_stack_client/resources/alpha/agents/session.py
index de5b35de..2e980add 100644
--- a/src/llama_stack_client/resources/agents/session.py
+++ b/src/llama_stack_client/resources/alpha/agents/session.py
@@ -4,21 +4,21 @@
import httpx
-from ..._types import Body, Omit, Query, Headers, NoneType, NotGiven, SequenceNotStr, omit, not_given
-from ..._utils import maybe_transform, async_maybe_transform
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import (
+from ...._types import Body, Omit, Query, Headers, NoneType, NotGiven, SequenceNotStr, omit, not_given
+from ...._utils import maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
to_raw_response_wrapper,
to_streamed_response_wrapper,
async_to_raw_response_wrapper,
async_to_streamed_response_wrapper,
)
-from ..._base_client import make_request_options
-from ...types.agents import session_list_params, session_create_params, session_retrieve_params
-from ...types.agents.session import Session
-from ...types.agents.session_list_response import SessionListResponse
-from ...types.agents.session_create_response import SessionCreateResponse
+from ...._base_client import make_request_options
+from ....types.alpha.agents import session_list_params, session_create_params, session_retrieve_params
+from ....types.alpha.agents.session import Session
+from ....types.alpha.agents.session_list_response import SessionListResponse
+from ....types.alpha.agents.session_create_response import SessionCreateResponse
__all__ = ["SessionResource", "AsyncSessionResource"]
@@ -72,7 +72,7 @@ def create(
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
return self._post(
- f"/v1/agents/{agent_id}/session",
+ f"/v1alpha/agents/{agent_id}/session",
body=maybe_transform({"session_name": session_name}, session_create_params.SessionCreateParams),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -112,7 +112,7 @@ def retrieve(
if not session_id:
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
return self._get(
- f"/v1/agents/{agent_id}/session/{session_id}",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -155,7 +155,7 @@ def list(
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
return self._get(
- f"/v1/agents/{agent_id}/sessions",
+ f"/v1alpha/agents/{agent_id}/sessions",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -202,7 +202,7 @@ def delete(
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
return self._delete(
- f"/v1/agents/{agent_id}/session/{session_id}",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -259,7 +259,7 @@ async def create(
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
return await self._post(
- f"/v1/agents/{agent_id}/session",
+ f"/v1alpha/agents/{agent_id}/session",
body=await async_maybe_transform({"session_name": session_name}, session_create_params.SessionCreateParams),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -299,7 +299,7 @@ async def retrieve(
if not session_id:
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
return await self._get(
- f"/v1/agents/{agent_id}/session/{session_id}",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -344,7 +344,7 @@ async def list(
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
return await self._get(
- f"/v1/agents/{agent_id}/sessions",
+ f"/v1alpha/agents/{agent_id}/sessions",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -391,7 +391,7 @@ async def delete(
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
return await self._delete(
- f"/v1/agents/{agent_id}/session/{session_id}",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
diff --git a/src/llama_stack_client/resources/agents/steps.py b/src/llama_stack_client/resources/alpha/agents/steps.py
similarity index 92%
rename from src/llama_stack_client/resources/agents/steps.py
rename to src/llama_stack_client/resources/alpha/agents/steps.py
index 94138edc..838822d0 100644
--- a/src/llama_stack_client/resources/agents/steps.py
+++ b/src/llama_stack_client/resources/alpha/agents/steps.py
@@ -4,17 +4,17 @@
import httpx
-from ..._types import Body, Query, Headers, NotGiven, not_given
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import (
+from ...._types import Body, Query, Headers, NotGiven, not_given
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
to_raw_response_wrapper,
to_streamed_response_wrapper,
async_to_raw_response_wrapper,
async_to_streamed_response_wrapper,
)
-from ..._base_client import make_request_options
-from ...types.agents.step_retrieve_response import StepRetrieveResponse
+from ...._base_client import make_request_options
+from ....types.alpha.agents.step_retrieve_response import StepRetrieveResponse
__all__ = ["StepsResource", "AsyncStepsResource"]
@@ -74,7 +74,7 @@ def retrieve(
if not step_id:
raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}")
return self._get(
- f"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -137,7 +137,7 @@ async def retrieve(
if not step_id:
raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}")
return await self._get(
- f"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
diff --git a/src/llama_stack_client/resources/agents/turn.py b/src/llama_stack_client/resources/alpha/agents/turn.py
similarity index 96%
rename from src/llama_stack_client/resources/agents/turn.py
rename to src/llama_stack_client/resources/alpha/agents/turn.py
index a10da847..ffe766b6 100644
--- a/src/llama_stack_client/resources/agents/turn.py
+++ b/src/llama_stack_client/resources/alpha/agents/turn.py
@@ -7,22 +7,22 @@
import httpx
-from ..._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
-from ..._utils import required_args, maybe_transform, async_maybe_transform
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import (
+from ...._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
+from ...._utils import required_args, maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
to_raw_response_wrapper,
to_streamed_response_wrapper,
async_to_raw_response_wrapper,
async_to_streamed_response_wrapper,
)
-from ..._streaming import Stream, AsyncStream
-from ..._base_client import make_request_options
-from ...types.agents import turn_create_params, turn_resume_params
-from ...types.agents.turn import Turn
-from ...types.tool_response_param import ToolResponseParam
-from ...types.agents.agent_turn_response_stream_chunk import AgentTurnResponseStreamChunk
+from ...._streaming import Stream, AsyncStream
+from ...._base_client import make_request_options
+from ....types.alpha.agents import turn_create_params, turn_resume_params
+from ....types.alpha.agents.turn import Turn
+from ....types.alpha.tool_response_param import ToolResponseParam
+from ....types.alpha.agents.agent_turn_response_stream_chunk import AgentTurnResponseStreamChunk
__all__ = ["TurnResource", "AsyncTurnResource"]
@@ -205,7 +205,7 @@ def create(
if not session_id:
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
return self._post(
- f"/v1/agents/{agent_id}/session/{session_id}/turn",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn",
body=maybe_transform(
{
"messages": messages,
@@ -258,7 +258,7 @@ def retrieve(
if not turn_id:
raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
return self._get(
- f"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -402,7 +402,7 @@ def resume(
if not turn_id:
raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
return self._post(
- f"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
body=maybe_transform(
{
"tool_responses": tool_responses,
@@ -599,7 +599,7 @@ async def create(
if not session_id:
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
return await self._post(
- f"/v1/agents/{agent_id}/session/{session_id}/turn",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn",
body=await async_maybe_transform(
{
"messages": messages,
@@ -652,7 +652,7 @@ async def retrieve(
if not turn_id:
raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
return await self._get(
- f"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -796,7 +796,7 @@ async def resume(
if not turn_id:
raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
return await self._post(
- f"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
body=await async_maybe_transform(
{
"tool_responses": tool_responses,
diff --git a/src/llama_stack_client/resources/alpha/alpha.py b/src/llama_stack_client/resources/alpha/alpha.py
new file mode 100644
index 00000000..77d4115b
--- /dev/null
+++ b/src/llama_stack_client/resources/alpha/alpha.py
@@ -0,0 +1,198 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from ..._compat import cached_property
+from .eval.eval import (
+ EvalResource,
+ AsyncEvalResource,
+ EvalResourceWithRawResponse,
+ AsyncEvalResourceWithRawResponse,
+ EvalResourceWithStreamingResponse,
+ AsyncEvalResourceWithStreamingResponse,
+)
+from .inference import (
+ InferenceResource,
+ AsyncInferenceResource,
+ InferenceResourceWithRawResponse,
+ AsyncInferenceResourceWithRawResponse,
+ InferenceResourceWithStreamingResponse,
+ AsyncInferenceResourceWithStreamingResponse,
+)
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from .agents.agents import (
+ AgentsResource,
+ AsyncAgentsResource,
+ AgentsResourceWithRawResponse,
+ AsyncAgentsResourceWithRawResponse,
+ AgentsResourceWithStreamingResponse,
+ AsyncAgentsResourceWithStreamingResponse,
+)
+from .post_training.post_training import (
+ PostTrainingResource,
+ AsyncPostTrainingResource,
+ PostTrainingResourceWithRawResponse,
+ AsyncPostTrainingResourceWithRawResponse,
+ PostTrainingResourceWithStreamingResponse,
+ AsyncPostTrainingResourceWithStreamingResponse,
+)
+
+__all__ = ["AlphaResource", "AsyncAlphaResource"]
+
+
+class AlphaResource(SyncAPIResource):
+ @cached_property
+ def inference(self) -> InferenceResource:
+ return InferenceResource(self._client)
+
+ @cached_property
+ def post_training(self) -> PostTrainingResource:
+ return PostTrainingResource(self._client)
+
+ @cached_property
+ def eval(self) -> EvalResource:
+ return EvalResource(self._client)
+
+ @cached_property
+ def agents(self) -> AgentsResource:
+ return AgentsResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AlphaResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return AlphaResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AlphaResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return AlphaResourceWithStreamingResponse(self)
+
+
+class AsyncAlphaResource(AsyncAPIResource):
+ @cached_property
+ def inference(self) -> AsyncInferenceResource:
+ return AsyncInferenceResource(self._client)
+
+ @cached_property
+ def post_training(self) -> AsyncPostTrainingResource:
+ return AsyncPostTrainingResource(self._client)
+
+ @cached_property
+ def eval(self) -> AsyncEvalResource:
+ return AsyncEvalResource(self._client)
+
+ @cached_property
+ def agents(self) -> AsyncAgentsResource:
+ return AsyncAgentsResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncAlphaResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncAlphaResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncAlphaResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return AsyncAlphaResourceWithStreamingResponse(self)
+
+
+class AlphaResourceWithRawResponse:
+ def __init__(self, alpha: AlphaResource) -> None:
+ self._alpha = alpha
+
+ @cached_property
+ def inference(self) -> InferenceResourceWithRawResponse:
+ return InferenceResourceWithRawResponse(self._alpha.inference)
+
+ @cached_property
+ def post_training(self) -> PostTrainingResourceWithRawResponse:
+ return PostTrainingResourceWithRawResponse(self._alpha.post_training)
+
+ @cached_property
+ def eval(self) -> EvalResourceWithRawResponse:
+ return EvalResourceWithRawResponse(self._alpha.eval)
+
+ @cached_property
+ def agents(self) -> AgentsResourceWithRawResponse:
+ return AgentsResourceWithRawResponse(self._alpha.agents)
+
+
+class AsyncAlphaResourceWithRawResponse:
+ def __init__(self, alpha: AsyncAlphaResource) -> None:
+ self._alpha = alpha
+
+ @cached_property
+ def inference(self) -> AsyncInferenceResourceWithRawResponse:
+ return AsyncInferenceResourceWithRawResponse(self._alpha.inference)
+
+ @cached_property
+ def post_training(self) -> AsyncPostTrainingResourceWithRawResponse:
+ return AsyncPostTrainingResourceWithRawResponse(self._alpha.post_training)
+
+ @cached_property
+ def eval(self) -> AsyncEvalResourceWithRawResponse:
+ return AsyncEvalResourceWithRawResponse(self._alpha.eval)
+
+ @cached_property
+ def agents(self) -> AsyncAgentsResourceWithRawResponse:
+ return AsyncAgentsResourceWithRawResponse(self._alpha.agents)
+
+
+class AlphaResourceWithStreamingResponse:
+ def __init__(self, alpha: AlphaResource) -> None:
+ self._alpha = alpha
+
+ @cached_property
+ def inference(self) -> InferenceResourceWithStreamingResponse:
+ return InferenceResourceWithStreamingResponse(self._alpha.inference)
+
+ @cached_property
+ def post_training(self) -> PostTrainingResourceWithStreamingResponse:
+ return PostTrainingResourceWithStreamingResponse(self._alpha.post_training)
+
+ @cached_property
+ def eval(self) -> EvalResourceWithStreamingResponse:
+ return EvalResourceWithStreamingResponse(self._alpha.eval)
+
+ @cached_property
+ def agents(self) -> AgentsResourceWithStreamingResponse:
+ return AgentsResourceWithStreamingResponse(self._alpha.agents)
+
+
+class AsyncAlphaResourceWithStreamingResponse:
+ def __init__(self, alpha: AsyncAlphaResource) -> None:
+ self._alpha = alpha
+
+ @cached_property
+ def inference(self) -> AsyncInferenceResourceWithStreamingResponse:
+ return AsyncInferenceResourceWithStreamingResponse(self._alpha.inference)
+
+ @cached_property
+ def post_training(self) -> AsyncPostTrainingResourceWithStreamingResponse:
+ return AsyncPostTrainingResourceWithStreamingResponse(self._alpha.post_training)
+
+ @cached_property
+ def eval(self) -> AsyncEvalResourceWithStreamingResponse:
+ return AsyncEvalResourceWithStreamingResponse(self._alpha.eval)
+
+ @cached_property
+ def agents(self) -> AsyncAgentsResourceWithStreamingResponse:
+ return AsyncAgentsResourceWithStreamingResponse(self._alpha.agents)
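
The new `AlphaResource` is a thin router: each `cached_property` lazily builds a sub-resource bound to the same client, so every pre-release API hangs off one attribute. Sketch of the resulting surface (client construction assumed):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

inference = client.alpha.inference          # rerank
post_training = client.alpha.post_training  # fine-tuning jobs
evaluation = client.alpha.eval              # benchmark evaluations
agents = client.alpha.agents                # agent/session/turn CRUD

# The raw and streaming wrappers compose the same way:
raw_agents = client.alpha.with_raw_response.agents
```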
diff --git a/src/llama_stack_client/resources/eval/__init__.py b/src/llama_stack_client/resources/alpha/eval/__init__.py
similarity index 100%
rename from src/llama_stack_client/resources/eval/__init__.py
rename to src/llama_stack_client/resources/alpha/eval/__init__.py
diff --git a/src/llama_stack_client/resources/eval/eval.py b/src/llama_stack_client/resources/alpha/eval/eval.py
similarity index 94%
rename from src/llama_stack_client/resources/eval/eval.py
rename to src/llama_stack_client/resources/alpha/eval/eval.py
index 87637875..b5347c0b 100644
--- a/src/llama_stack_client/resources/eval/eval.py
+++ b/src/llama_stack_client/resources/alpha/eval/eval.py
@@ -14,26 +14,26 @@
JobsResourceWithStreamingResponse,
AsyncJobsResourceWithStreamingResponse,
)
-from ...types import (
- eval_run_eval_params,
- eval_evaluate_rows_params,
- eval_run_eval_alpha_params,
- eval_evaluate_rows_alpha_params,
-)
-from ..._types import Body, Query, Headers, NotGiven, SequenceNotStr, not_given
-from ..._utils import maybe_transform, async_maybe_transform
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import (
+from ...._types import Body, Query, Headers, NotGiven, SequenceNotStr, not_given
+from ...._utils import maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
to_raw_response_wrapper,
to_streamed_response_wrapper,
async_to_raw_response_wrapper,
async_to_streamed_response_wrapper,
)
-from ...types.job import Job
-from ..._base_client import make_request_options
-from ...types.evaluate_response import EvaluateResponse
-from ...types.benchmark_config_param import BenchmarkConfigParam
+from ....types.alpha import (
+ eval_run_eval_params,
+ eval_evaluate_rows_params,
+ eval_run_eval_alpha_params,
+ eval_evaluate_rows_alpha_params,
+)
+from ...._base_client import make_request_options
+from ....types.alpha.job import Job
+from ....types.alpha.evaluate_response import EvaluateResponse
+from ....types.alpha.benchmark_config_param import BenchmarkConfigParam
__all__ = ["EvalResource", "AsyncEvalResource"]
@@ -97,7 +97,7 @@ def evaluate_rows(
if not benchmark_id:
raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
return self._post(
- f"/v1/eval/benchmarks/{benchmark_id}/evaluations",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/evaluations",
body=maybe_transform(
{
"benchmark_config": benchmark_config,
@@ -147,7 +147,7 @@ def evaluate_rows_alpha(
if not benchmark_id:
raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
return self._post(
- f"/v1/eval/benchmarks/{benchmark_id}/evaluations",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/evaluations",
body=maybe_transform(
{
"benchmark_config": benchmark_config,
@@ -191,7 +191,7 @@ def run_eval(
if not benchmark_id:
raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
return self._post(
- f"/v1/eval/benchmarks/{benchmark_id}/jobs",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs",
body=maybe_transform({"benchmark_config": benchmark_config}, eval_run_eval_params.EvalRunEvalParams),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -228,7 +228,7 @@ def run_eval_alpha(
if not benchmark_id:
raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
return self._post(
- f"/v1/eval/benchmarks/{benchmark_id}/jobs",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs",
body=maybe_transform(
{"benchmark_config": benchmark_config}, eval_run_eval_alpha_params.EvalRunEvalAlphaParams
),
@@ -298,7 +298,7 @@ async def evaluate_rows(
if not benchmark_id:
raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
return await self._post(
- f"/v1/eval/benchmarks/{benchmark_id}/evaluations",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/evaluations",
body=await async_maybe_transform(
{
"benchmark_config": benchmark_config,
@@ -348,7 +348,7 @@ async def evaluate_rows_alpha(
if not benchmark_id:
raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
return await self._post(
- f"/v1/eval/benchmarks/{benchmark_id}/evaluations",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/evaluations",
body=await async_maybe_transform(
{
"benchmark_config": benchmark_config,
@@ -392,7 +392,7 @@ async def run_eval(
if not benchmark_id:
raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
return await self._post(
- f"/v1/eval/benchmarks/{benchmark_id}/jobs",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs",
body=await async_maybe_transform(
{"benchmark_config": benchmark_config}, eval_run_eval_params.EvalRunEvalParams
),
@@ -431,7 +431,7 @@ async def run_eval_alpha(
if not benchmark_id:
raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
return await self._post(
- f"/v1/eval/benchmarks/{benchmark_id}/jobs",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs",
body=await async_maybe_transform(
{"benchmark_config": benchmark_config}, eval_run_eval_alpha_params.EvalRunEvalAlphaParams
),
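
The eval endpoints keep their method names but now target `/v1alpha/eval/...`. A hedged sketch of kicking off a benchmark run; the `benchmark_config` payload is illustrative only (see `BenchmarkConfigParam` for the real schema), and the benchmark/model IDs are placeholders:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# POST /v1alpha/eval/benchmarks/{benchmark_id}/jobs
job = client.alpha.eval.run_eval(
    benchmark_id="my-benchmark",
    benchmark_config={
        "eval_candidate": {
            "type": "model",
            "model": "llama3.2:3b",
            "sampling_params": {"strategy": {"type": "greedy"}},
        },
    },
)
print(job.job_id, job.status)
```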
diff --git a/src/llama_stack_client/resources/eval/jobs.py b/src/llama_stack_client/resources/alpha/eval/jobs.py
similarity index 93%
rename from src/llama_stack_client/resources/eval/jobs.py
rename to src/llama_stack_client/resources/alpha/eval/jobs.py
index 21f6aea6..8f0fa026 100644
--- a/src/llama_stack_client/resources/eval/jobs.py
+++ b/src/llama_stack_client/resources/alpha/eval/jobs.py
@@ -4,18 +4,18 @@
import httpx
-from ..._types import Body, Query, Headers, NoneType, NotGiven, not_given
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import (
+from ...._types import Body, Query, Headers, NoneType, NotGiven, not_given
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
to_raw_response_wrapper,
to_streamed_response_wrapper,
async_to_raw_response_wrapper,
async_to_streamed_response_wrapper,
)
-from ...types.job import Job
-from ..._base_client import make_request_options
-from ...types.evaluate_response import EvaluateResponse
+from ...._base_client import make_request_options
+from ....types.alpha.job import Job
+from ....types.alpha.evaluate_response import EvaluateResponse
__all__ = ["JobsResource", "AsyncJobsResource"]
@@ -69,7 +69,7 @@ def retrieve(
if not job_id:
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
return self._get(
- f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -106,7 +106,7 @@ def cancel(
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
return self._delete(
- f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -142,7 +142,7 @@ def status(
if not job_id:
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
return self._get(
- f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -199,7 +199,7 @@ async def retrieve(
if not job_id:
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
return await self._get(
- f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -236,7 +236,7 @@ async def cancel(
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
return await self._delete(
- f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -272,7 +272,7 @@ async def status(
if not job_id:
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
return await self._get(
- f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
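
The eval jobs sub-resource follows the same `/v1alpha/` move. Assuming the argument order used by the other per-benchmark methods (job ID positional, benchmark ID keyword), the job lifecycle looks like:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

benchmark_id = "my-benchmark"  # placeholder
job_id = "job-789"             # placeholder

# GET /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
status = client.alpha.eval.jobs.status(job_id, benchmark_id=benchmark_id)

# GET /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result
result = client.alpha.eval.jobs.retrieve(job_id, benchmark_id=benchmark_id)

# DELETE /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
client.alpha.eval.jobs.cancel(job_id, benchmark_id=benchmark_id)
```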
diff --git a/src/llama_stack_client/resources/inference.py b/src/llama_stack_client/resources/alpha/inference.py
similarity index 94%
rename from src/llama_stack_client/resources/inference.py
rename to src/llama_stack_client/resources/alpha/inference.py
index e5cf7b6b..ca259357 100644
--- a/src/llama_stack_client/resources/inference.py
+++ b/src/llama_stack_client/resources/alpha/inference.py
@@ -6,20 +6,20 @@
import httpx
-from ..types import inference_rerank_params
-from .._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
-from .._utils import maybe_transform, async_maybe_transform
-from .._compat import cached_property
-from .._resource import SyncAPIResource, AsyncAPIResource
-from .._response import (
+from ..._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
+from ..._utils import maybe_transform, async_maybe_transform
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
to_raw_response_wrapper,
to_streamed_response_wrapper,
async_to_raw_response_wrapper,
async_to_streamed_response_wrapper,
)
-from .._wrappers import DataWrapper
-from .._base_client import make_request_options
-from ..types.inference_rerank_response import InferenceRerankResponse
+from ..._wrappers import DataWrapper
+from ...types.alpha import inference_rerank_params
+from ..._base_client import make_request_options
+from ...types.alpha.inference_rerank_response import InferenceRerankResponse
__all__ = ["InferenceResource", "AsyncInferenceResource"]
diff --git a/src/llama_stack_client/resources/post_training/__init__.py b/src/llama_stack_client/resources/alpha/post_training/__init__.py
similarity index 100%
rename from src/llama_stack_client/resources/post_training/__init__.py
rename to src/llama_stack_client/resources/alpha/post_training/__init__.py
diff --git a/src/llama_stack_client/resources/post_training/job.py b/src/llama_stack_client/resources/alpha/post_training/job.py
similarity index 92%
rename from src/llama_stack_client/resources/post_training/job.py
rename to src/llama_stack_client/resources/alpha/post_training/job.py
index ab00e054..083697a1 100644
--- a/src/llama_stack_client/resources/post_training/job.py
+++ b/src/llama_stack_client/resources/alpha/post_training/job.py
@@ -6,22 +6,22 @@
import httpx
-from ..._types import Body, Query, Headers, NoneType, NotGiven, not_given
-from ..._utils import maybe_transform, async_maybe_transform
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import (
+from ...._types import Body, Query, Headers, NoneType, NotGiven, not_given
+from ...._utils import maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
to_raw_response_wrapper,
to_streamed_response_wrapper,
async_to_raw_response_wrapper,
async_to_streamed_response_wrapper,
)
-from ..._wrappers import DataWrapper
-from ..._base_client import make_request_options
-from ...types.post_training import job_cancel_params, job_status_params, job_artifacts_params
-from ...types.list_post_training_jobs_response import Data
-from ...types.post_training.job_status_response import JobStatusResponse
-from ...types.post_training.job_artifacts_response import JobArtifactsResponse
+from ...._wrappers import DataWrapper
+from ...._base_client import make_request_options
+from ....types.alpha.post_training import job_cancel_params, job_status_params, job_artifacts_params
+from ....types.alpha.list_post_training_jobs_response import Data
+from ....types.alpha.post_training.job_status_response import JobStatusResponse
+from ....types.alpha.post_training.job_artifacts_response import JobArtifactsResponse
__all__ = ["JobResource", "AsyncJobResource"]
@@ -58,7 +58,7 @@ def list(
) -> List[Data]:
"""Get all training jobs."""
return self._get(
- "/v1/post-training/jobs",
+ "/v1alpha/post-training/jobs",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -95,7 +95,7 @@ def artifacts(
timeout: Override the client-level default timeout for this request, in seconds
"""
return self._get(
- "/v1/post-training/job/artifacts",
+ "/v1alpha/post-training/job/artifacts",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -133,7 +133,7 @@ def cancel(
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
return self._post(
- "/v1/post-training/job/cancel",
+ "/v1alpha/post-training/job/cancel",
body=maybe_transform({"job_uuid": job_uuid}, job_cancel_params.JobCancelParams),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -167,7 +167,7 @@ def status(
timeout: Override the client-level default timeout for this request, in seconds
"""
return self._get(
- "/v1/post-training/job/status",
+ "/v1alpha/post-training/job/status",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -211,7 +211,7 @@ async def list(
) -> List[Data]:
"""Get all training jobs."""
return await self._get(
- "/v1/post-training/jobs",
+ "/v1alpha/post-training/jobs",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -248,7 +248,7 @@ async def artifacts(
timeout: Override the client-level default timeout for this request, in seconds
"""
return await self._get(
- "/v1/post-training/job/artifacts",
+ "/v1alpha/post-training/job/artifacts",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -286,7 +286,7 @@ async def cancel(
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
return await self._post(
- "/v1/post-training/job/cancel",
+ "/v1alpha/post-training/job/cancel",
body=await async_maybe_transform({"job_uuid": job_uuid}, job_cancel_params.JobCancelParams),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -320,7 +320,7 @@ async def status(
timeout: Override the client-level default timeout for this request, in seconds
"""
return await self._get(
- "/v1/post-training/job/status",
+ "/v1alpha/post-training/job/status",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
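
Training-job bookkeeping is unchanged apart from the `/v1alpha/` prefix; as the hunks above show, `job_uuid` travels as a query parameter on the GET endpoints and in the body for cancel. Sketch (UUIDs are placeholders):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# GET /v1alpha/post-training/jobs
jobs = client.alpha.post_training.job.list()

# GET /v1alpha/post-training/job/status and .../job/artifacts
status = client.alpha.post_training.job.status(job_uuid="job-uuid-123")
artifacts = client.alpha.post_training.job.artifacts(job_uuid="job-uuid-123")

# POST /v1alpha/post-training/job/cancel
client.alpha.post_training.job.cancel(job_uuid="job-uuid-123")
```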
diff --git a/src/llama_stack_client/resources/post_training/post_training.py b/src/llama_stack_client/resources/alpha/post_training/post_training.py
similarity index 95%
rename from src/llama_stack_client/resources/post_training/post_training.py
rename to src/llama_stack_client/resources/alpha/post_training/post_training.py
index 760d9cb2..a26c813a 100644
--- a/src/llama_stack_client/resources/post_training/post_training.py
+++ b/src/llama_stack_client/resources/alpha/post_training/post_training.py
@@ -14,23 +14,23 @@
JobResourceWithStreamingResponse,
AsyncJobResourceWithStreamingResponse,
)
-from ...types import (
- post_training_preference_optimize_params,
- post_training_supervised_fine_tune_params,
-)
-from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
-from ..._utils import maybe_transform, async_maybe_transform
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import (
+from ...._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
+from ...._utils import maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
to_raw_response_wrapper,
to_streamed_response_wrapper,
async_to_raw_response_wrapper,
async_to_streamed_response_wrapper,
)
-from ..._base_client import make_request_options
-from ...types.post_training_job import PostTrainingJob
-from ...types.algorithm_config_param import AlgorithmConfigParam
+from ....types.alpha import (
+ post_training_preference_optimize_params,
+ post_training_supervised_fine_tune_params,
+)
+from ...._base_client import make_request_options
+from ....types.alpha.post_training_job import PostTrainingJob
+from ....types.alpha.algorithm_config_param import AlgorithmConfigParam
__all__ = ["PostTrainingResource", "AsyncPostTrainingResource"]
@@ -100,7 +100,7 @@ def preference_optimize(
timeout: Override the client-level default timeout for this request, in seconds
"""
return self._post(
- "/v1/post-training/preference-optimize",
+ "/v1alpha/post-training/preference-optimize",
body=maybe_transform(
{
"algorithm_config": algorithm_config,
@@ -162,7 +162,7 @@ def supervised_fine_tune(
timeout: Override the client-level default timeout for this request, in seconds
"""
return self._post(
- "/v1/post-training/supervised-fine-tune",
+ "/v1alpha/post-training/supervised-fine-tune",
body=maybe_transform(
{
"hyperparam_search_config": hyperparam_search_config,
@@ -247,7 +247,7 @@ async def preference_optimize(
timeout: Override the client-level default timeout for this request, in seconds
"""
return await self._post(
- "/v1/post-training/preference-optimize",
+ "/v1alpha/post-training/preference-optimize",
body=await async_maybe_transform(
{
"algorithm_config": algorithm_config,
@@ -309,7 +309,7 @@ async def supervised_fine_tune(
timeout: Override the client-level default timeout for this request, in seconds
"""
return await self._post(
- "/v1/post-training/supervised-fine-tune",
+ "/v1alpha/post-training/supervised-fine-tune",
body=await async_maybe_transform(
{
"hyperparam_search_config": hyperparam_search_config,
diff --git a/src/llama_stack_client/resources/vector_stores/__init__.py b/src/llama_stack_client/resources/vector_stores/__init__.py
index 85d202da..d83a42b6 100644
--- a/src/llama_stack_client/resources/vector_stores/__init__.py
+++ b/src/llama_stack_client/resources/vector_stores/__init__.py
@@ -8,6 +8,14 @@
FilesResourceWithStreamingResponse,
AsyncFilesResourceWithStreamingResponse,
)
+from .file_batches import (
+ FileBatchesResource,
+ AsyncFileBatchesResource,
+ FileBatchesResourceWithRawResponse,
+ AsyncFileBatchesResourceWithRawResponse,
+ FileBatchesResourceWithStreamingResponse,
+ AsyncFileBatchesResourceWithStreamingResponse,
+)
from .vector_stores import (
VectorStoresResource,
AsyncVectorStoresResource,
@@ -24,6 +32,12 @@
"AsyncFilesResourceWithRawResponse",
"FilesResourceWithStreamingResponse",
"AsyncFilesResourceWithStreamingResponse",
+ "FileBatchesResource",
+ "AsyncFileBatchesResource",
+ "FileBatchesResourceWithRawResponse",
+ "AsyncFileBatchesResourceWithRawResponse",
+ "FileBatchesResourceWithStreamingResponse",
+ "AsyncFileBatchesResourceWithStreamingResponse",
"VectorStoresResource",
"AsyncVectorStoresResource",
"VectorStoresResourceWithRawResponse",
diff --git a/src/llama_stack_client/resources/vector_stores/file_batches.py b/src/llama_stack_client/resources/vector_stores/file_batches.py
new file mode 100644
index 00000000..654fdd79
--- /dev/null
+++ b/src/llama_stack_client/resources/vector_stores/file_batches.py
@@ -0,0 +1,521 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable
+
+import httpx
+
+from ..._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
+from ..._utils import maybe_transform, async_maybe_transform
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ...pagination import SyncOpenAICursorPage, AsyncOpenAICursorPage
+from ..._base_client import AsyncPaginator, make_request_options
+from ...types.vector_stores import file_batch_list_params, file_batch_create_params
+from ...types.vector_stores.vector_store_file import VectorStoreFile
+from ...types.vector_stores.vector_store_file_batches import VectorStoreFileBatches
+
+__all__ = ["FileBatchesResource", "AsyncFileBatchesResource"]
+
+
+class FileBatchesResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> FileBatchesResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return FileBatchesResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> FileBatchesResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return FileBatchesResourceWithStreamingResponse(self)
+
+ def create(
+ self,
+ vector_store_id: str,
+ *,
+ file_ids: SequenceNotStr[str],
+ attributes: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | Omit = omit,
+ chunking_strategy: file_batch_create_params.ChunkingStrategy | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> VectorStoreFileBatches:
+ """
+ Create a vector store file batch.
+
+ Args:
+ file_ids: A list of File IDs that the vector store should use.
+
+ attributes: (Optional) Key-value attributes to store with the files.
+
+ chunking_strategy: (Optional) The chunking strategy used to chunk the file(s). Defaults to auto.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not vector_store_id:
+ raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+ return self._post(
+ f"/v1/vector_stores/{vector_store_id}/file_batches",
+ body=maybe_transform(
+ {
+ "file_ids": file_ids,
+ "attributes": attributes,
+ "chunking_strategy": chunking_strategy,
+ },
+ file_batch_create_params.FileBatchCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=VectorStoreFileBatches,
+ )
+
+ def retrieve(
+ self,
+ batch_id: str,
+ *,
+ vector_store_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> VectorStoreFileBatches:
+ """
+ Retrieve a vector store file batch.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not vector_store_id:
+ raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+ if not batch_id:
+ raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+ return self._get(
+ f"/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=VectorStoreFileBatches,
+ )
+
+ def list(
+ self,
+ batch_id: str,
+ *,
+ vector_store_id: str,
+ after: str | Omit = omit,
+ before: str | Omit = omit,
+ filter: str | Omit = omit,
+ limit: int | Omit = omit,
+ order: str | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> SyncOpenAICursorPage[VectorStoreFile]:
+ """
+ Returns a list of vector store files in a batch.
+
+ Args:
+ after: A cursor for use in pagination. `after` is an object ID that defines your place
+ in the list.
+
+ before: A cursor for use in pagination. `before` is an object ID that defines your place
+ in the list.
+
+ filter: Filter by file status. One of `in_progress`, `completed`, `failed`, or `cancelled`.
+
+ limit: A limit on the number of objects to be returned. Limit can range between 1 and
+ 100, and the default is 20.
+
+ order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+ order and `desc` for descending order.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not vector_store_id:
+ raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+ if not batch_id:
+ raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+ return self._get_api_list(
+ f"/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
+ page=SyncOpenAICursorPage[VectorStoreFile],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "before": before,
+ "filter": filter,
+ "limit": limit,
+ "order": order,
+ },
+ file_batch_list_params.FileBatchListParams,
+ ),
+ ),
+ model=VectorStoreFile,
+ )
+
+ def cancel(
+ self,
+ batch_id: str,
+ *,
+ vector_store_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> VectorStoreFileBatches:
+ """
+ Cancels a vector store file batch.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not vector_store_id:
+ raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+ if not batch_id:
+ raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+ return self._post(
+ f"/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=VectorStoreFileBatches,
+ )
+
+
+class AsyncFileBatchesResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncFileBatchesResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncFileBatchesResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncFileBatchesResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return AsyncFileBatchesResourceWithStreamingResponse(self)
+
+ async def create(
+ self,
+ vector_store_id: str,
+ *,
+ file_ids: SequenceNotStr[str],
+ attributes: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | Omit = omit,
+ chunking_strategy: file_batch_create_params.ChunkingStrategy | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> VectorStoreFileBatches:
+ """
+ Create a vector store file batch.
+
+ Args:
+ file_ids: A list of File IDs that the vector store should use.
+
+ attributes: (Optional) Key-value attributes to store with the files.
+
+ chunking_strategy: (Optional) The chunking strategy used to chunk the file(s). Defaults to auto.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not vector_store_id:
+ raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+ return await self._post(
+ f"/v1/vector_stores/{vector_store_id}/file_batches",
+ body=await async_maybe_transform(
+ {
+ "file_ids": file_ids,
+ "attributes": attributes,
+ "chunking_strategy": chunking_strategy,
+ },
+ file_batch_create_params.FileBatchCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=VectorStoreFileBatches,
+ )
+
+ async def retrieve(
+ self,
+ batch_id: str,
+ *,
+ vector_store_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> VectorStoreFileBatches:
+ """
+ Retrieve a vector store file batch.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not vector_store_id:
+ raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+ if not batch_id:
+ raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+ return await self._get(
+ f"/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=VectorStoreFileBatches,
+ )
+
+ def list(
+ self,
+ batch_id: str,
+ *,
+ vector_store_id: str,
+ after: str | Omit = omit,
+ before: str | Omit = omit,
+ filter: str | Omit = omit,
+ limit: int | Omit = omit,
+ order: str | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> AsyncPaginator[VectorStoreFile, AsyncOpenAICursorPage[VectorStoreFile]]:
+ """
+ Returns a list of vector store files in a batch.
+
+ Args:
+ after: A cursor for use in pagination. `after` is an object ID that defines your place
+ in the list.
+
+ before: A cursor for use in pagination. `before` is an object ID that defines your place
+ in the list.
+
+ filter: Filter by file status. One of `in_progress`, `completed`, `failed`, or `cancelled`.
+
+ limit: A limit on the number of objects to be returned. Limit can range between 1 and
+ 100, and the default is 20.
+
+ order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+ order and `desc` for descending order.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not vector_store_id:
+ raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+ if not batch_id:
+ raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+ return self._get_api_list(
+ f"/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
+ page=AsyncOpenAICursorPage[VectorStoreFile],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "before": before,
+ "filter": filter,
+ "limit": limit,
+ "order": order,
+ },
+ file_batch_list_params.FileBatchListParams,
+ ),
+ ),
+ model=VectorStoreFile,
+ )
+
+ async def cancel(
+ self,
+ batch_id: str,
+ *,
+ vector_store_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> VectorStoreFileBatches:
+ """
+ Cancels a vector store file batch.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not vector_store_id:
+ raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+ if not batch_id:
+ raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+ return await self._post(
+ f"/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=VectorStoreFileBatches,
+ )
+
+
+class FileBatchesResourceWithRawResponse:
+ def __init__(self, file_batches: FileBatchesResource) -> None:
+ self._file_batches = file_batches
+
+ self.create = to_raw_response_wrapper(
+ file_batches.create,
+ )
+ self.retrieve = to_raw_response_wrapper(
+ file_batches.retrieve,
+ )
+ self.list = to_raw_response_wrapper(
+ file_batches.list,
+ )
+ self.cancel = to_raw_response_wrapper(
+ file_batches.cancel,
+ )
+
+
+class AsyncFileBatchesResourceWithRawResponse:
+ def __init__(self, file_batches: AsyncFileBatchesResource) -> None:
+ self._file_batches = file_batches
+
+ self.create = async_to_raw_response_wrapper(
+ file_batches.create,
+ )
+ self.retrieve = async_to_raw_response_wrapper(
+ file_batches.retrieve,
+ )
+ self.list = async_to_raw_response_wrapper(
+ file_batches.list,
+ )
+ self.cancel = async_to_raw_response_wrapper(
+ file_batches.cancel,
+ )
+
+
+class FileBatchesResourceWithStreamingResponse:
+ def __init__(self, file_batches: FileBatchesResource) -> None:
+ self._file_batches = file_batches
+
+ self.create = to_streamed_response_wrapper(
+ file_batches.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ file_batches.retrieve,
+ )
+ self.list = to_streamed_response_wrapper(
+ file_batches.list,
+ )
+ self.cancel = to_streamed_response_wrapper(
+ file_batches.cancel,
+ )
+
+
+class AsyncFileBatchesResourceWithStreamingResponse:
+ def __init__(self, file_batches: AsyncFileBatchesResource) -> None:
+ self._file_batches = file_batches
+
+ self.create = async_to_streamed_response_wrapper(
+ file_batches.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ file_batches.retrieve,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ file_batches.list,
+ )
+ self.cancel = async_to_streamed_response_wrapper(
+ file_batches.cancel,
+ )
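
End-to-end, the new resource gives file batches the same CRUD shape as the rest of the vector-store API (note these endpoints stay on `/v1/`, not `/v1alpha/`). A sketch assuming an existing vector store and uploaded files, and that the batch object exposes `id` as in the OpenAI-compatible schema:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# POST /v1/vector_stores/{vector_store_id}/file_batches
batch = client.vector_stores.file_batches.create(
    vector_store_id="vs_123",           # placeholder store ID
    file_ids=["file-abc", "file-def"],  # placeholder file IDs
)

# GET .../file_batches/{batch_id}
batch = client.vector_stores.file_batches.retrieve(batch.id, vector_store_id="vs_123")

# GET .../file_batches/{batch_id}/files — a cursor page; iteration auto-paginates.
for f in client.vector_stores.file_batches.list(batch.id, vector_store_id="vs_123"):
    print(f.id, f.status)

# POST .../file_batches/{batch_id}/cancel
client.vector_stores.file_batches.cancel(batch.id, vector_store_id="vs_123")
```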
diff --git a/src/llama_stack_client/resources/vector_stores/vector_stores.py b/src/llama_stack_client/resources/vector_stores/vector_stores.py
index f858100b..86bb0e06 100644
--- a/src/llama_stack_client/resources/vector_stores/vector_stores.py
+++ b/src/llama_stack_client/resources/vector_stores/vector_stores.py
@@ -31,6 +31,14 @@
async_to_streamed_response_wrapper,
)
from ...pagination import SyncOpenAICursorPage, AsyncOpenAICursorPage
+from .file_batches import (
+ FileBatchesResource,
+ AsyncFileBatchesResource,
+ FileBatchesResourceWithRawResponse,
+ AsyncFileBatchesResourceWithRawResponse,
+ FileBatchesResourceWithStreamingResponse,
+ AsyncFileBatchesResourceWithStreamingResponse,
+)
from ..._base_client import AsyncPaginator, make_request_options
from ...types.vector_store import VectorStore
from ...types.vector_store_delete_response import VectorStoreDeleteResponse
@@ -44,6 +52,10 @@ class VectorStoresResource(SyncAPIResource):
def files(self) -> FilesResource:
return FilesResource(self._client)
+ @cached_property
+ def file_batches(self) -> FileBatchesResource:
+ return FileBatchesResource(self._client)
+
@cached_property
def with_raw_response(self) -> VectorStoresResourceWithRawResponse:
"""
@@ -378,6 +390,10 @@ class AsyncVectorStoresResource(AsyncAPIResource):
def files(self) -> AsyncFilesResource:
return AsyncFilesResource(self._client)
+ @cached_property
+ def file_batches(self) -> AsyncFileBatchesResource:
+ return AsyncFileBatchesResource(self._client)
+
@cached_property
def with_raw_response(self) -> AsyncVectorStoresResourceWithRawResponse:
"""
@@ -734,6 +750,10 @@ def __init__(self, vector_stores: VectorStoresResource) -> None:
def files(self) -> FilesResourceWithRawResponse:
return FilesResourceWithRawResponse(self._vector_stores.files)
+ @cached_property
+ def file_batches(self) -> FileBatchesResourceWithRawResponse:
+ return FileBatchesResourceWithRawResponse(self._vector_stores.file_batches)
+
class AsyncVectorStoresResourceWithRawResponse:
def __init__(self, vector_stores: AsyncVectorStoresResource) -> None:
@@ -762,6 +782,10 @@ def __init__(self, vector_stores: AsyncVectorStoresResource) -> None:
def files(self) -> AsyncFilesResourceWithRawResponse:
return AsyncFilesResourceWithRawResponse(self._vector_stores.files)
+ @cached_property
+ def file_batches(self) -> AsyncFileBatchesResourceWithRawResponse:
+ return AsyncFileBatchesResourceWithRawResponse(self._vector_stores.file_batches)
+
class VectorStoresResourceWithStreamingResponse:
def __init__(self, vector_stores: VectorStoresResource) -> None:
@@ -790,6 +814,10 @@ def __init__(self, vector_stores: VectorStoresResource) -> None:
def files(self) -> FilesResourceWithStreamingResponse:
return FilesResourceWithStreamingResponse(self._vector_stores.files)
+ @cached_property
+ def file_batches(self) -> FileBatchesResourceWithStreamingResponse:
+ return FileBatchesResourceWithStreamingResponse(self._vector_stores.file_batches)
+
class AsyncVectorStoresResourceWithStreamingResponse:
def __init__(self, vector_stores: AsyncVectorStoresResource) -> None:
@@ -817,3 +845,7 @@ def __init__(self, vector_stores: AsyncVectorStoresResource) -> None:
@cached_property
def files(self) -> AsyncFilesResourceWithStreamingResponse:
return AsyncFilesResourceWithStreamingResponse(self._vector_stores.files)
+
+ @cached_property
+ def file_batches(self) -> AsyncFileBatchesResourceWithStreamingResponse:
+ return AsyncFileBatchesResourceWithStreamingResponse(self._vector_stores.file_batches)
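
Because the sub-resource is also threaded through the wrapper classes above, raw-response access works with no extra plumbing. Sketch:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

response = client.vector_stores.with_raw_response.file_batches.create(
    vector_store_id="vs_123",  # placeholder
    file_ids=["file-abc"],
)
print(response.headers)   # raw HTTP headers
batch = response.parse()  # typed VectorStoreFileBatches model
```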
diff --git a/src/llama_stack_client/types/__init__.py b/src/llama_stack_client/types/__init__.py
index f81ada61..0c3d0f34 100644
--- a/src/llama_stack_client/types/__init__.py
+++ b/src/llama_stack_client/types/__init__.py
@@ -2,13 +2,11 @@
from __future__ import annotations
-from .job import Job as Job
from .file import File as File
from .tool import Tool as Tool
from .model import Model as Model
from .trace import Trace as Trace
from .shared import (
- Metric as Metric,
Message as Message,
Document as Document,
ToolCall as ToolCall,
@@ -24,9 +22,7 @@
SafetyViolation as SafetyViolation,
CompletionMessage as CompletionMessage,
InterleavedContent as InterleavedContent,
- ToolParamDefinition as ToolParamDefinition,
ToolResponseMessage as ToolResponseMessage,
- ChatCompletionResponse as ChatCompletionResponse,
InterleavedContentItem as InterleavedContentItem,
)
from .shield import Shield as Shield
@@ -40,48 +36,33 @@
from .vector_store import VectorStore as VectorStore
from .version_info import VersionInfo as VersionInfo
from .provider_info import ProviderInfo as ProviderInfo
-from .tool_response import ToolResponse as ToolResponse
-from .inference_step import InferenceStep as InferenceStep
from .tool_def_param import ToolDefParam as ToolDefParam
from .create_response import CreateResponse as CreateResponse
from .response_object import ResponseObject as ResponseObject
from .file_list_params import FileListParams as FileListParams
-from .shield_call_step import ShieldCallStep as ShieldCallStep
from .span_with_status import SpanWithStatus as SpanWithStatus
from .tool_list_params import ToolListParams as ToolListParams
-from .agent_list_params import AgentListParams as AgentListParams
-from .evaluate_response import EvaluateResponse as EvaluateResponse
-from .post_training_job import PostTrainingJob as PostTrainingJob
from .scoring_fn_params import ScoringFnParams as ScoringFnParams
from .file_create_params import FileCreateParams as FileCreateParams
from .tool_list_response import ToolListResponse as ToolListResponse
-from .agent_create_params import AgentCreateParams as AgentCreateParams
-from .agent_list_response import AgentListResponse as AgentListResponse
from .list_files_response import ListFilesResponse as ListFilesResponse
from .list_tools_response import ListToolsResponse as ListToolsResponse
from .model_list_response import ModelListResponse as ModelListResponse
from .route_list_response import RouteListResponse as RouteListResponse
from .run_shield_response import RunShieldResponse as RunShieldResponse
-from .tool_execution_step import ToolExecutionStep as ToolExecutionStep
-from .tool_response_param import ToolResponseParam as ToolResponseParam
from .delete_file_response import DeleteFileResponse as DeleteFileResponse
-from .eval_run_eval_params import EvalRunEvalParams as EvalRunEvalParams
from .list_models_response import ListModelsResponse as ListModelsResponse
from .list_routes_response import ListRoutesResponse as ListRoutesResponse
from .query_spans_response import QuerySpansResponse as QuerySpansResponse
from .response_list_params import ResponseListParams as ResponseListParams
from .scoring_score_params import ScoringScoreParams as ScoringScoreParams
from .shield_list_response import ShieldListResponse as ShieldListResponse
-from .agent_create_response import AgentCreateResponse as AgentCreateResponse
from .chat_completion_chunk import ChatCompletionChunk as ChatCompletionChunk
from .dataset_list_response import DatasetListResponse as DatasetListResponse
from .list_shields_response import ListShieldsResponse as ListShieldsResponse
-from .memory_retrieval_step import MemoryRetrievalStep as MemoryRetrievalStep
from .model_register_params import ModelRegisterParams as ModelRegisterParams
from .query_chunks_response import QueryChunksResponse as QueryChunksResponse
from .query_condition_param import QueryConditionParam as QueryConditionParam
-from .algorithm_config_param import AlgorithmConfigParam as AlgorithmConfigParam
-from .benchmark_config_param import BenchmarkConfigParam as BenchmarkConfigParam
from .list_datasets_response import ListDatasetsResponse as ListDatasetsResponse
from .provider_list_response import ProviderListResponse as ProviderListResponse
from .response_create_params import ResponseCreateParams as ResponseCreateParams
@@ -91,12 +72,10 @@
from .shield_register_params import ShieldRegisterParams as ShieldRegisterParams
from .tool_invocation_result import ToolInvocationResult as ToolInvocationResult
from .vector_io_query_params import VectorIoQueryParams as VectorIoQueryParams
-from .agent_retrieve_response import AgentRetrieveResponse as AgentRetrieveResponse
from .benchmark_list_response import BenchmarkListResponse as BenchmarkListResponse
from .dataset_iterrows_params import DatasetIterrowsParams as DatasetIterrowsParams
from .dataset_register_params import DatasetRegisterParams as DatasetRegisterParams
from .embedding_create_params import EmbeddingCreateParams as EmbeddingCreateParams
-from .inference_rerank_params import InferenceRerankParams as InferenceRerankParams
from .list_providers_response import ListProvidersResponse as ListProvidersResponse
from .scoring_fn_params_param import ScoringFnParamsParam as ScoringFnParamsParam
from .toolgroup_list_response import ToolgroupListResponse as ToolgroupListResponse
@@ -114,14 +93,11 @@
from .dataset_iterrows_response import DatasetIterrowsResponse as DatasetIterrowsResponse
from .dataset_register_response import DatasetRegisterResponse as DatasetRegisterResponse
from .dataset_retrieve_response import DatasetRetrieveResponse as DatasetRetrieveResponse
-from .eval_evaluate_rows_params import EvalEvaluateRowsParams as EvalEvaluateRowsParams
-from .inference_rerank_response import InferenceRerankResponse as InferenceRerankResponse
from .list_tool_groups_response import ListToolGroupsResponse as ListToolGroupsResponse
from .toolgroup_register_params import ToolgroupRegisterParams as ToolgroupRegisterParams
from .vector_db_register_params import VectorDBRegisterParams as VectorDBRegisterParams
from .completion_create_response import CompletionCreateResponse as CompletionCreateResponse
from .create_embeddings_response import CreateEmbeddingsResponse as CreateEmbeddingsResponse
-from .eval_run_eval_alpha_params import EvalRunEvalAlphaParams as EvalRunEvalAlphaParams
from .scoring_score_batch_params import ScoringScoreBatchParams as ScoringScoreBatchParams
from .telemetry_log_event_params import TelemetryLogEventParams as TelemetryLogEventParams
from .vector_store_create_params import VectorStoreCreateParams as VectorStoreCreateParams
@@ -141,11 +117,9 @@
from .telemetry_query_metrics_params import TelemetryQueryMetricsParams as TelemetryQueryMetricsParams
from .telemetry_query_spans_response import TelemetryQuerySpansResponse as TelemetryQuerySpansResponse
from .tool_runtime_list_tools_params import ToolRuntimeListToolsParams as ToolRuntimeListToolsParams
-from .eval_evaluate_rows_alpha_params import EvalEvaluateRowsAlphaParams as EvalEvaluateRowsAlphaParams
from .list_scoring_functions_response import ListScoringFunctionsResponse as ListScoringFunctionsResponse
from .telemetry_query_traces_response import TelemetryQueryTracesResponse as TelemetryQueryTracesResponse
from .tool_runtime_invoke_tool_params import ToolRuntimeInvokeToolParams as ToolRuntimeInvokeToolParams
-from .list_post_training_jobs_response import ListPostTrainingJobsResponse as ListPostTrainingJobsResponse
from .scoring_function_register_params import ScoringFunctionRegisterParams as ScoringFunctionRegisterParams
from .telemetry_get_span_tree_response import TelemetryGetSpanTreeResponse as TelemetryGetSpanTreeResponse
from .telemetry_query_metrics_response import TelemetryQueryMetricsResponse as TelemetryQueryMetricsResponse
@@ -154,12 +128,6 @@
from .telemetry_save_spans_to_dataset_params import (
TelemetrySaveSpansToDatasetParams as TelemetrySaveSpansToDatasetParams,
)
-from .post_training_preference_optimize_params import (
- PostTrainingPreferenceOptimizeParams as PostTrainingPreferenceOptimizeParams,
-)
-from .post_training_supervised_fine_tune_params import (
- PostTrainingSupervisedFineTuneParams as PostTrainingSupervisedFineTuneParams,
-)
from .synthetic_data_generation_generate_params import (
SyntheticDataGenerationGenerateParams as SyntheticDataGenerationGenerateParams,
)
diff --git a/src/llama_stack_client/types/alpha/__init__.py b/src/llama_stack_client/types/alpha/__init__.py
new file mode 100644
index 00000000..9651e73a
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .job import Job as Job
+from .tool_response import ToolResponse as ToolResponse
+from .inference_step import InferenceStep as InferenceStep
+from .shield_call_step import ShieldCallStep as ShieldCallStep
+from .agent_list_params import AgentListParams as AgentListParams
+from .evaluate_response import EvaluateResponse as EvaluateResponse
+from .post_training_job import PostTrainingJob as PostTrainingJob
+from .agent_create_params import AgentCreateParams as AgentCreateParams
+from .agent_list_response import AgentListResponse as AgentListResponse
+from .tool_execution_step import ToolExecutionStep as ToolExecutionStep
+from .tool_response_param import ToolResponseParam as ToolResponseParam
+from .eval_run_eval_params import EvalRunEvalParams as EvalRunEvalParams
+from .agent_create_response import AgentCreateResponse as AgentCreateResponse
+from .memory_retrieval_step import MemoryRetrievalStep as MemoryRetrievalStep
+from .algorithm_config_param import AlgorithmConfigParam as AlgorithmConfigParam
+from .benchmark_config_param import BenchmarkConfigParam as BenchmarkConfigParam
+from .agent_retrieve_response import AgentRetrieveResponse as AgentRetrieveResponse
+from .inference_rerank_params import InferenceRerankParams as InferenceRerankParams
+from .eval_evaluate_rows_params import EvalEvaluateRowsParams as EvalEvaluateRowsParams
+from .inference_rerank_response import InferenceRerankResponse as InferenceRerankResponse
+from .eval_run_eval_alpha_params import EvalRunEvalAlphaParams as EvalRunEvalAlphaParams
+from .eval_evaluate_rows_alpha_params import EvalEvaluateRowsAlphaParams as EvalEvaluateRowsAlphaParams
+from .list_post_training_jobs_response import ListPostTrainingJobsResponse as ListPostTrainingJobsResponse
+from .post_training_preference_optimize_params import (
+ PostTrainingPreferenceOptimizeParams as PostTrainingPreferenceOptimizeParams,
+)
+from .post_training_supervised_fine_tune_params import (
+ PostTrainingSupervisedFineTuneParams as PostTrainingSupervisedFineTuneParams,
+)
diff --git a/src/llama_stack_client/types/agent_create_params.py b/src/llama_stack_client/types/alpha/agent_create_params.py
similarity index 86%
rename from src/llama_stack_client/types/agent_create_params.py
rename to src/llama_stack_client/types/alpha/agent_create_params.py
index 525cf1e2..368704b2 100644
--- a/src/llama_stack_client/types/agent_create_params.py
+++ b/src/llama_stack_client/types/alpha/agent_create_params.py
@@ -4,7 +4,7 @@
from typing_extensions import Required, TypedDict
-from .shared_params.agent_config import AgentConfig
+from ..shared_params.agent_config import AgentConfig
__all__ = ["AgentCreateParams"]
diff --git a/src/llama_stack_client/types/agent_create_response.py b/src/llama_stack_client/types/alpha/agent_create_response.py
similarity index 87%
rename from src/llama_stack_client/types/agent_create_response.py
rename to src/llama_stack_client/types/alpha/agent_create_response.py
index 24fe864e..9b155198 100644
--- a/src/llama_stack_client/types/agent_create_response.py
+++ b/src/llama_stack_client/types/alpha/agent_create_response.py
@@ -1,6 +1,6 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from .._models import BaseModel
+from ..._models import BaseModel
__all__ = ["AgentCreateResponse"]
diff --git a/src/llama_stack_client/types/agent_list_params.py b/src/llama_stack_client/types/alpha/agent_list_params.py
similarity index 100%
rename from src/llama_stack_client/types/agent_list_params.py
rename to src/llama_stack_client/types/alpha/agent_list_params.py
diff --git a/src/llama_stack_client/types/agent_list_response.py b/src/llama_stack_client/types/alpha/agent_list_response.py
similarity index 93%
rename from src/llama_stack_client/types/agent_list_response.py
rename to src/llama_stack_client/types/alpha/agent_list_response.py
index d0640e21..69de5001 100644
--- a/src/llama_stack_client/types/agent_list_response.py
+++ b/src/llama_stack_client/types/alpha/agent_list_response.py
@@ -2,7 +2,7 @@
from typing import Dict, List, Union, Optional
-from .._models import BaseModel
+from ..._models import BaseModel
__all__ = ["AgentListResponse"]
diff --git a/src/llama_stack_client/types/agent_retrieve_response.py b/src/llama_stack_client/types/alpha/agent_retrieve_response.py
similarity index 83%
rename from src/llama_stack_client/types/agent_retrieve_response.py
rename to src/llama_stack_client/types/alpha/agent_retrieve_response.py
index 1671a9fc..87d79b7b 100644
--- a/src/llama_stack_client/types/agent_retrieve_response.py
+++ b/src/llama_stack_client/types/alpha/agent_retrieve_response.py
@@ -2,8 +2,8 @@
from datetime import datetime
-from .._models import BaseModel
-from .shared.agent_config import AgentConfig
+from ..._models import BaseModel
+from ..shared.agent_config import AgentConfig
__all__ = ["AgentRetrieveResponse"]
diff --git a/src/llama_stack_client/types/agents/__init__.py b/src/llama_stack_client/types/alpha/agents/__init__.py
similarity index 100%
rename from src/llama_stack_client/types/agents/__init__.py
rename to src/llama_stack_client/types/alpha/agents/__init__.py
diff --git a/src/llama_stack_client/types/agents/agent_turn_response_stream_chunk.py b/src/llama_stack_client/types/alpha/agents/agent_turn_response_stream_chunk.py
similarity index 90%
rename from src/llama_stack_client/types/agents/agent_turn_response_stream_chunk.py
rename to src/llama_stack_client/types/alpha/agents/agent_turn_response_stream_chunk.py
index 1ce1b8a7..c45bf756 100644
--- a/src/llama_stack_client/types/agents/agent_turn_response_stream_chunk.py
+++ b/src/llama_stack_client/types/alpha/agents/agent_turn_response_stream_chunk.py
@@ -1,6 +1,6 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from ..._models import BaseModel
+from ...._models import BaseModel
from .turn_response_event import TurnResponseEvent
__all__ = ["AgentTurnResponseStreamChunk"]
diff --git a/src/llama_stack_client/types/agents/session.py b/src/llama_stack_client/types/alpha/agents/session.py
similarity index 93%
rename from src/llama_stack_client/types/agents/session.py
rename to src/llama_stack_client/types/alpha/agents/session.py
index 1d3d697e..9b60853a 100644
--- a/src/llama_stack_client/types/agents/session.py
+++ b/src/llama_stack_client/types/alpha/agents/session.py
@@ -4,7 +4,7 @@
from datetime import datetime
from .turn import Turn
-from ..._models import BaseModel
+from ...._models import BaseModel
__all__ = ["Session"]
diff --git a/src/llama_stack_client/types/agents/session_create_params.py b/src/llama_stack_client/types/alpha/agents/session_create_params.py
similarity index 100%
rename from src/llama_stack_client/types/agents/session_create_params.py
rename to src/llama_stack_client/types/alpha/agents/session_create_params.py
diff --git a/src/llama_stack_client/types/agents/session_create_response.py b/src/llama_stack_client/types/alpha/agents/session_create_response.py
similarity index 87%
rename from src/llama_stack_client/types/agents/session_create_response.py
rename to src/llama_stack_client/types/alpha/agents/session_create_response.py
index e7fe2a06..7d30c61a 100644
--- a/src/llama_stack_client/types/agents/session_create_response.py
+++ b/src/llama_stack_client/types/alpha/agents/session_create_response.py
@@ -1,6 +1,6 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from ..._models import BaseModel
+from ...._models import BaseModel
__all__ = ["SessionCreateResponse"]
diff --git a/src/llama_stack_client/types/agents/session_list_params.py b/src/llama_stack_client/types/alpha/agents/session_list_params.py
similarity index 100%
rename from src/llama_stack_client/types/agents/session_list_params.py
rename to src/llama_stack_client/types/alpha/agents/session_list_params.py
diff --git a/src/llama_stack_client/types/agents/session_list_response.py b/src/llama_stack_client/types/alpha/agents/session_list_response.py
similarity index 93%
rename from src/llama_stack_client/types/agents/session_list_response.py
rename to src/llama_stack_client/types/alpha/agents/session_list_response.py
index e70ecc46..23a51baf 100644
--- a/src/llama_stack_client/types/agents/session_list_response.py
+++ b/src/llama_stack_client/types/alpha/agents/session_list_response.py
@@ -2,7 +2,7 @@
from typing import Dict, List, Union, Optional
-from ..._models import BaseModel
+from ...._models import BaseModel
__all__ = ["SessionListResponse"]
diff --git a/src/llama_stack_client/types/agents/session_retrieve_params.py b/src/llama_stack_client/types/alpha/agents/session_retrieve_params.py
similarity index 91%
rename from src/llama_stack_client/types/agents/session_retrieve_params.py
rename to src/llama_stack_client/types/alpha/agents/session_retrieve_params.py
index aeff0ff7..116190cc 100644
--- a/src/llama_stack_client/types/agents/session_retrieve_params.py
+++ b/src/llama_stack_client/types/alpha/agents/session_retrieve_params.py
@@ -4,7 +4,7 @@
from typing_extensions import Required, TypedDict
-from ..._types import SequenceNotStr
+from ...._types import SequenceNotStr
__all__ = ["SessionRetrieveParams"]
diff --git a/src/llama_stack_client/types/agents/step_retrieve_response.py b/src/llama_stack_client/types/alpha/agents/step_retrieve_response.py
similarity index 90%
rename from src/llama_stack_client/types/agents/step_retrieve_response.py
rename to src/llama_stack_client/types/alpha/agents/step_retrieve_response.py
index 10fc13d2..55b64355 100644
--- a/src/llama_stack_client/types/agents/step_retrieve_response.py
+++ b/src/llama_stack_client/types/alpha/agents/step_retrieve_response.py
@@ -3,8 +3,8 @@
from typing import Union
from typing_extensions import Annotated, TypeAlias
-from ..._utils import PropertyInfo
-from ..._models import BaseModel
+from ...._utils import PropertyInfo
+from ...._models import BaseModel
from ..inference_step import InferenceStep
from ..shield_call_step import ShieldCallStep
from ..tool_execution_step import ToolExecutionStep
diff --git a/src/llama_stack_client/types/agents/turn.py b/src/llama_stack_client/types/alpha/agents/turn.py
similarity index 91%
rename from src/llama_stack_client/types/agents/turn.py
rename to src/llama_stack_client/types/alpha/agents/turn.py
index 386d7f78..74ef22aa 100644
--- a/src/llama_stack_client/types/agents/turn.py
+++ b/src/llama_stack_client/types/alpha/agents/turn.py
@@ -4,16 +4,16 @@
from datetime import datetime
from typing_extensions import Literal, Annotated, TypeAlias
-from ..._utils import PropertyInfo
-from ..._models import BaseModel
+from ...._utils import PropertyInfo
+from ...._models import BaseModel
from ..inference_step import InferenceStep
from ..shield_call_step import ShieldCallStep
-from ..shared.user_message import UserMessage
from ..tool_execution_step import ToolExecutionStep
+from ...shared.user_message import UserMessage
from ..memory_retrieval_step import MemoryRetrievalStep
-from ..shared.completion_message import CompletionMessage
-from ..shared.tool_response_message import ToolResponseMessage
-from ..shared.interleaved_content_item import InterleavedContentItem
+from ...shared.completion_message import CompletionMessage
+from ...shared.tool_response_message import ToolResponseMessage
+from ...shared.interleaved_content_item import InterleavedContentItem
__all__ = [
"Turn",
diff --git a/src/llama_stack_client/types/agents/turn_create_params.py b/src/llama_stack_client/types/alpha/agents/turn_create_params.py
similarity index 95%
rename from src/llama_stack_client/types/agents/turn_create_params.py
rename to src/llama_stack_client/types/alpha/agents/turn_create_params.py
index 8c8e4999..7225959a 100644
--- a/src/llama_stack_client/types/agents/turn_create_params.py
+++ b/src/llama_stack_client/types/alpha/agents/turn_create_params.py
@@ -5,10 +5,10 @@
from typing import Dict, Union, Iterable
from typing_extensions import Literal, Required, TypeAlias, TypedDict
-from ..._types import SequenceNotStr
-from ..shared_params.user_message import UserMessage
-from ..shared_params.tool_response_message import ToolResponseMessage
-from ..shared_params.interleaved_content_item import InterleavedContentItem
+from ...._types import SequenceNotStr
+from ...shared_params.user_message import UserMessage
+from ...shared_params.tool_response_message import ToolResponseMessage
+from ...shared_params.interleaved_content_item import InterleavedContentItem
__all__ = [
"TurnCreateParamsBase",
diff --git a/src/llama_stack_client/types/agents/turn_response_event.py b/src/llama_stack_client/types/alpha/agents/turn_response_event.py
similarity index 97%
rename from src/llama_stack_client/types/agents/turn_response_event.py
rename to src/llama_stack_client/types/alpha/agents/turn_response_event.py
index c52121ab..c162135d 100644
--- a/src/llama_stack_client/types/agents/turn_response_event.py
+++ b/src/llama_stack_client/types/alpha/agents/turn_response_event.py
@@ -4,11 +4,11 @@
from typing_extensions import Literal, Annotated, TypeAlias
from .turn import Turn
-from ..._utils import PropertyInfo
-from ..._models import BaseModel
+from ...._utils import PropertyInfo
+from ...._models import BaseModel
from ..inference_step import InferenceStep
-from ..shared.tool_call import ToolCall
from ..shield_call_step import ShieldCallStep
+from ...shared.tool_call import ToolCall
from ..tool_execution_step import ToolExecutionStep
from ..memory_retrieval_step import MemoryRetrievalStep
diff --git a/src/llama_stack_client/types/agents/turn_resume_params.py b/src/llama_stack_client/types/alpha/agents/turn_resume_params.py
similarity index 100%
rename from src/llama_stack_client/types/agents/turn_resume_params.py
rename to src/llama_stack_client/types/alpha/agents/turn_resume_params.py
diff --git a/src/llama_stack_client/types/algorithm_config_param.py b/src/llama_stack_client/types/alpha/algorithm_config_param.py
similarity index 97%
rename from src/llama_stack_client/types/algorithm_config_param.py
rename to src/llama_stack_client/types/alpha/algorithm_config_param.py
index 6940953e..d6da8130 100644
--- a/src/llama_stack_client/types/algorithm_config_param.py
+++ b/src/llama_stack_client/types/alpha/algorithm_config_param.py
@@ -5,7 +5,7 @@
from typing import Union
from typing_extensions import Literal, Required, TypeAlias, TypedDict
-from .._types import SequenceNotStr
+from ..._types import SequenceNotStr
__all__ = ["AlgorithmConfigParam", "LoraFinetuningConfig", "QatFinetuningConfig"]
diff --git a/src/llama_stack_client/types/benchmark_config_param.py b/src/llama_stack_client/types/alpha/benchmark_config_param.py
similarity index 86%
rename from src/llama_stack_client/types/benchmark_config_param.py
rename to src/llama_stack_client/types/alpha/benchmark_config_param.py
index dc968521..4a3ea512 100644
--- a/src/llama_stack_client/types/benchmark_config_param.py
+++ b/src/llama_stack_client/types/alpha/benchmark_config_param.py
@@ -5,10 +5,10 @@
from typing import Dict, Union
from typing_extensions import Literal, Required, TypeAlias, TypedDict
-from .scoring_fn_params_param import ScoringFnParamsParam
-from .shared_params.agent_config import AgentConfig
-from .shared_params.system_message import SystemMessage
-from .shared_params.sampling_params import SamplingParams
+from ..scoring_fn_params_param import ScoringFnParamsParam
+from ..shared_params.agent_config import AgentConfig
+from ..shared_params.system_message import SystemMessage
+from ..shared_params.sampling_params import SamplingParams
__all__ = ["BenchmarkConfigParam", "EvalCandidate", "EvalCandidateModelCandidate", "EvalCandidateAgentCandidate"]
diff --git a/src/llama_stack_client/types/eval/__init__.py b/src/llama_stack_client/types/alpha/eval/__init__.py
similarity index 100%
rename from src/llama_stack_client/types/eval/__init__.py
rename to src/llama_stack_client/types/alpha/eval/__init__.py
diff --git a/src/llama_stack_client/types/eval_evaluate_rows_alpha_params.py b/src/llama_stack_client/types/alpha/eval_evaluate_rows_alpha_params.py
similarity index 95%
rename from src/llama_stack_client/types/eval_evaluate_rows_alpha_params.py
rename to src/llama_stack_client/types/alpha/eval_evaluate_rows_alpha_params.py
index e4953252..0422e224 100644
--- a/src/llama_stack_client/types/eval_evaluate_rows_alpha_params.py
+++ b/src/llama_stack_client/types/alpha/eval_evaluate_rows_alpha_params.py
@@ -5,7 +5,7 @@
from typing import Dict, Union, Iterable
from typing_extensions import Required, TypedDict
-from .._types import SequenceNotStr
+from ..._types import SequenceNotStr
from .benchmark_config_param import BenchmarkConfigParam
__all__ = ["EvalEvaluateRowsAlphaParams"]
diff --git a/src/llama_stack_client/types/eval_evaluate_rows_params.py b/src/llama_stack_client/types/alpha/eval_evaluate_rows_params.py
similarity index 95%
rename from src/llama_stack_client/types/eval_evaluate_rows_params.py
rename to src/llama_stack_client/types/alpha/eval_evaluate_rows_params.py
index 128f363d..4ff9bd5b 100644
--- a/src/llama_stack_client/types/eval_evaluate_rows_params.py
+++ b/src/llama_stack_client/types/alpha/eval_evaluate_rows_params.py
@@ -5,7 +5,7 @@
from typing import Dict, Union, Iterable
from typing_extensions import Required, TypedDict
-from .._types import SequenceNotStr
+from ..._types import SequenceNotStr
from .benchmark_config_param import BenchmarkConfigParam
__all__ = ["EvalEvaluateRowsParams"]
diff --git a/src/llama_stack_client/types/eval_run_eval_alpha_params.py b/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py
similarity index 100%
rename from src/llama_stack_client/types/eval_run_eval_alpha_params.py
rename to src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py
diff --git a/src/llama_stack_client/types/eval_run_eval_params.py b/src/llama_stack_client/types/alpha/eval_run_eval_params.py
similarity index 100%
rename from src/llama_stack_client/types/eval_run_eval_params.py
rename to src/llama_stack_client/types/alpha/eval_run_eval_params.py
diff --git a/src/llama_stack_client/types/evaluate_response.py b/src/llama_stack_client/types/alpha/evaluate_response.py
similarity index 83%
rename from src/llama_stack_client/types/evaluate_response.py
rename to src/llama_stack_client/types/alpha/evaluate_response.py
index 8e463352..4cd2e0f7 100644
--- a/src/llama_stack_client/types/evaluate_response.py
+++ b/src/llama_stack_client/types/alpha/evaluate_response.py
@@ -2,8 +2,8 @@
from typing import Dict, List, Union
-from .._models import BaseModel
-from .shared.scoring_result import ScoringResult
+from ..._models import BaseModel
+from ..shared.scoring_result import ScoringResult
__all__ = ["EvaluateResponse"]
diff --git a/src/llama_stack_client/types/inference_rerank_params.py b/src/llama_stack_client/types/alpha/inference_rerank_params.py
similarity index 98%
rename from src/llama_stack_client/types/inference_rerank_params.py
rename to src/llama_stack_client/types/alpha/inference_rerank_params.py
index 8f8c4d64..4c506240 100644
--- a/src/llama_stack_client/types/inference_rerank_params.py
+++ b/src/llama_stack_client/types/alpha/inference_rerank_params.py
@@ -5,7 +5,7 @@
from typing import Union
from typing_extensions import Literal, Required, TypeAlias, TypedDict
-from .._types import SequenceNotStr
+from ..._types import SequenceNotStr
__all__ = [
"InferenceRerankParams",
diff --git a/src/llama_stack_client/types/inference_rerank_response.py b/src/llama_stack_client/types/alpha/inference_rerank_response.py
similarity index 94%
rename from src/llama_stack_client/types/inference_rerank_response.py
rename to src/llama_stack_client/types/alpha/inference_rerank_response.py
index e74fc7e6..391f8a3b 100644
--- a/src/llama_stack_client/types/inference_rerank_response.py
+++ b/src/llama_stack_client/types/alpha/inference_rerank_response.py
@@ -3,7 +3,7 @@
from typing import List
from typing_extensions import TypeAlias
-from .._models import BaseModel
+from ..._models import BaseModel
__all__ = ["InferenceRerankResponse", "InferenceRerankResponseItem"]
diff --git a/src/llama_stack_client/types/inference_step.py b/src/llama_stack_client/types/alpha/inference_step.py
similarity index 89%
rename from src/llama_stack_client/types/inference_step.py
rename to src/llama_stack_client/types/alpha/inference_step.py
index 2aecb193..a7e446d1 100644
--- a/src/llama_stack_client/types/inference_step.py
+++ b/src/llama_stack_client/types/alpha/inference_step.py
@@ -6,8 +6,8 @@
from pydantic import Field as FieldInfo
-from .._models import BaseModel
-from .shared.completion_message import CompletionMessage
+from ..._models import BaseModel
+from ..shared.completion_message import CompletionMessage
__all__ = ["InferenceStep"]
diff --git a/src/llama_stack_client/types/job.py b/src/llama_stack_client/types/alpha/job.py
similarity index 91%
rename from src/llama_stack_client/types/job.py
rename to src/llama_stack_client/types/alpha/job.py
index 9635de38..23506692 100644
--- a/src/llama_stack_client/types/job.py
+++ b/src/llama_stack_client/types/alpha/job.py
@@ -2,7 +2,7 @@
from typing_extensions import Literal
-from .._models import BaseModel
+from ..._models import BaseModel
__all__ = ["Job"]
diff --git a/src/llama_stack_client/types/list_post_training_jobs_response.py b/src/llama_stack_client/types/alpha/list_post_training_jobs_response.py
similarity index 89%
rename from src/llama_stack_client/types/list_post_training_jobs_response.py
rename to src/llama_stack_client/types/alpha/list_post_training_jobs_response.py
index 09d16628..746afe99 100644
--- a/src/llama_stack_client/types/list_post_training_jobs_response.py
+++ b/src/llama_stack_client/types/alpha/list_post_training_jobs_response.py
@@ -2,7 +2,7 @@
from typing import List
-from .._models import BaseModel
+from ..._models import BaseModel
__all__ = ["ListPostTrainingJobsResponse", "Data"]
diff --git a/src/llama_stack_client/types/memory_retrieval_step.py b/src/llama_stack_client/types/alpha/memory_retrieval_step.py
similarity index 89%
rename from src/llama_stack_client/types/memory_retrieval_step.py
rename to src/llama_stack_client/types/alpha/memory_retrieval_step.py
index 887e9986..3d44dee0 100644
--- a/src/llama_stack_client/types/memory_retrieval_step.py
+++ b/src/llama_stack_client/types/alpha/memory_retrieval_step.py
@@ -4,8 +4,8 @@
from datetime import datetime
from typing_extensions import Literal
-from .._models import BaseModel
-from .shared.interleaved_content import InterleavedContent
+from ..._models import BaseModel
+from ..shared.interleaved_content import InterleavedContent
__all__ = ["MemoryRetrievalStep"]
diff --git a/src/llama_stack_client/types/post_training/__init__.py b/src/llama_stack_client/types/alpha/post_training/__init__.py
similarity index 100%
rename from src/llama_stack_client/types/post_training/__init__.py
rename to src/llama_stack_client/types/alpha/post_training/__init__.py
diff --git a/src/llama_stack_client/types/post_training/job_artifacts_params.py b/src/llama_stack_client/types/alpha/post_training/job_artifacts_params.py
similarity index 100%
rename from src/llama_stack_client/types/post_training/job_artifacts_params.py
rename to src/llama_stack_client/types/alpha/post_training/job_artifacts_params.py
diff --git a/src/llama_stack_client/types/post_training/job_artifacts_response.py b/src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py
similarity index 97%
rename from src/llama_stack_client/types/post_training/job_artifacts_response.py
rename to src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py
index 42784aee..74edff26 100644
--- a/src/llama_stack_client/types/post_training/job_artifacts_response.py
+++ b/src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py
@@ -3,7 +3,7 @@
from typing import List, Optional
from datetime import datetime
-from ..._models import BaseModel
+from ...._models import BaseModel
__all__ = ["JobArtifactsResponse", "Checkpoint", "CheckpointTrainingMetrics"]
diff --git a/src/llama_stack_client/types/post_training/job_cancel_params.py b/src/llama_stack_client/types/alpha/post_training/job_cancel_params.py
similarity index 100%
rename from src/llama_stack_client/types/post_training/job_cancel_params.py
rename to src/llama_stack_client/types/alpha/post_training/job_cancel_params.py
diff --git a/src/llama_stack_client/types/post_training/job_list_response.py b/src/llama_stack_client/types/alpha/post_training/job_list_response.py
similarity index 90%
rename from src/llama_stack_client/types/post_training/job_list_response.py
rename to src/llama_stack_client/types/alpha/post_training/job_list_response.py
index cb42da2d..33bd89f1 100644
--- a/src/llama_stack_client/types/post_training/job_list_response.py
+++ b/src/llama_stack_client/types/alpha/post_training/job_list_response.py
@@ -3,7 +3,7 @@
from typing import List
from typing_extensions import TypeAlias
-from ..._models import BaseModel
+from ...._models import BaseModel
__all__ = ["JobListResponse", "JobListResponseItem"]
diff --git a/src/llama_stack_client/types/post_training/job_status_params.py b/src/llama_stack_client/types/alpha/post_training/job_status_params.py
similarity index 100%
rename from src/llama_stack_client/types/post_training/job_status_params.py
rename to src/llama_stack_client/types/alpha/post_training/job_status_params.py
diff --git a/src/llama_stack_client/types/post_training/job_status_response.py b/src/llama_stack_client/types/alpha/post_training/job_status_response.py
similarity index 98%
rename from src/llama_stack_client/types/post_training/job_status_response.py
rename to src/llama_stack_client/types/alpha/post_training/job_status_response.py
index 94379579..1ccc9ca2 100644
--- a/src/llama_stack_client/types/post_training/job_status_response.py
+++ b/src/llama_stack_client/types/alpha/post_training/job_status_response.py
@@ -4,7 +4,7 @@
from datetime import datetime
from typing_extensions import Literal
-from ..._models import BaseModel
+from ...._models import BaseModel
__all__ = ["JobStatusResponse", "Checkpoint", "CheckpointTrainingMetrics"]
diff --git a/src/llama_stack_client/types/post_training_job.py b/src/llama_stack_client/types/alpha/post_training_job.py
similarity index 83%
rename from src/llama_stack_client/types/post_training_job.py
rename to src/llama_stack_client/types/alpha/post_training_job.py
index d0ba5fce..7d9417db 100644
--- a/src/llama_stack_client/types/post_training_job.py
+++ b/src/llama_stack_client/types/alpha/post_training_job.py
@@ -1,6 +1,6 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from .._models import BaseModel
+from ..._models import BaseModel
__all__ = ["PostTrainingJob"]
diff --git a/src/llama_stack_client/types/post_training_preference_optimize_params.py b/src/llama_stack_client/types/alpha/post_training_preference_optimize_params.py
similarity index 100%
rename from src/llama_stack_client/types/post_training_preference_optimize_params.py
rename to src/llama_stack_client/types/alpha/post_training_preference_optimize_params.py
diff --git a/src/llama_stack_client/types/post_training_supervised_fine_tune_params.py b/src/llama_stack_client/types/alpha/post_training_supervised_fine_tune_params.py
similarity index 100%
rename from src/llama_stack_client/types/post_training_supervised_fine_tune_params.py
rename to src/llama_stack_client/types/alpha/post_training_supervised_fine_tune_params.py
diff --git a/src/llama_stack_client/types/shield_call_step.py b/src/llama_stack_client/types/alpha/shield_call_step.py
similarity index 88%
rename from src/llama_stack_client/types/shield_call_step.py
rename to src/llama_stack_client/types/alpha/shield_call_step.py
index e19734c6..80176555 100644
--- a/src/llama_stack_client/types/shield_call_step.py
+++ b/src/llama_stack_client/types/alpha/shield_call_step.py
@@ -4,8 +4,8 @@
from datetime import datetime
from typing_extensions import Literal
-from .._models import BaseModel
-from .shared.safety_violation import SafetyViolation
+from ..._models import BaseModel
+from ..shared.safety_violation import SafetyViolation
__all__ = ["ShieldCallStep"]
diff --git a/src/llama_stack_client/types/tool_execution_step.py b/src/llama_stack_client/types/alpha/tool_execution_step.py
similarity index 91%
rename from src/llama_stack_client/types/tool_execution_step.py
rename to src/llama_stack_client/types/alpha/tool_execution_step.py
index f68115fc..1761e889 100644
--- a/src/llama_stack_client/types/tool_execution_step.py
+++ b/src/llama_stack_client/types/alpha/tool_execution_step.py
@@ -4,9 +4,9 @@
from datetime import datetime
from typing_extensions import Literal
-from .._models import BaseModel
+from ..._models import BaseModel
from .tool_response import ToolResponse
-from .shared.tool_call import ToolCall
+from ..shared.tool_call import ToolCall
__all__ = ["ToolExecutionStep"]
diff --git a/src/llama_stack_client/types/tool_response.py b/src/llama_stack_client/types/alpha/tool_response.py
similarity index 88%
rename from src/llama_stack_client/types/tool_response.py
rename to src/llama_stack_client/types/alpha/tool_response.py
index 7750494e..fb749f75 100644
--- a/src/llama_stack_client/types/tool_response.py
+++ b/src/llama_stack_client/types/alpha/tool_response.py
@@ -3,8 +3,8 @@
from typing import Dict, List, Union, Optional
from typing_extensions import Literal
-from .._models import BaseModel
-from .shared.interleaved_content import InterleavedContent
+from ..._models import BaseModel
+from ..shared.interleaved_content import InterleavedContent
__all__ = ["ToolResponse"]
diff --git a/src/llama_stack_client/types/tool_response_param.py b/src/llama_stack_client/types/alpha/tool_response_param.py
similarity index 92%
rename from src/llama_stack_client/types/tool_response_param.py
rename to src/llama_stack_client/types/alpha/tool_response_param.py
index 386658f9..e833211f 100644
--- a/src/llama_stack_client/types/tool_response_param.py
+++ b/src/llama_stack_client/types/alpha/tool_response_param.py
@@ -5,7 +5,7 @@
from typing import Dict, Union, Iterable
from typing_extensions import Literal, Required, TypedDict
-from .shared_params.interleaved_content import InterleavedContent
+from ..shared_params.interleaved_content import InterleavedContent
__all__ = ["ToolResponseParam"]
diff --git a/src/llama_stack_client/types/shared/__init__.py b/src/llama_stack_client/types/shared/__init__.py
index f346cda7..2d353f89 100644
--- a/src/llama_stack_client/types/shared/__init__.py
+++ b/src/llama_stack_client/types/shared/__init__.py
@@ -1,6 +1,5 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from .metric import Metric as Metric
from .message import Message as Message
from .document import Document as Document
from .tool_call import ToolCall as ToolCall
@@ -16,7 +15,5 @@
from .safety_violation import SafetyViolation as SafetyViolation
from .completion_message import CompletionMessage as CompletionMessage
from .interleaved_content import InterleavedContent as InterleavedContent
-from .tool_param_definition import ToolParamDefinition as ToolParamDefinition
from .tool_response_message import ToolResponseMessage as ToolResponseMessage
-from .chat_completion_response import ChatCompletionResponse as ChatCompletionResponse
from .interleaved_content_item import InterleavedContentItem as InterleavedContentItem
diff --git a/src/llama_stack_client/types/shared/chat_completion_response.py b/src/llama_stack_client/types/shared/chat_completion_response.py
deleted file mode 100644
index eb78a109..00000000
--- a/src/llama_stack_client/types/shared/chat_completion_response.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Optional
-
-from .metric import Metric
-from ..._models import BaseModel
-from .completion_message import CompletionMessage
-
-__all__ = ["ChatCompletionResponse", "Logprob"]
-
-
-class Logprob(BaseModel):
- logprobs_by_token: Dict[str, float]
- """Dictionary mapping tokens to their log probabilities"""
-
-
-class ChatCompletionResponse(BaseModel):
- completion_message: CompletionMessage
- """The complete response message"""
-
- logprobs: Optional[List[Logprob]] = None
- """Optional log probabilities for generated tokens"""
-
- metrics: Optional[List[Metric]] = None
- """(Optional) List of metrics associated with the API response"""
diff --git a/src/llama_stack_client/types/shared/metric.py b/src/llama_stack_client/types/shared/metric.py
deleted file mode 100644
index 66ecdaf8..00000000
--- a/src/llama_stack_client/types/shared/metric.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-
-from ..._models import BaseModel
-
-__all__ = ["Metric"]
-
-
-class Metric(BaseModel):
- metric: str
- """The name of the metric"""
-
- value: float
- """The numeric value of the metric"""
-
- unit: Optional[str] = None
- """(Optional) The unit of measurement for the metric value"""
diff --git a/src/llama_stack_client/types/shared/tool_param_definition.py b/src/llama_stack_client/types/shared/tool_param_definition.py
deleted file mode 100644
index 316f1e01..00000000
--- a/src/llama_stack_client/types/shared/tool_param_definition.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Union, Optional
-
-from ..._models import BaseModel
-
-__all__ = ["ToolParamDefinition"]
-
-
-class ToolParamDefinition(BaseModel):
- param_type: str
-
- default: Union[bool, float, str, List[object], object, None] = None
-
- description: Optional[str] = None
-
- items: Union[bool, float, str, List[object], object, None] = None
-
- required: Optional[bool] = None
-
- title: Optional[str] = None
diff --git a/src/llama_stack_client/types/vector_stores/__init__.py b/src/llama_stack_client/types/vector_stores/__init__.py
index 68bcf684..677030d9 100644
--- a/src/llama_stack_client/types/vector_stores/__init__.py
+++ b/src/llama_stack_client/types/vector_stores/__init__.py
@@ -8,3 +8,9 @@
from .file_update_params import FileUpdateParams as FileUpdateParams
from .file_delete_response import FileDeleteResponse as FileDeleteResponse
from .file_content_response import FileContentResponse as FileContentResponse
+from .file_batch_list_params import FileBatchListParams as FileBatchListParams
+from .file_batch_create_params import FileBatchCreateParams as FileBatchCreateParams
+from .vector_store_file_batches import VectorStoreFileBatches as VectorStoreFileBatches
+from .list_vector_store_files_in_batch_response import (
+ ListVectorStoreFilesInBatchResponse as ListVectorStoreFilesInBatchResponse,
+)
diff --git a/src/llama_stack_client/types/vector_stores/file_batch_create_params.py b/src/llama_stack_client/types/vector_stores/file_batch_create_params.py
new file mode 100644
index 00000000..e6eba90a
--- /dev/null
+++ b/src/llama_stack_client/types/vector_stores/file_batch_create_params.py
@@ -0,0 +1,53 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from ..._types import SequenceNotStr
+
+__all__ = [
+ "FileBatchCreateParams",
+ "ChunkingStrategy",
+ "ChunkingStrategyVectorStoreChunkingStrategyAuto",
+ "ChunkingStrategyVectorStoreChunkingStrategyStatic",
+ "ChunkingStrategyVectorStoreChunkingStrategyStaticStatic",
+]
+
+
+class FileBatchCreateParams(TypedDict, total=False):
+ file_ids: Required[SequenceNotStr[str]]
+ """A list of File IDs that the vector store should use."""
+
+ attributes: Dict[str, Union[bool, float, str, Iterable[object], object, None]]
+ """(Optional) Key-value attributes to store with the files."""
+
+ chunking_strategy: ChunkingStrategy
+ """(Optional) The chunking strategy used to chunk the file(s). Defaults to auto."""
+
+
+class ChunkingStrategyVectorStoreChunkingStrategyAuto(TypedDict, total=False):
+ type: Required[Literal["auto"]]
+ """Strategy type, always "auto" for automatic chunking"""
+
+
+class ChunkingStrategyVectorStoreChunkingStrategyStaticStatic(TypedDict, total=False):
+ chunk_overlap_tokens: Required[int]
+ """Number of tokens to overlap between adjacent chunks"""
+
+ max_chunk_size_tokens: Required[int]
+ """Maximum number of tokens per chunk, must be between 100 and 4096"""
+
+
+class ChunkingStrategyVectorStoreChunkingStrategyStatic(TypedDict, total=False):
+ static: Required[ChunkingStrategyVectorStoreChunkingStrategyStaticStatic]
+ """Configuration parameters for the static chunking strategy"""
+
+ type: Required[Literal["static"]]
+ """Strategy type, always "static" for static chunking"""
+
+
+ChunkingStrategy: TypeAlias = Union[
+ ChunkingStrategyVectorStoreChunkingStrategyAuto, ChunkingStrategyVectorStoreChunkingStrategyStatic
+]
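A hedged usage sketch for the new create params; the `client.vector_stores.file_batches.create(...)` resource path and the ID values are assumptions for illustration, while the payload shape follows the TypedDicts above:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# Start a file batch with an explicit static chunking strategy; omitting
# chunking_strategy falls back to the "auto" variant.
batch = client.vector_stores.file_batches.create(  # assumed resource path
    vector_store_id="vs_123",            # hypothetical vector store ID
    file_ids=["file_abc", "file_def"],   # hypothetical file IDs
    chunking_strategy={
        "type": "static",
        "static": {
            "chunk_overlap_tokens": 50,
            "max_chunk_size_tokens": 512,  # must be between 100 and 4096
        },
    },
)
```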
diff --git a/src/llama_stack_client/types/vector_stores/file_batch_list_params.py b/src/llama_stack_client/types/vector_stores/file_batch_list_params.py
new file mode 100644
index 00000000..79e67eb1
--- /dev/null
+++ b/src/llama_stack_client/types/vector_stores/file_batch_list_params.py
@@ -0,0 +1,38 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["FileBatchListParams"]
+
+
+class FileBatchListParams(TypedDict, total=False):
+ vector_store_id: Required[str]
+
+ after: str
+ """A cursor for use in pagination.
+
+ `after` is an object ID that defines your place in the list.
+ """
+
+ before: str
+ """A cursor for use in pagination.
+
+ `before` is an object ID that defines your place in the list.
+ """
+
+ filter: str
+ """Filter by file status. One of in_progress, completed, failed, cancelled."""
+
+ limit: int
+ """A limit on the number of objects to be returned.
+
+ Limit can range between 1 and 100, and the default is 20.
+ """
+
+ order: str
+ """Sort order by the `created_at` timestamp of the objects.
+
+ `asc` for ascending order and `desc` for descending order.
+ """
diff --git a/src/llama_stack_client/types/vector_stores/list_vector_store_files_in_batch_response.py b/src/llama_stack_client/types/vector_stores/list_vector_store_files_in_batch_response.py
new file mode 100644
index 00000000..34ca9e46
--- /dev/null
+++ b/src/llama_stack_client/types/vector_stores/list_vector_store_files_in_batch_response.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+
+from ..._models import BaseModel
+from .vector_store_file import VectorStoreFile
+
+__all__ = ["ListVectorStoreFilesInBatchResponse"]
+
+
+class ListVectorStoreFilesInBatchResponse(BaseModel):
+ data: List[VectorStoreFile]
+ """List of vector store file objects in the batch"""
+
+ has_more: bool
+ """Whether there are more files available beyond this page"""
+
+ object: str
+ """Object type identifier, always "list" """
+
+ first_id: Optional[str] = None
+ """(Optional) ID of the first file in the list for pagination"""
+
+ last_id: Optional[str] = None
+ """(Optional) ID of the last file in the list for pagination"""
diff --git a/src/llama_stack_client/types/vector_stores/vector_store_file_batches.py b/src/llama_stack_client/types/vector_stores/vector_store_file_batches.py
new file mode 100644
index 00000000..738f7edc
--- /dev/null
+++ b/src/llama_stack_client/types/vector_stores/vector_store_file_batches.py
@@ -0,0 +1,44 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["VectorStoreFileBatches", "FileCounts"]
+
+
+class FileCounts(BaseModel):
+ cancelled: int
+ """Number of files that had their processing cancelled"""
+
+ completed: int
+ """Number of files that have been successfully processed"""
+
+ failed: int
+ """Number of files that failed to process"""
+
+ in_progress: int
+ """Number of files currently being processed"""
+
+ total: int
+ """Total number of files in the vector store"""
+
+
+class VectorStoreFileBatches(BaseModel):
+ id: str
+ """Unique identifier for the file batch"""
+
+ created_at: int
+ """Timestamp when the file batch was created"""
+
+ file_counts: FileCounts
+ """File processing status counts for the batch"""
+
+ object: str
+ """Object type identifier, always "vector_store.file_batch" """
+
+ status: Literal["completed", "in_progress", "cancelled", "failed"]
+ """Current processing status of the file batch"""
+
+ vector_store_id: str
+ """ID of the vector store containing the file batch"""
diff --git a/tests/api_resources/agents/__init__.py b/tests/api_resources/alpha/__init__.py
similarity index 100%
rename from tests/api_resources/agents/__init__.py
rename to tests/api_resources/alpha/__init__.py
diff --git a/tests/api_resources/eval/__init__.py b/tests/api_resources/alpha/agents/__init__.py
similarity index 100%
rename from tests/api_resources/eval/__init__.py
rename to tests/api_resources/alpha/agents/__init__.py
diff --git a/tests/api_resources/agents/test_session.py b/tests/api_resources/alpha/agents/test_session.py
similarity index 82%
rename from tests/api_resources/agents/test_session.py
rename to tests/api_resources/alpha/agents/test_session.py
index b49ab492..9c49e6bc 100644
--- a/tests/api_resources/agents/test_session.py
+++ b/tests/api_resources/alpha/agents/test_session.py
@@ -9,7 +9,7 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types.agents import (
+from llama_stack_client.types.alpha.agents import (
Session,
SessionListResponse,
SessionCreateResponse,
@@ -23,7 +23,7 @@ class TestSession:
@parametrize
def test_method_create(self, client: LlamaStackClient) -> None:
- session = client.agents.session.create(
+ session = client.alpha.agents.session.create(
agent_id="agent_id",
session_name="session_name",
)
@@ -31,7 +31,7 @@ def test_method_create(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_create(self, client: LlamaStackClient) -> None:
- response = client.agents.session.with_raw_response.create(
+ response = client.alpha.agents.session.with_raw_response.create(
agent_id="agent_id",
session_name="session_name",
)
@@ -43,7 +43,7 @@ def test_raw_response_create(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_create(self, client: LlamaStackClient) -> None:
- with client.agents.session.with_streaming_response.create(
+ with client.alpha.agents.session.with_streaming_response.create(
agent_id="agent_id",
session_name="session_name",
) as response:
@@ -58,14 +58,14 @@ def test_streaming_response_create(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_create(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.session.with_raw_response.create(
+ client.alpha.agents.session.with_raw_response.create(
agent_id="",
session_name="session_name",
)
@parametrize
def test_method_retrieve(self, client: LlamaStackClient) -> None:
- session = client.agents.session.retrieve(
+ session = client.alpha.agents.session.retrieve(
session_id="session_id",
agent_id="agent_id",
)
@@ -73,7 +73,7 @@ def test_method_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_retrieve_with_all_params(self, client: LlamaStackClient) -> None:
- session = client.agents.session.retrieve(
+ session = client.alpha.agents.session.retrieve(
session_id="session_id",
agent_id="agent_id",
turn_ids=["string"],
@@ -82,7 +82,7 @@ def test_method_retrieve_with_all_params(self, client: LlamaStackClient) -> None
@parametrize
def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
- response = client.agents.session.with_raw_response.retrieve(
+ response = client.alpha.agents.session.with_raw_response.retrieve(
session_id="session_id",
agent_id="agent_id",
)
@@ -94,7 +94,7 @@ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
- with client.agents.session.with_streaming_response.retrieve(
+ with client.alpha.agents.session.with_streaming_response.retrieve(
session_id="session_id",
agent_id="agent_id",
) as response:
@@ -109,27 +109,27 @@ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.session.with_raw_response.retrieve(
+ client.alpha.agents.session.with_raw_response.retrieve(
session_id="session_id",
agent_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.agents.session.with_raw_response.retrieve(
+ client.alpha.agents.session.with_raw_response.retrieve(
session_id="",
agent_id="agent_id",
)
@parametrize
def test_method_list(self, client: LlamaStackClient) -> None:
- session = client.agents.session.list(
+ session = client.alpha.agents.session.list(
agent_id="agent_id",
)
assert_matches_type(SessionListResponse, session, path=["response"])
@parametrize
def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
- session = client.agents.session.list(
+ session = client.alpha.agents.session.list(
agent_id="agent_id",
limit=0,
start_index=0,
@@ -138,7 +138,7 @@ def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_list(self, client: LlamaStackClient) -> None:
- response = client.agents.session.with_raw_response.list(
+ response = client.alpha.agents.session.with_raw_response.list(
agent_id="agent_id",
)
@@ -149,7 +149,7 @@ def test_raw_response_list(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_list(self, client: LlamaStackClient) -> None:
- with client.agents.session.with_streaming_response.list(
+ with client.alpha.agents.session.with_streaming_response.list(
agent_id="agent_id",
) as response:
assert not response.is_closed
@@ -163,13 +163,13 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_list(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.session.with_raw_response.list(
+ client.alpha.agents.session.with_raw_response.list(
agent_id="",
)
@parametrize
def test_method_delete(self, client: LlamaStackClient) -> None:
- session = client.agents.session.delete(
+ session = client.alpha.agents.session.delete(
session_id="session_id",
agent_id="agent_id",
)
@@ -177,7 +177,7 @@ def test_method_delete(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_delete(self, client: LlamaStackClient) -> None:
- response = client.agents.session.with_raw_response.delete(
+ response = client.alpha.agents.session.with_raw_response.delete(
session_id="session_id",
agent_id="agent_id",
)
@@ -189,7 +189,7 @@ def test_raw_response_delete(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_delete(self, client: LlamaStackClient) -> None:
- with client.agents.session.with_streaming_response.delete(
+ with client.alpha.agents.session.with_streaming_response.delete(
session_id="session_id",
agent_id="agent_id",
) as response:
@@ -204,13 +204,13 @@ def test_streaming_response_delete(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_delete(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.session.with_raw_response.delete(
+ client.alpha.agents.session.with_raw_response.delete(
session_id="session_id",
agent_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.agents.session.with_raw_response.delete(
+ client.alpha.agents.session.with_raw_response.delete(
session_id="",
agent_id="agent_id",
)
@@ -223,7 +223,7 @@ class TestAsyncSession:
@parametrize
async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None:
- session = await async_client.agents.session.create(
+ session = await async_client.alpha.agents.session.create(
agent_id="agent_id",
session_name="session_name",
)
@@ -231,7 +231,7 @@ async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None:
@parametrize
async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.session.with_raw_response.create(
+ response = await async_client.alpha.agents.session.with_raw_response.create(
agent_id="agent_id",
session_name="session_name",
)
@@ -243,7 +243,7 @@ async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) ->
@parametrize
async def test_streaming_response_create(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.session.with_streaming_response.create(
+ async with async_client.alpha.agents.session.with_streaming_response.create(
agent_id="agent_id",
session_name="session_name",
) as response:
@@ -258,14 +258,14 @@ async def test_streaming_response_create(self, async_client: AsyncLlamaStackClie
@parametrize
async def test_path_params_create(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.session.with_raw_response.create(
+ await async_client.alpha.agents.session.with_raw_response.create(
agent_id="",
session_name="session_name",
)
@parametrize
async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- session = await async_client.agents.session.retrieve(
+ session = await async_client.alpha.agents.session.retrieve(
session_id="session_id",
agent_id="agent_id",
)
@@ -273,7 +273,7 @@ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> Non
@parametrize
async def test_method_retrieve_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- session = await async_client.agents.session.retrieve(
+ session = await async_client.alpha.agents.session.retrieve(
session_id="session_id",
agent_id="agent_id",
turn_ids=["string"],
@@ -282,7 +282,7 @@ async def test_method_retrieve_with_all_params(self, async_client: AsyncLlamaSta
@parametrize
async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.session.with_raw_response.retrieve(
+ response = await async_client.alpha.agents.session.with_raw_response.retrieve(
session_id="session_id",
agent_id="agent_id",
)
@@ -294,7 +294,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient)
@parametrize
async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.session.with_streaming_response.retrieve(
+ async with async_client.alpha.agents.session.with_streaming_response.retrieve(
session_id="session_id",
agent_id="agent_id",
) as response:
@@ -309,27 +309,27 @@ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.session.with_raw_response.retrieve(
+ await async_client.alpha.agents.session.with_raw_response.retrieve(
session_id="session_id",
agent_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.agents.session.with_raw_response.retrieve(
+ await async_client.alpha.agents.session.with_raw_response.retrieve(
session_id="",
agent_id="agent_id",
)
@parametrize
async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
- session = await async_client.agents.session.list(
+ session = await async_client.alpha.agents.session.list(
agent_id="agent_id",
)
assert_matches_type(SessionListResponse, session, path=["response"])
@parametrize
async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- session = await async_client.agents.session.list(
+ session = await async_client.alpha.agents.session.list(
agent_id="agent_id",
limit=0,
start_index=0,
@@ -338,7 +338,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.session.with_raw_response.list(
+ response = await async_client.alpha.agents.session.with_raw_response.list(
agent_id="agent_id",
)
@@ -349,7 +349,7 @@ async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> N
@parametrize
async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.session.with_streaming_response.list(
+ async with async_client.alpha.agents.session.with_streaming_response.list(
agent_id="agent_id",
) as response:
assert not response.is_closed
@@ -363,13 +363,13 @@ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient
@parametrize
async def test_path_params_list(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.session.with_raw_response.list(
+ await async_client.alpha.agents.session.with_raw_response.list(
agent_id="",
)
@parametrize
async def test_method_delete(self, async_client: AsyncLlamaStackClient) -> None:
- session = await async_client.agents.session.delete(
+ session = await async_client.alpha.agents.session.delete(
session_id="session_id",
agent_id="agent_id",
)
@@ -377,7 +377,7 @@ async def test_method_delete(self, async_client: AsyncLlamaStackClient) -> None:
@parametrize
async def test_raw_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.session.with_raw_response.delete(
+ response = await async_client.alpha.agents.session.with_raw_response.delete(
session_id="session_id",
agent_id="agent_id",
)
@@ -389,7 +389,7 @@ async def test_raw_response_delete(self, async_client: AsyncLlamaStackClient) ->
@parametrize
async def test_streaming_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.session.with_streaming_response.delete(
+ async with async_client.alpha.agents.session.with_streaming_response.delete(
session_id="session_id",
agent_id="agent_id",
) as response:
@@ -404,13 +404,13 @@ async def test_streaming_response_delete(self, async_client: AsyncLlamaStackClie
@parametrize
async def test_path_params_delete(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.session.with_raw_response.delete(
+ await async_client.alpha.agents.session.with_raw_response.delete(
session_id="session_id",
agent_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.agents.session.with_raw_response.delete(
+ await async_client.alpha.agents.session.with_raw_response.delete(
session_id="",
agent_id="agent_id",
)
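The same mechanical change shown in these tests applies to application code; a before/after sketch (the session name is illustrative):

```python
# Before this release:
# session = client.agents.session.create(agent_id="agent_id", session_name="demo")

# After: agents are exposed under the alpha namespace.
session = client.alpha.agents.session.create(
    agent_id="agent_id",
    session_name="demo",
)
```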
diff --git a/tests/api_resources/agents/test_steps.py b/tests/api_resources/alpha/agents/test_steps.py
similarity index 84%
rename from tests/api_resources/agents/test_steps.py
rename to tests/api_resources/alpha/agents/test_steps.py
index 5555a9a4..5bf35fc3 100644
--- a/tests/api_resources/agents/test_steps.py
+++ b/tests/api_resources/alpha/agents/test_steps.py
@@ -9,7 +9,7 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types.agents import StepRetrieveResponse
+from llama_stack_client.types.alpha.agents import StepRetrieveResponse
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -19,7 +19,7 @@ class TestSteps:
@parametrize
def test_method_retrieve(self, client: LlamaStackClient) -> None:
- step = client.agents.steps.retrieve(
+ step = client.alpha.agents.steps.retrieve(
step_id="step_id",
agent_id="agent_id",
session_id="session_id",
@@ -29,7 +29,7 @@ def test_method_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
- response = client.agents.steps.with_raw_response.retrieve(
+ response = client.alpha.agents.steps.with_raw_response.retrieve(
step_id="step_id",
agent_id="agent_id",
session_id="session_id",
@@ -43,7 +43,7 @@ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
- with client.agents.steps.with_streaming_response.retrieve(
+ with client.alpha.agents.steps.with_streaming_response.retrieve(
step_id="step_id",
agent_id="agent_id",
session_id="session_id",
@@ -60,7 +60,7 @@ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.steps.with_raw_response.retrieve(
+ client.alpha.agents.steps.with_raw_response.retrieve(
step_id="step_id",
agent_id="",
session_id="session_id",
@@ -68,7 +68,7 @@ def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.agents.steps.with_raw_response.retrieve(
+ client.alpha.agents.steps.with_raw_response.retrieve(
step_id="step_id",
agent_id="agent_id",
session_id="",
@@ -76,7 +76,7 @@ def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- client.agents.steps.with_raw_response.retrieve(
+ client.alpha.agents.steps.with_raw_response.retrieve(
step_id="step_id",
agent_id="agent_id",
session_id="session_id",
@@ -84,7 +84,7 @@ def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `step_id` but received ''"):
- client.agents.steps.with_raw_response.retrieve(
+ client.alpha.agents.steps.with_raw_response.retrieve(
step_id="",
agent_id="agent_id",
session_id="session_id",
@@ -99,7 +99,7 @@ class TestAsyncSteps:
@parametrize
async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- step = await async_client.agents.steps.retrieve(
+ step = await async_client.alpha.agents.steps.retrieve(
step_id="step_id",
agent_id="agent_id",
session_id="session_id",
@@ -109,7 +109,7 @@ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> Non
@parametrize
async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.steps.with_raw_response.retrieve(
+ response = await async_client.alpha.agents.steps.with_raw_response.retrieve(
step_id="step_id",
agent_id="agent_id",
session_id="session_id",
@@ -123,7 +123,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient)
@parametrize
async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.steps.with_streaming_response.retrieve(
+ async with async_client.alpha.agents.steps.with_streaming_response.retrieve(
step_id="step_id",
agent_id="agent_id",
session_id="session_id",
@@ -140,7 +140,7 @@ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.steps.with_raw_response.retrieve(
+ await async_client.alpha.agents.steps.with_raw_response.retrieve(
step_id="step_id",
agent_id="",
session_id="session_id",
@@ -148,7 +148,7 @@ async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.agents.steps.with_raw_response.retrieve(
+ await async_client.alpha.agents.steps.with_raw_response.retrieve(
step_id="step_id",
agent_id="agent_id",
session_id="",
@@ -156,7 +156,7 @@ async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- await async_client.agents.steps.with_raw_response.retrieve(
+ await async_client.alpha.agents.steps.with_raw_response.retrieve(
step_id="step_id",
agent_id="agent_id",
session_id="session_id",
@@ -164,7 +164,7 @@ async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `step_id` but received ''"):
- await async_client.agents.steps.with_raw_response.retrieve(
+ await async_client.alpha.agents.steps.with_raw_response.retrieve(
step_id="",
agent_id="agent_id",
session_id="session_id",
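The steps resource follows the same move, and its response type shifts to `llama_stack_client.types.alpha.agents`. A sketch of a single-step retrieval under the new path (passing `turn_id` as a keyword is inferred from the non-empty checks above):

```python
from llama_stack_client import LlamaStackClient
from llama_stack_client.types.alpha.agents import StepRetrieveResponse

client = LlamaStackClient(base_url="http://127.0.0.1:4010")

step: StepRetrieveResponse = client.alpha.agents.steps.retrieve(
    step_id="step_id",
    agent_id="agent_id",
    session_id="session_id",
    turn_id="turn_id",  # validated as non-empty alongside the other IDs
)
```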
diff --git a/tests/api_resources/agents/test_turn.py b/tests/api_resources/alpha/agents/test_turn.py
similarity index 88%
rename from tests/api_resources/agents/test_turn.py
rename to tests/api_resources/alpha/agents/test_turn.py
index 31eb53f9..9a2a500f 100644
--- a/tests/api_resources/agents/test_turn.py
+++ b/tests/api_resources/alpha/agents/test_turn.py
@@ -9,7 +9,7 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types.agents import Turn
+from llama_stack_client.types.alpha.agents import Turn
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -19,7 +19,7 @@ class TestTurn:
@parametrize
def test_method_create_overload_1(self, client: LlamaStackClient) -> None:
- turn = client.agents.turn.create(
+ turn = client.alpha.agents.turn.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -33,7 +33,7 @@ def test_method_create_overload_1(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient) -> None:
- turn = client.agents.turn.create(
+ turn = client.alpha.agents.turn.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -61,7 +61,7 @@ def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient
@parametrize
def test_raw_response_create_overload_1(self, client: LlamaStackClient) -> None:
- response = client.agents.turn.with_raw_response.create(
+ response = client.alpha.agents.turn.with_raw_response.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -79,7 +79,7 @@ def test_raw_response_create_overload_1(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_create_overload_1(self, client: LlamaStackClient) -> None:
- with client.agents.turn.with_streaming_response.create(
+ with client.alpha.agents.turn.with_streaming_response.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -100,7 +100,7 @@ def test_streaming_response_create_overload_1(self, client: LlamaStackClient) ->
@parametrize
def test_path_params_create_overload_1(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.turn.with_raw_response.create(
+ client.alpha.agents.turn.with_raw_response.create(
session_id="session_id",
agent_id="",
messages=[
@@ -112,7 +112,7 @@ def test_path_params_create_overload_1(self, client: LlamaStackClient) -> None:
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.agents.turn.with_raw_response.create(
+ client.alpha.agents.turn.with_raw_response.create(
session_id="",
agent_id="agent_id",
messages=[
@@ -125,7 +125,7 @@ def test_path_params_create_overload_1(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_create_overload_2(self, client: LlamaStackClient) -> None:
- turn_stream = client.agents.turn.create(
+ turn_stream = client.alpha.agents.turn.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -140,7 +140,7 @@ def test_method_create_overload_2(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient) -> None:
- turn_stream = client.agents.turn.create(
+ turn_stream = client.alpha.agents.turn.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -168,7 +168,7 @@ def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient
@parametrize
def test_raw_response_create_overload_2(self, client: LlamaStackClient) -> None:
- response = client.agents.turn.with_raw_response.create(
+ response = client.alpha.agents.turn.with_raw_response.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -186,7 +186,7 @@ def test_raw_response_create_overload_2(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_create_overload_2(self, client: LlamaStackClient) -> None:
- with client.agents.turn.with_streaming_response.create(
+ with client.alpha.agents.turn.with_streaming_response.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -208,7 +208,7 @@ def test_streaming_response_create_overload_2(self, client: LlamaStackClient) ->
@parametrize
def test_path_params_create_overload_2(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.turn.with_raw_response.create(
+ client.alpha.agents.turn.with_raw_response.create(
session_id="session_id",
agent_id="",
messages=[
@@ -221,7 +221,7 @@ def test_path_params_create_overload_2(self, client: LlamaStackClient) -> None:
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.agents.turn.with_raw_response.create(
+ client.alpha.agents.turn.with_raw_response.create(
session_id="",
agent_id="agent_id",
messages=[
@@ -235,7 +235,7 @@ def test_path_params_create_overload_2(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_retrieve(self, client: LlamaStackClient) -> None:
- turn = client.agents.turn.retrieve(
+ turn = client.alpha.agents.turn.retrieve(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -244,7 +244,7 @@ def test_method_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
- response = client.agents.turn.with_raw_response.retrieve(
+ response = client.alpha.agents.turn.with_raw_response.retrieve(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -257,7 +257,7 @@ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
- with client.agents.turn.with_streaming_response.retrieve(
+ with client.alpha.agents.turn.with_streaming_response.retrieve(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -273,21 +273,21 @@ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.turn.with_raw_response.retrieve(
+ client.alpha.agents.turn.with_raw_response.retrieve(
turn_id="turn_id",
agent_id="",
session_id="session_id",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.agents.turn.with_raw_response.retrieve(
+ client.alpha.agents.turn.with_raw_response.retrieve(
turn_id="turn_id",
agent_id="agent_id",
session_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- client.agents.turn.with_raw_response.retrieve(
+ client.alpha.agents.turn.with_raw_response.retrieve(
turn_id="",
agent_id="agent_id",
session_id="session_id",
@@ -295,7 +295,7 @@ def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_resume_overload_1(self, client: LlamaStackClient) -> None:
- turn = client.agents.turn.resume(
+ turn = client.alpha.agents.turn.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -311,7 +311,7 @@ def test_method_resume_overload_1(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_resume_with_all_params_overload_1(self, client: LlamaStackClient) -> None:
- turn = client.agents.turn.resume(
+ turn = client.alpha.agents.turn.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -329,7 +329,7 @@ def test_method_resume_with_all_params_overload_1(self, client: LlamaStackClient
@parametrize
def test_raw_response_resume_overload_1(self, client: LlamaStackClient) -> None:
- response = client.agents.turn.with_raw_response.resume(
+ response = client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -349,7 +349,7 @@ def test_raw_response_resume_overload_1(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_resume_overload_1(self, client: LlamaStackClient) -> None:
- with client.agents.turn.with_streaming_response.resume(
+ with client.alpha.agents.turn.with_streaming_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -372,7 +372,7 @@ def test_streaming_response_resume_overload_1(self, client: LlamaStackClient) ->
@parametrize
def test_path_params_resume_overload_1(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.turn.with_raw_response.resume(
+ client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="",
session_id="session_id",
@@ -386,7 +386,7 @@ def test_path_params_resume_overload_1(self, client: LlamaStackClient) -> None:
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.agents.turn.with_raw_response.resume(
+ client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="",
@@ -400,7 +400,7 @@ def test_path_params_resume_overload_1(self, client: LlamaStackClient) -> None:
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- client.agents.turn.with_raw_response.resume(
+ client.alpha.agents.turn.with_raw_response.resume(
turn_id="",
agent_id="agent_id",
session_id="session_id",
@@ -415,7 +415,7 @@ def test_path_params_resume_overload_1(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_resume_overload_2(self, client: LlamaStackClient) -> None:
- turn_stream = client.agents.turn.resume(
+ turn_stream = client.alpha.agents.turn.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -432,7 +432,7 @@ def test_method_resume_overload_2(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_resume_overload_2(self, client: LlamaStackClient) -> None:
- response = client.agents.turn.with_raw_response.resume(
+ response = client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -452,7 +452,7 @@ def test_raw_response_resume_overload_2(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_resume_overload_2(self, client: LlamaStackClient) -> None:
- with client.agents.turn.with_streaming_response.resume(
+ with client.alpha.agents.turn.with_streaming_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -476,7 +476,7 @@ def test_streaming_response_resume_overload_2(self, client: LlamaStackClient) ->
@parametrize
def test_path_params_resume_overload_2(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.turn.with_raw_response.resume(
+ client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="",
session_id="session_id",
@@ -491,7 +491,7 @@ def test_path_params_resume_overload_2(self, client: LlamaStackClient) -> None:
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.agents.turn.with_raw_response.resume(
+ client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="",
@@ -506,7 +506,7 @@ def test_path_params_resume_overload_2(self, client: LlamaStackClient) -> None:
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- client.agents.turn.with_raw_response.resume(
+ client.alpha.agents.turn.with_raw_response.resume(
turn_id="",
agent_id="agent_id",
session_id="session_id",
@@ -528,7 +528,7 @@ class TestAsyncTurn:
@parametrize
async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- turn = await async_client.agents.turn.create(
+ turn = await async_client.alpha.agents.turn.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -542,7 +542,7 @@ async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClien
@parametrize
async def test_method_create_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- turn = await async_client.agents.turn.create(
+ turn = await async_client.alpha.agents.turn.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -570,7 +570,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
@parametrize
async def test_raw_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.turn.with_raw_response.create(
+ response = await async_client.alpha.agents.turn.with_raw_response.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -588,7 +588,7 @@ async def test_raw_response_create_overload_1(self, async_client: AsyncLlamaStac
@parametrize
async def test_streaming_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.turn.with_streaming_response.create(
+ async with async_client.alpha.agents.turn.with_streaming_response.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -609,7 +609,7 @@ async def test_streaming_response_create_overload_1(self, async_client: AsyncLla
@parametrize
async def test_path_params_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.turn.with_raw_response.create(
+ await async_client.alpha.agents.turn.with_raw_response.create(
session_id="session_id",
agent_id="",
messages=[
@@ -621,7 +621,7 @@ async def test_path_params_create_overload_1(self, async_client: AsyncLlamaStack
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.agents.turn.with_raw_response.create(
+ await async_client.alpha.agents.turn.with_raw_response.create(
session_id="",
agent_id="agent_id",
messages=[
@@ -634,7 +634,7 @@ async def test_path_params_create_overload_1(self, async_client: AsyncLlamaStack
@parametrize
async def test_method_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
- turn_stream = await async_client.agents.turn.create(
+ turn_stream = await async_client.alpha.agents.turn.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -649,7 +649,7 @@ async def test_method_create_overload_2(self, async_client: AsyncLlamaStackClien
@parametrize
async def test_method_create_with_all_params_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
- turn_stream = await async_client.agents.turn.create(
+ turn_stream = await async_client.alpha.agents.turn.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -677,7 +677,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
@parametrize
async def test_raw_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.turn.with_raw_response.create(
+ response = await async_client.alpha.agents.turn.with_raw_response.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -695,7 +695,7 @@ async def test_raw_response_create_overload_2(self, async_client: AsyncLlamaStac
@parametrize
async def test_streaming_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.turn.with_streaming_response.create(
+ async with async_client.alpha.agents.turn.with_streaming_response.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -717,7 +717,7 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncLla
@parametrize
async def test_path_params_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.turn.with_raw_response.create(
+ await async_client.alpha.agents.turn.with_raw_response.create(
session_id="session_id",
agent_id="",
messages=[
@@ -730,7 +730,7 @@ async def test_path_params_create_overload_2(self, async_client: AsyncLlamaStack
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.agents.turn.with_raw_response.create(
+ await async_client.alpha.agents.turn.with_raw_response.create(
session_id="",
agent_id="agent_id",
messages=[
@@ -744,7 +744,7 @@ async def test_path_params_create_overload_2(self, async_client: AsyncLlamaStack
@parametrize
async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- turn = await async_client.agents.turn.retrieve(
+ turn = await async_client.alpha.agents.turn.retrieve(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -753,7 +753,7 @@ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> Non
@parametrize
async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.turn.with_raw_response.retrieve(
+ response = await async_client.alpha.agents.turn.with_raw_response.retrieve(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -766,7 +766,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient)
@parametrize
async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.turn.with_streaming_response.retrieve(
+ async with async_client.alpha.agents.turn.with_streaming_response.retrieve(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -782,21 +782,21 @@ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.turn.with_raw_response.retrieve(
+ await async_client.alpha.agents.turn.with_raw_response.retrieve(
turn_id="turn_id",
agent_id="",
session_id="session_id",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.agents.turn.with_raw_response.retrieve(
+ await async_client.alpha.agents.turn.with_raw_response.retrieve(
turn_id="turn_id",
agent_id="agent_id",
session_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- await async_client.agents.turn.with_raw_response.retrieve(
+ await async_client.alpha.agents.turn.with_raw_response.retrieve(
turn_id="",
agent_id="agent_id",
session_id="session_id",
@@ -804,7 +804,7 @@ async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -
@parametrize
async def test_method_resume_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- turn = await async_client.agents.turn.resume(
+ turn = await async_client.alpha.agents.turn.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -820,7 +820,7 @@ async def test_method_resume_overload_1(self, async_client: AsyncLlamaStackClien
@parametrize
async def test_method_resume_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- turn = await async_client.agents.turn.resume(
+ turn = await async_client.alpha.agents.turn.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -838,7 +838,7 @@ async def test_method_resume_with_all_params_overload_1(self, async_client: Asyn
@parametrize
async def test_raw_response_resume_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.turn.with_raw_response.resume(
+ response = await async_client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -858,7 +858,7 @@ async def test_raw_response_resume_overload_1(self, async_client: AsyncLlamaStac
@parametrize
async def test_streaming_response_resume_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.turn.with_streaming_response.resume(
+ async with async_client.alpha.agents.turn.with_streaming_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -881,7 +881,7 @@ async def test_streaming_response_resume_overload_1(self, async_client: AsyncLla
@parametrize
async def test_path_params_resume_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.turn.with_raw_response.resume(
+ await async_client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="",
session_id="session_id",
@@ -895,7 +895,7 @@ async def test_path_params_resume_overload_1(self, async_client: AsyncLlamaStack
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.agents.turn.with_raw_response.resume(
+ await async_client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="",
@@ -909,7 +909,7 @@ async def test_path_params_resume_overload_1(self, async_client: AsyncLlamaStack
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- await async_client.agents.turn.with_raw_response.resume(
+ await async_client.alpha.agents.turn.with_raw_response.resume(
turn_id="",
agent_id="agent_id",
session_id="session_id",
@@ -924,7 +924,7 @@ async def test_path_params_resume_overload_1(self, async_client: AsyncLlamaStack
@parametrize
async def test_method_resume_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
- turn_stream = await async_client.agents.turn.resume(
+ turn_stream = await async_client.alpha.agents.turn.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -941,7 +941,7 @@ async def test_method_resume_overload_2(self, async_client: AsyncLlamaStackClien
@parametrize
async def test_raw_response_resume_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.turn.with_raw_response.resume(
+ response = await async_client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -961,7 +961,7 @@ async def test_raw_response_resume_overload_2(self, async_client: AsyncLlamaStac
@parametrize
async def test_streaming_response_resume_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.turn.with_streaming_response.resume(
+ async with async_client.alpha.agents.turn.with_streaming_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -985,7 +985,7 @@ async def test_streaming_response_resume_overload_2(self, async_client: AsyncLla
@parametrize
async def test_path_params_resume_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.turn.with_raw_response.resume(
+ await async_client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="",
session_id="session_id",
@@ -1000,7 +1000,7 @@ async def test_path_params_resume_overload_2(self, async_client: AsyncLlamaStack
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.agents.turn.with_raw_response.resume(
+ await async_client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="",
@@ -1015,7 +1015,7 @@ async def test_path_params_resume_overload_2(self, async_client: AsyncLlamaStack
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- await async_client.agents.turn.with_raw_response.resume(
+ await async_client.alpha.agents.turn.with_raw_response.resume(
turn_id="",
agent_id="agent_id",
session_id="session_id",
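Turn creation, retrieval, and resumption migrate identically in both the sync and async clients. A hedged async sketch (the message dict shape is assumed from the SDK's `UserMessage` type, and `stream=True` is assumed to select the streaming overload the `turn_stream` tests exercise):

```python
import asyncio

from llama_stack_client import AsyncLlamaStackClient


async def main() -> None:
    client = AsyncLlamaStackClient(base_url="http://127.0.0.1:4010")

    # Blocking turn creation under the alpha namespace.
    turn = await client.alpha.agents.turn.create(
        session_id="session_id",
        agent_id="agent_id",
        messages=[{"role": "user", "content": "string"}],  # assumed message shape
    )

    # Streaming overload: iterate events as they arrive.
    stream = await client.alpha.agents.turn.create(
        session_id="session_id",
        agent_id="agent_id",
        messages=[{"role": "user", "content": "string"}],
        stream=True,
    )
    async for event in stream:
        print(event)


asyncio.run(main())
```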
diff --git a/tests/api_resources/post_training/__init__.py b/tests/api_resources/alpha/eval/__init__.py
similarity index 100%
rename from tests/api_resources/post_training/__init__.py
rename to tests/api_resources/alpha/eval/__init__.py
diff --git a/tests/api_resources/eval/test_jobs.py b/tests/api_resources/alpha/eval/test_jobs.py
similarity index 82%
rename from tests/api_resources/eval/test_jobs.py
rename to tests/api_resources/alpha/eval/test_jobs.py
index 17b02896..f4ea9ce1 100644
--- a/tests/api_resources/eval/test_jobs.py
+++ b/tests/api_resources/alpha/eval/test_jobs.py
@@ -9,7 +9,7 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types import Job, EvaluateResponse
+from llama_stack_client.types.alpha import Job, EvaluateResponse
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -19,7 +19,7 @@ class TestJobs:
@parametrize
def test_method_retrieve(self, client: LlamaStackClient) -> None:
- job = client.eval.jobs.retrieve(
+ job = client.alpha.eval.jobs.retrieve(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -27,7 +27,7 @@ def test_method_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
- response = client.eval.jobs.with_raw_response.retrieve(
+ response = client.alpha.eval.jobs.with_raw_response.retrieve(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -39,7 +39,7 @@ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
- with client.eval.jobs.with_streaming_response.retrieve(
+ with client.alpha.eval.jobs.with_streaming_response.retrieve(
job_id="job_id",
benchmark_id="benchmark_id",
) as response:
@@ -54,20 +54,20 @@ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- client.eval.jobs.with_raw_response.retrieve(
+ client.alpha.eval.jobs.with_raw_response.retrieve(
job_id="job_id",
benchmark_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
- client.eval.jobs.with_raw_response.retrieve(
+ client.alpha.eval.jobs.with_raw_response.retrieve(
job_id="",
benchmark_id="benchmark_id",
)
@parametrize
def test_method_cancel(self, client: LlamaStackClient) -> None:
- job = client.eval.jobs.cancel(
+ job = client.alpha.eval.jobs.cancel(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -75,7 +75,7 @@ def test_method_cancel(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_cancel(self, client: LlamaStackClient) -> None:
- response = client.eval.jobs.with_raw_response.cancel(
+ response = client.alpha.eval.jobs.with_raw_response.cancel(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -87,7 +87,7 @@ def test_raw_response_cancel(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_cancel(self, client: LlamaStackClient) -> None:
- with client.eval.jobs.with_streaming_response.cancel(
+ with client.alpha.eval.jobs.with_streaming_response.cancel(
job_id="job_id",
benchmark_id="benchmark_id",
) as response:
@@ -102,20 +102,20 @@ def test_streaming_response_cancel(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_cancel(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- client.eval.jobs.with_raw_response.cancel(
+ client.alpha.eval.jobs.with_raw_response.cancel(
job_id="job_id",
benchmark_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
- client.eval.jobs.with_raw_response.cancel(
+ client.alpha.eval.jobs.with_raw_response.cancel(
job_id="",
benchmark_id="benchmark_id",
)
@parametrize
def test_method_status(self, client: LlamaStackClient) -> None:
- job = client.eval.jobs.status(
+ job = client.alpha.eval.jobs.status(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -123,7 +123,7 @@ def test_method_status(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_status(self, client: LlamaStackClient) -> None:
- response = client.eval.jobs.with_raw_response.status(
+ response = client.alpha.eval.jobs.with_raw_response.status(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -135,7 +135,7 @@ def test_raw_response_status(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_status(self, client: LlamaStackClient) -> None:
- with client.eval.jobs.with_streaming_response.status(
+ with client.alpha.eval.jobs.with_streaming_response.status(
job_id="job_id",
benchmark_id="benchmark_id",
) as response:
@@ -150,13 +150,13 @@ def test_streaming_response_status(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_status(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- client.eval.jobs.with_raw_response.status(
+ client.alpha.eval.jobs.with_raw_response.status(
job_id="job_id",
benchmark_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
- client.eval.jobs.with_raw_response.status(
+ client.alpha.eval.jobs.with_raw_response.status(
job_id="",
benchmark_id="benchmark_id",
)
@@ -169,7 +169,7 @@ class TestAsyncJobs:
@parametrize
async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.eval.jobs.retrieve(
+ job = await async_client.alpha.eval.jobs.retrieve(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -177,7 +177,7 @@ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> Non
@parametrize
async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.eval.jobs.with_raw_response.retrieve(
+ response = await async_client.alpha.eval.jobs.with_raw_response.retrieve(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -189,7 +189,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient)
@parametrize
async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.eval.jobs.with_streaming_response.retrieve(
+ async with async_client.alpha.eval.jobs.with_streaming_response.retrieve(
job_id="job_id",
benchmark_id="benchmark_id",
) as response:
@@ -204,20 +204,20 @@ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- await async_client.eval.jobs.with_raw_response.retrieve(
+ await async_client.alpha.eval.jobs.with_raw_response.retrieve(
job_id="job_id",
benchmark_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
- await async_client.eval.jobs.with_raw_response.retrieve(
+ await async_client.alpha.eval.jobs.with_raw_response.retrieve(
job_id="",
benchmark_id="benchmark_id",
)
@parametrize
async def test_method_cancel(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.eval.jobs.cancel(
+ job = await async_client.alpha.eval.jobs.cancel(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -225,7 +225,7 @@ async def test_method_cancel(self, async_client: AsyncLlamaStackClient) -> None:
@parametrize
async def test_raw_response_cancel(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.eval.jobs.with_raw_response.cancel(
+ response = await async_client.alpha.eval.jobs.with_raw_response.cancel(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -237,7 +237,7 @@ async def test_raw_response_cancel(self, async_client: AsyncLlamaStackClient) ->
@parametrize
async def test_streaming_response_cancel(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.eval.jobs.with_streaming_response.cancel(
+ async with async_client.alpha.eval.jobs.with_streaming_response.cancel(
job_id="job_id",
benchmark_id="benchmark_id",
) as response:
@@ -252,20 +252,20 @@ async def test_streaming_response_cancel(self, async_client: AsyncLlamaStackClie
@parametrize
async def test_path_params_cancel(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- await async_client.eval.jobs.with_raw_response.cancel(
+ await async_client.alpha.eval.jobs.with_raw_response.cancel(
job_id="job_id",
benchmark_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
- await async_client.eval.jobs.with_raw_response.cancel(
+ await async_client.alpha.eval.jobs.with_raw_response.cancel(
job_id="",
benchmark_id="benchmark_id",
)
@parametrize
async def test_method_status(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.eval.jobs.status(
+ job = await async_client.alpha.eval.jobs.status(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -273,7 +273,7 @@ async def test_method_status(self, async_client: AsyncLlamaStackClient) -> None:
@parametrize
async def test_raw_response_status(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.eval.jobs.with_raw_response.status(
+ response = await async_client.alpha.eval.jobs.with_raw_response.status(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -285,7 +285,7 @@ async def test_raw_response_status(self, async_client: AsyncLlamaStackClient) ->
@parametrize
async def test_streaming_response_status(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.eval.jobs.with_streaming_response.status(
+ async with async_client.alpha.eval.jobs.with_streaming_response.status(
job_id="job_id",
benchmark_id="benchmark_id",
) as response:
@@ -300,13 +300,13 @@ async def test_streaming_response_status(self, async_client: AsyncLlamaStackClie
@parametrize
async def test_path_params_status(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- await async_client.eval.jobs.with_raw_response.status(
+ await async_client.alpha.eval.jobs.with_raw_response.status(
job_id="job_id",
benchmark_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
- await async_client.eval.jobs.with_raw_response.status(
+ await async_client.alpha.eval.jobs.with_raw_response.status(
job_id="",
benchmark_id="benchmark_id",
)
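`eval` jobs resolve through `client.alpha` as well, with `Job` and `EvaluateResponse` now imported from `llama_stack_client.types.alpha`. A sketch of the job lifecycle calls these tests cover:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://127.0.0.1:4010")

# Poll, fetch, or cancel a benchmark evaluation job.
status = client.alpha.eval.jobs.status(job_id="job_id", benchmark_id="benchmark_id")
result = client.alpha.eval.jobs.retrieve(job_id="job_id", benchmark_id="benchmark_id")
client.alpha.eval.jobs.cancel(job_id="job_id", benchmark_id="benchmark_id")
```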
diff --git a/tests/api_resources/alpha/post_training/__init__.py b/tests/api_resources/alpha/post_training/__init__.py
new file mode 100644
index 00000000..fd8019a9
--- /dev/null
+++ b/tests/api_resources/alpha/post_training/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/post_training/test_job.py b/tests/api_resources/alpha/post_training/test_job.py
similarity index 81%
rename from tests/api_resources/post_training/test_job.py
rename to tests/api_resources/alpha/post_training/test_job.py
index 158eafbc..3b47132f 100644
--- a/tests/api_resources/post_training/test_job.py
+++ b/tests/api_resources/alpha/post_training/test_job.py
@@ -9,11 +9,11 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types.post_training import (
+from llama_stack_client.types.alpha.post_training import (
JobStatusResponse,
JobArtifactsResponse,
)
-from llama_stack_client.types.list_post_training_jobs_response import Data
+from llama_stack_client.types.alpha.list_post_training_jobs_response import Data
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -23,12 +23,12 @@ class TestJob:
@parametrize
def test_method_list(self, client: LlamaStackClient) -> None:
- job = client.post_training.job.list()
+ job = client.alpha.post_training.job.list()
assert_matches_type(List[Data], job, path=["response"])
@parametrize
def test_raw_response_list(self, client: LlamaStackClient) -> None:
- response = client.post_training.job.with_raw_response.list()
+ response = client.alpha.post_training.job.with_raw_response.list()
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -37,7 +37,7 @@ def test_raw_response_list(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_list(self, client: LlamaStackClient) -> None:
- with client.post_training.job.with_streaming_response.list() as response:
+ with client.alpha.post_training.job.with_streaming_response.list() as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -48,14 +48,14 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_artifacts(self, client: LlamaStackClient) -> None:
- job = client.post_training.job.artifacts(
+ job = client.alpha.post_training.job.artifacts(
job_uuid="job_uuid",
)
assert_matches_type(JobArtifactsResponse, job, path=["response"])
@parametrize
def test_raw_response_artifacts(self, client: LlamaStackClient) -> None:
- response = client.post_training.job.with_raw_response.artifacts(
+ response = client.alpha.post_training.job.with_raw_response.artifacts(
job_uuid="job_uuid",
)
@@ -66,7 +66,7 @@ def test_raw_response_artifacts(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_artifacts(self, client: LlamaStackClient) -> None:
- with client.post_training.job.with_streaming_response.artifacts(
+ with client.alpha.post_training.job.with_streaming_response.artifacts(
job_uuid="job_uuid",
) as response:
assert not response.is_closed
@@ -79,14 +79,14 @@ def test_streaming_response_artifacts(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_cancel(self, client: LlamaStackClient) -> None:
- job = client.post_training.job.cancel(
+ job = client.alpha.post_training.job.cancel(
job_uuid="job_uuid",
)
assert job is None
@parametrize
def test_raw_response_cancel(self, client: LlamaStackClient) -> None:
- response = client.post_training.job.with_raw_response.cancel(
+ response = client.alpha.post_training.job.with_raw_response.cancel(
job_uuid="job_uuid",
)
@@ -97,7 +97,7 @@ def test_raw_response_cancel(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_cancel(self, client: LlamaStackClient) -> None:
- with client.post_training.job.with_streaming_response.cancel(
+ with client.alpha.post_training.job.with_streaming_response.cancel(
job_uuid="job_uuid",
) as response:
assert not response.is_closed
@@ -110,14 +110,14 @@ def test_streaming_response_cancel(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_status(self, client: LlamaStackClient) -> None:
- job = client.post_training.job.status(
+ job = client.alpha.post_training.job.status(
job_uuid="job_uuid",
)
assert_matches_type(JobStatusResponse, job, path=["response"])
@parametrize
def test_raw_response_status(self, client: LlamaStackClient) -> None:
- response = client.post_training.job.with_raw_response.status(
+ response = client.alpha.post_training.job.with_raw_response.status(
job_uuid="job_uuid",
)
@@ -128,7 +128,7 @@ def test_raw_response_status(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_status(self, client: LlamaStackClient) -> None:
- with client.post_training.job.with_streaming_response.status(
+ with client.alpha.post_training.job.with_streaming_response.status(
job_uuid="job_uuid",
) as response:
assert not response.is_closed
@@ -147,12 +147,12 @@ class TestAsyncJob:
@parametrize
async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.post_training.job.list()
+ job = await async_client.alpha.post_training.job.list()
assert_matches_type(List[Data], job, path=["response"])
@parametrize
async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.post_training.job.with_raw_response.list()
+ response = await async_client.alpha.post_training.job.with_raw_response.list()
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -161,7 +161,7 @@ async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> N
@parametrize
async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.post_training.job.with_streaming_response.list() as response:
+ async with async_client.alpha.post_training.job.with_streaming_response.list() as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -172,14 +172,14 @@ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient
@parametrize
async def test_method_artifacts(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.post_training.job.artifacts(
+ job = await async_client.alpha.post_training.job.artifacts(
job_uuid="job_uuid",
)
assert_matches_type(JobArtifactsResponse, job, path=["response"])
@parametrize
async def test_raw_response_artifacts(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.post_training.job.with_raw_response.artifacts(
+ response = await async_client.alpha.post_training.job.with_raw_response.artifacts(
job_uuid="job_uuid",
)
@@ -190,7 +190,7 @@ async def test_raw_response_artifacts(self, async_client: AsyncLlamaStackClient)
@parametrize
async def test_streaming_response_artifacts(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.post_training.job.with_streaming_response.artifacts(
+ async with async_client.alpha.post_training.job.with_streaming_response.artifacts(
job_uuid="job_uuid",
) as response:
assert not response.is_closed
@@ -203,14 +203,14 @@ async def test_streaming_response_artifacts(self, async_client: AsyncLlamaStackC
@parametrize
async def test_method_cancel(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.post_training.job.cancel(
+ job = await async_client.alpha.post_training.job.cancel(
job_uuid="job_uuid",
)
assert job is None
@parametrize
async def test_raw_response_cancel(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.post_training.job.with_raw_response.cancel(
+ response = await async_client.alpha.post_training.job.with_raw_response.cancel(
job_uuid="job_uuid",
)
@@ -221,7 +221,7 @@ async def test_raw_response_cancel(self, async_client: AsyncLlamaStackClient) ->
@parametrize
async def test_streaming_response_cancel(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.post_training.job.with_streaming_response.cancel(
+ async with async_client.alpha.post_training.job.with_streaming_response.cancel(
job_uuid="job_uuid",
) as response:
assert not response.is_closed
@@ -234,14 +234,14 @@ async def test_streaming_response_cancel(self, async_client: AsyncLlamaStackClie
@parametrize
async def test_method_status(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.post_training.job.status(
+ job = await async_client.alpha.post_training.job.status(
job_uuid="job_uuid",
)
assert_matches_type(JobStatusResponse, job, path=["response"])
@parametrize
async def test_raw_response_status(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.post_training.job.with_raw_response.status(
+ response = await async_client.alpha.post_training.job.with_raw_response.status(
job_uuid="job_uuid",
)
@@ -252,7 +252,7 @@ async def test_raw_response_status(self, async_client: AsyncLlamaStackClient) ->
@parametrize
async def test_streaming_response_status(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.post_training.job.with_streaming_response.status(
+ async with async_client.alpha.post_training.job.with_streaming_response.status(
job_uuid="job_uuid",
) as response:
assert not response.is_closed
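post_training mirrors the pattern, with response types now imported from `llama_stack_client.types.alpha.post_training`. A sketch of the calls covered above (`cancel()` returns `None` on success, per the assertions):

```python
from llama_stack_client import LlamaStackClient
from llama_stack_client.types.alpha.post_training import (
    JobArtifactsResponse,
    JobStatusResponse,
)

client = LlamaStackClient(base_url="http://127.0.0.1:4010")

jobs = client.alpha.post_training.job.list()

status: JobStatusResponse = client.alpha.post_training.job.status(job_uuid="job_uuid")
artifacts: JobArtifactsResponse = client.alpha.post_training.job.artifacts(job_uuid="job_uuid")

client.alpha.post_training.job.cancel(job_uuid="job_uuid")  # returns None on success
```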
diff --git a/tests/api_resources/test_agents.py b/tests/api_resources/alpha/test_agents.py
similarity index 87%
rename from tests/api_resources/test_agents.py
rename to tests/api_resources/alpha/test_agents.py
index c19bc9bf..d67e8457 100644
--- a/tests/api_resources/test_agents.py
+++ b/tests/api_resources/alpha/test_agents.py
@@ -9,7 +9,7 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types import (
+from llama_stack_client.types.alpha import (
AgentListResponse,
AgentCreateResponse,
AgentRetrieveResponse,
@@ -23,7 +23,7 @@ class TestAgents:
@parametrize
def test_method_create(self, client: LlamaStackClient) -> None:
- agent = client.agents.create(
+ agent = client.alpha.agents.create(
agent_config={
"instructions": "instructions",
"model": "model",
@@ -33,7 +33,7 @@ def test_method_create(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_create_with_all_params(self, client: LlamaStackClient) -> None:
- agent = client.agents.create(
+ agent = client.alpha.agents.create(
agent_config={
"instructions": "instructions",
"model": "model",
@@ -84,7 +84,7 @@ def test_method_create_with_all_params(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_create(self, client: LlamaStackClient) -> None:
- response = client.agents.with_raw_response.create(
+ response = client.alpha.agents.with_raw_response.create(
agent_config={
"instructions": "instructions",
"model": "model",
@@ -98,7 +98,7 @@ def test_raw_response_create(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_create(self, client: LlamaStackClient) -> None:
- with client.agents.with_streaming_response.create(
+ with client.alpha.agents.with_streaming_response.create(
agent_config={
"instructions": "instructions",
"model": "model",
@@ -114,14 +114,14 @@ def test_streaming_response_create(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_retrieve(self, client: LlamaStackClient) -> None:
- agent = client.agents.retrieve(
+ agent = client.alpha.agents.retrieve(
"agent_id",
)
assert_matches_type(AgentRetrieveResponse, agent, path=["response"])
@parametrize
def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
- response = client.agents.with_raw_response.retrieve(
+ response = client.alpha.agents.with_raw_response.retrieve(
"agent_id",
)
@@ -132,7 +132,7 @@ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
- with client.agents.with_streaming_response.retrieve(
+ with client.alpha.agents.with_streaming_response.retrieve(
"agent_id",
) as response:
assert not response.is_closed
@@ -146,18 +146,18 @@ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.with_raw_response.retrieve(
+ client.alpha.agents.with_raw_response.retrieve(
"",
)
@parametrize
def test_method_list(self, client: LlamaStackClient) -> None:
- agent = client.agents.list()
+ agent = client.alpha.agents.list()
assert_matches_type(AgentListResponse, agent, path=["response"])
@parametrize
def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
- agent = client.agents.list(
+ agent = client.alpha.agents.list(
limit=0,
start_index=0,
)
@@ -165,7 +165,7 @@ def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_list(self, client: LlamaStackClient) -> None:
- response = client.agents.with_raw_response.list()
+ response = client.alpha.agents.with_raw_response.list()
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -174,7 +174,7 @@ def test_raw_response_list(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_list(self, client: LlamaStackClient) -> None:
- with client.agents.with_streaming_response.list() as response:
+ with client.alpha.agents.with_streaming_response.list() as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -185,14 +185,14 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_delete(self, client: LlamaStackClient) -> None:
- agent = client.agents.delete(
+ agent = client.alpha.agents.delete(
"agent_id",
)
assert agent is None
@parametrize
def test_raw_response_delete(self, client: LlamaStackClient) -> None:
- response = client.agents.with_raw_response.delete(
+ response = client.alpha.agents.with_raw_response.delete(
"agent_id",
)
@@ -203,7 +203,7 @@ def test_raw_response_delete(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_delete(self, client: LlamaStackClient) -> None:
- with client.agents.with_streaming_response.delete(
+ with client.alpha.agents.with_streaming_response.delete(
"agent_id",
) as response:
assert not response.is_closed
@@ -217,7 +217,7 @@ def test_streaming_response_delete(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_delete(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.with_raw_response.delete(
+ client.alpha.agents.with_raw_response.delete(
"",
)
@@ -229,7 +229,7 @@ class TestAsyncAgents:
@parametrize
async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None:
- agent = await async_client.agents.create(
+ agent = await async_client.alpha.agents.create(
agent_config={
"instructions": "instructions",
"model": "model",
@@ -239,7 +239,7 @@ async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None:
@parametrize
async def test_method_create_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- agent = await async_client.agents.create(
+ agent = await async_client.alpha.agents.create(
agent_config={
"instructions": "instructions",
"model": "model",
@@ -290,7 +290,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncLlamaStack
@parametrize
async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.with_raw_response.create(
+ response = await async_client.alpha.agents.with_raw_response.create(
agent_config={
"instructions": "instructions",
"model": "model",
@@ -304,7 +304,7 @@ async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) ->
@parametrize
async def test_streaming_response_create(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.with_streaming_response.create(
+ async with async_client.alpha.agents.with_streaming_response.create(
agent_config={
"instructions": "instructions",
"model": "model",
@@ -320,14 +320,14 @@ async def test_streaming_response_create(self, async_client: AsyncLlamaStackClie
@parametrize
async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- agent = await async_client.agents.retrieve(
+ agent = await async_client.alpha.agents.retrieve(
"agent_id",
)
assert_matches_type(AgentRetrieveResponse, agent, path=["response"])
@parametrize
async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.with_raw_response.retrieve(
+ response = await async_client.alpha.agents.with_raw_response.retrieve(
"agent_id",
)
@@ -338,7 +338,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient)
@parametrize
async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.with_streaming_response.retrieve(
+ async with async_client.alpha.agents.with_streaming_response.retrieve(
"agent_id",
) as response:
assert not response.is_closed
@@ -352,18 +352,18 @@ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.with_raw_response.retrieve(
+ await async_client.alpha.agents.with_raw_response.retrieve(
"",
)
@parametrize
async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
- agent = await async_client.agents.list()
+ agent = await async_client.alpha.agents.list()
assert_matches_type(AgentListResponse, agent, path=["response"])
@parametrize
async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- agent = await async_client.agents.list(
+ agent = await async_client.alpha.agents.list(
limit=0,
start_index=0,
)
@@ -371,7 +371,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.with_raw_response.list()
+ response = await async_client.alpha.agents.with_raw_response.list()
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -380,7 +380,7 @@ async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> N
@parametrize
async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.with_streaming_response.list() as response:
+ async with async_client.alpha.agents.with_streaming_response.list() as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -391,14 +391,14 @@ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient
@parametrize
async def test_method_delete(self, async_client: AsyncLlamaStackClient) -> None:
- agent = await async_client.agents.delete(
+ agent = await async_client.alpha.agents.delete(
"agent_id",
)
assert agent is None
@parametrize
async def test_raw_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.with_raw_response.delete(
+ response = await async_client.alpha.agents.with_raw_response.delete(
"agent_id",
)
@@ -409,7 +409,7 @@ async def test_raw_response_delete(self, async_client: AsyncLlamaStackClient) ->
@parametrize
async def test_streaming_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.with_streaming_response.delete(
+ async with async_client.alpha.agents.with_streaming_response.delete(
"agent_id",
) as response:
assert not response.is_closed
@@ -423,6 +423,6 @@ async def test_streaming_response_delete(self, async_client: AsyncLlamaStackClie
@parametrize
async def test_path_params_delete(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.with_raw_response.delete(
+ await async_client.alpha.agents.with_raw_response.delete(
"",
)
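For downstream users, the rename above is purely a namespace move: every agents call keeps its signature and only gains an `alpha.` prefix. A minimal before/after sketch, assuming a client pointed at a running stack (the base URL is illustrative):

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://127.0.0.1:4010")  # illustrative URL

    # Before: client.agents.create(...) / client.agents.delete(...)
    agent = client.alpha.agents.create(
        agent_config={"instructions": "instructions", "model": "model"},
    )
    client.alpha.agents.delete(agent.agent_id)  # assumes the create response exposes agent_id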
diff --git a/tests/api_resources/test_eval.py b/tests/api_resources/alpha/test_eval.py
similarity index 93%
rename from tests/api_resources/test_eval.py
rename to tests/api_resources/alpha/test_eval.py
index 878b3d28..88bd0c0c 100644
--- a/tests/api_resources/test_eval.py
+++ b/tests/api_resources/alpha/test_eval.py
@@ -9,7 +9,7 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types import (
+from llama_stack_client.types.alpha import (
Job,
EvaluateResponse,
)
@@ -22,7 +22,7 @@ class TestEval:
@parametrize
def test_method_evaluate_rows(self, client: LlamaStackClient) -> None:
- eval = client.eval.evaluate_rows(
+ eval = client.alpha.eval.evaluate_rows(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -46,7 +46,7 @@ def test_method_evaluate_rows(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_evaluate_rows_with_all_params(self, client: LlamaStackClient) -> None:
- eval = client.eval.evaluate_rows(
+ eval = client.alpha.eval.evaluate_rows(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -81,7 +81,7 @@ def test_method_evaluate_rows_with_all_params(self, client: LlamaStackClient) ->
@parametrize
def test_raw_response_evaluate_rows(self, client: LlamaStackClient) -> None:
- response = client.eval.with_raw_response.evaluate_rows(
+ response = client.alpha.eval.with_raw_response.evaluate_rows(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -109,7 +109,7 @@ def test_raw_response_evaluate_rows(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_evaluate_rows(self, client: LlamaStackClient) -> None:
- with client.eval.with_streaming_response.evaluate_rows(
+ with client.alpha.eval.with_streaming_response.evaluate_rows(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -140,7 +140,7 @@ def test_streaming_response_evaluate_rows(self, client: LlamaStackClient) -> Non
@parametrize
def test_path_params_evaluate_rows(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- client.eval.with_raw_response.evaluate_rows(
+ client.alpha.eval.with_raw_response.evaluate_rows(
benchmark_id="",
benchmark_config={
"eval_candidate": {
@@ -163,7 +163,7 @@ def test_path_params_evaluate_rows(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_evaluate_rows_alpha(self, client: LlamaStackClient) -> None:
- eval = client.eval.evaluate_rows_alpha(
+ eval = client.alpha.eval.evaluate_rows_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -187,7 +187,7 @@ def test_method_evaluate_rows_alpha(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_evaluate_rows_alpha_with_all_params(self, client: LlamaStackClient) -> None:
- eval = client.eval.evaluate_rows_alpha(
+ eval = client.alpha.eval.evaluate_rows_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -222,7 +222,7 @@ def test_method_evaluate_rows_alpha_with_all_params(self, client: LlamaStackClie
@parametrize
def test_raw_response_evaluate_rows_alpha(self, client: LlamaStackClient) -> None:
- response = client.eval.with_raw_response.evaluate_rows_alpha(
+ response = client.alpha.eval.with_raw_response.evaluate_rows_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -250,7 +250,7 @@ def test_raw_response_evaluate_rows_alpha(self, client: LlamaStackClient) -> Non
@parametrize
def test_streaming_response_evaluate_rows_alpha(self, client: LlamaStackClient) -> None:
- with client.eval.with_streaming_response.evaluate_rows_alpha(
+ with client.alpha.eval.with_streaming_response.evaluate_rows_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -281,7 +281,7 @@ def test_streaming_response_evaluate_rows_alpha(self, client: LlamaStackClient)
@parametrize
def test_path_params_evaluate_rows_alpha(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- client.eval.with_raw_response.evaluate_rows_alpha(
+ client.alpha.eval.with_raw_response.evaluate_rows_alpha(
benchmark_id="",
benchmark_config={
"eval_candidate": {
@@ -304,7 +304,7 @@ def test_path_params_evaluate_rows_alpha(self, client: LlamaStackClient) -> None
@parametrize
def test_method_run_eval(self, client: LlamaStackClient) -> None:
- eval = client.eval.run_eval(
+ eval = client.alpha.eval.run_eval(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -326,7 +326,7 @@ def test_method_run_eval(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_run_eval_with_all_params(self, client: LlamaStackClient) -> None:
- eval = client.eval.run_eval(
+ eval = client.alpha.eval.run_eval(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -359,7 +359,7 @@ def test_method_run_eval_with_all_params(self, client: LlamaStackClient) -> None
@parametrize
def test_raw_response_run_eval(self, client: LlamaStackClient) -> None:
- response = client.eval.with_raw_response.run_eval(
+ response = client.alpha.eval.with_raw_response.run_eval(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -385,7 +385,7 @@ def test_raw_response_run_eval(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_run_eval(self, client: LlamaStackClient) -> None:
- with client.eval.with_streaming_response.run_eval(
+ with client.alpha.eval.with_streaming_response.run_eval(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -414,7 +414,7 @@ def test_streaming_response_run_eval(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_run_eval(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- client.eval.with_raw_response.run_eval(
+ client.alpha.eval.with_raw_response.run_eval(
benchmark_id="",
benchmark_config={
"eval_candidate": {
@@ -435,7 +435,7 @@ def test_path_params_run_eval(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_run_eval_alpha(self, client: LlamaStackClient) -> None:
- eval = client.eval.run_eval_alpha(
+ eval = client.alpha.eval.run_eval_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -457,7 +457,7 @@ def test_method_run_eval_alpha(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_run_eval_alpha_with_all_params(self, client: LlamaStackClient) -> None:
- eval = client.eval.run_eval_alpha(
+ eval = client.alpha.eval.run_eval_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -490,7 +490,7 @@ def test_method_run_eval_alpha_with_all_params(self, client: LlamaStackClient) -
@parametrize
def test_raw_response_run_eval_alpha(self, client: LlamaStackClient) -> None:
- response = client.eval.with_raw_response.run_eval_alpha(
+ response = client.alpha.eval.with_raw_response.run_eval_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -516,7 +516,7 @@ def test_raw_response_run_eval_alpha(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_run_eval_alpha(self, client: LlamaStackClient) -> None:
- with client.eval.with_streaming_response.run_eval_alpha(
+ with client.alpha.eval.with_streaming_response.run_eval_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -545,7 +545,7 @@ def test_streaming_response_run_eval_alpha(self, client: LlamaStackClient) -> No
@parametrize
def test_path_params_run_eval_alpha(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- client.eval.with_raw_response.run_eval_alpha(
+ client.alpha.eval.with_raw_response.run_eval_alpha(
benchmark_id="",
benchmark_config={
"eval_candidate": {
@@ -572,7 +572,7 @@ class TestAsyncEval:
@parametrize
async def test_method_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.eval.evaluate_rows(
+ eval = await async_client.alpha.eval.evaluate_rows(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -596,7 +596,7 @@ async def test_method_evaluate_rows(self, async_client: AsyncLlamaStackClient) -
@parametrize
async def test_method_evaluate_rows_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.eval.evaluate_rows(
+ eval = await async_client.alpha.eval.evaluate_rows(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -631,7 +631,7 @@ async def test_method_evaluate_rows_with_all_params(self, async_client: AsyncLla
@parametrize
async def test_raw_response_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.eval.with_raw_response.evaluate_rows(
+ response = await async_client.alpha.eval.with_raw_response.evaluate_rows(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -659,7 +659,7 @@ async def test_raw_response_evaluate_rows(self, async_client: AsyncLlamaStackCli
@parametrize
async def test_streaming_response_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.eval.with_streaming_response.evaluate_rows(
+ async with async_client.alpha.eval.with_streaming_response.evaluate_rows(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -690,7 +690,7 @@ async def test_streaming_response_evaluate_rows(self, async_client: AsyncLlamaSt
@parametrize
async def test_path_params_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- await async_client.eval.with_raw_response.evaluate_rows(
+ await async_client.alpha.eval.with_raw_response.evaluate_rows(
benchmark_id="",
benchmark_config={
"eval_candidate": {
@@ -713,7 +713,7 @@ async def test_path_params_evaluate_rows(self, async_client: AsyncLlamaStackClie
@parametrize
async def test_method_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.eval.evaluate_rows_alpha(
+ eval = await async_client.alpha.eval.evaluate_rows_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -737,7 +737,7 @@ async def test_method_evaluate_rows_alpha(self, async_client: AsyncLlamaStackCli
@parametrize
async def test_method_evaluate_rows_alpha_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.eval.evaluate_rows_alpha(
+ eval = await async_client.alpha.eval.evaluate_rows_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -772,7 +772,7 @@ async def test_method_evaluate_rows_alpha_with_all_params(self, async_client: As
@parametrize
async def test_raw_response_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.eval.with_raw_response.evaluate_rows_alpha(
+ response = await async_client.alpha.eval.with_raw_response.evaluate_rows_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -800,7 +800,7 @@ async def test_raw_response_evaluate_rows_alpha(self, async_client: AsyncLlamaSt
@parametrize
async def test_streaming_response_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.eval.with_streaming_response.evaluate_rows_alpha(
+ async with async_client.alpha.eval.with_streaming_response.evaluate_rows_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -831,7 +831,7 @@ async def test_streaming_response_evaluate_rows_alpha(self, async_client: AsyncL
@parametrize
async def test_path_params_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- await async_client.eval.with_raw_response.evaluate_rows_alpha(
+ await async_client.alpha.eval.with_raw_response.evaluate_rows_alpha(
benchmark_id="",
benchmark_config={
"eval_candidate": {
@@ -854,7 +854,7 @@ async def test_path_params_evaluate_rows_alpha(self, async_client: AsyncLlamaSta
@parametrize
async def test_method_run_eval(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.eval.run_eval(
+ eval = await async_client.alpha.eval.run_eval(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -876,7 +876,7 @@ async def test_method_run_eval(self, async_client: AsyncLlamaStackClient) -> Non
@parametrize
async def test_method_run_eval_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.eval.run_eval(
+ eval = await async_client.alpha.eval.run_eval(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -909,7 +909,7 @@ async def test_method_run_eval_with_all_params(self, async_client: AsyncLlamaSta
@parametrize
async def test_raw_response_run_eval(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.eval.with_raw_response.run_eval(
+ response = await async_client.alpha.eval.with_raw_response.run_eval(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -935,7 +935,7 @@ async def test_raw_response_run_eval(self, async_client: AsyncLlamaStackClient)
@parametrize
async def test_streaming_response_run_eval(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.eval.with_streaming_response.run_eval(
+ async with async_client.alpha.eval.with_streaming_response.run_eval(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -964,7 +964,7 @@ async def test_streaming_response_run_eval(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_path_params_run_eval(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- await async_client.eval.with_raw_response.run_eval(
+ await async_client.alpha.eval.with_raw_response.run_eval(
benchmark_id="",
benchmark_config={
"eval_candidate": {
@@ -985,7 +985,7 @@ async def test_path_params_run_eval(self, async_client: AsyncLlamaStackClient) -
@parametrize
async def test_method_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.eval.run_eval_alpha(
+ eval = await async_client.alpha.eval.run_eval_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -1007,7 +1007,7 @@ async def test_method_run_eval_alpha(self, async_client: AsyncLlamaStackClient)
@parametrize
async def test_method_run_eval_alpha_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.eval.run_eval_alpha(
+ eval = await async_client.alpha.eval.run_eval_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -1040,7 +1040,7 @@ async def test_method_run_eval_alpha_with_all_params(self, async_client: AsyncLl
@parametrize
async def test_raw_response_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.eval.with_raw_response.run_eval_alpha(
+ response = await async_client.alpha.eval.with_raw_response.run_eval_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -1066,7 +1066,7 @@ async def test_raw_response_run_eval_alpha(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_streaming_response_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.eval.with_streaming_response.run_eval_alpha(
+ async with async_client.alpha.eval.with_streaming_response.run_eval_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -1095,7 +1095,7 @@ async def test_streaming_response_run_eval_alpha(self, async_client: AsyncLlamaS
@parametrize
async def test_path_params_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- await async_client.eval.with_raw_response.run_eval_alpha(
+ await async_client.alpha.eval.with_raw_response.run_eval_alpha(
benchmark_id="",
benchmark_config={
"eval_candidate": {
diff --git a/tests/api_resources/test_inference.py b/tests/api_resources/alpha/test_inference.py
similarity index 86%
rename from tests/api_resources/test_inference.py
rename to tests/api_resources/alpha/test_inference.py
index f26802c2..551e2213 100644
--- a/tests/api_resources/test_inference.py
+++ b/tests/api_resources/alpha/test_inference.py
@@ -9,7 +9,7 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types import InferenceRerankResponse
+from llama_stack_client.types.alpha import InferenceRerankResponse
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -19,7 +19,7 @@ class TestInference:
@parametrize
def test_method_rerank(self, client: LlamaStackClient) -> None:
- inference = client.inference.rerank(
+ inference = client.alpha.inference.rerank(
items=["string"],
model="model",
query="string",
@@ -28,7 +28,7 @@ def test_method_rerank(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_rerank_with_all_params(self, client: LlamaStackClient) -> None:
- inference = client.inference.rerank(
+ inference = client.alpha.inference.rerank(
items=["string"],
model="model",
query="string",
@@ -38,7 +38,7 @@ def test_method_rerank_with_all_params(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_rerank(self, client: LlamaStackClient) -> None:
- response = client.inference.with_raw_response.rerank(
+ response = client.alpha.inference.with_raw_response.rerank(
items=["string"],
model="model",
query="string",
@@ -51,7 +51,7 @@ def test_raw_response_rerank(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_rerank(self, client: LlamaStackClient) -> None:
- with client.inference.with_streaming_response.rerank(
+ with client.alpha.inference.with_streaming_response.rerank(
items=["string"],
model="model",
query="string",
@@ -72,7 +72,7 @@ class TestAsyncInference:
@parametrize
async def test_method_rerank(self, async_client: AsyncLlamaStackClient) -> None:
- inference = await async_client.inference.rerank(
+ inference = await async_client.alpha.inference.rerank(
items=["string"],
model="model",
query="string",
@@ -81,7 +81,7 @@ async def test_method_rerank(self, async_client: AsyncLlamaStackClient) -> None:
@parametrize
async def test_method_rerank_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- inference = await async_client.inference.rerank(
+ inference = await async_client.alpha.inference.rerank(
items=["string"],
model="model",
query="string",
@@ -91,7 +91,7 @@ async def test_method_rerank_with_all_params(self, async_client: AsyncLlamaStack
@parametrize
async def test_raw_response_rerank(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.inference.with_raw_response.rerank(
+ response = await async_client.alpha.inference.with_raw_response.rerank(
items=["string"],
model="model",
query="string",
@@ -104,7 +104,7 @@ async def test_raw_response_rerank(self, async_client: AsyncLlamaStackClient) ->
@parametrize
async def test_streaming_response_rerank(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.inference.with_streaming_response.rerank(
+ async with async_client.alpha.inference.with_streaming_response.rerank(
items=["string"],
model="model",
query="string",
diff --git a/tests/api_resources/test_post_training.py b/tests/api_resources/alpha/test_post_training.py
similarity index 92%
rename from tests/api_resources/test_post_training.py
rename to tests/api_resources/alpha/test_post_training.py
index 899a53ca..14229811 100644
--- a/tests/api_resources/test_post_training.py
+++ b/tests/api_resources/alpha/test_post_training.py
@@ -9,7 +9,7 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types import (
+from llama_stack_client.types.alpha import (
PostTrainingJob,
)
@@ -21,7 +21,7 @@ class TestPostTraining:
@parametrize
def test_method_preference_optimize(self, client: LlamaStackClient) -> None:
- post_training = client.post_training.preference_optimize(
+ post_training = client.alpha.post_training.preference_optimize(
algorithm_config={
"beta": 0,
"loss_type": "sigmoid",
@@ -40,7 +40,7 @@ def test_method_preference_optimize(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_preference_optimize_with_all_params(self, client: LlamaStackClient) -> None:
- post_training = client.post_training.preference_optimize(
+ post_training = client.alpha.post_training.preference_optimize(
algorithm_config={
"beta": 0,
"loss_type": "sigmoid",
@@ -82,7 +82,7 @@ def test_method_preference_optimize_with_all_params(self, client: LlamaStackClie
@parametrize
def test_raw_response_preference_optimize(self, client: LlamaStackClient) -> None:
- response = client.post_training.with_raw_response.preference_optimize(
+ response = client.alpha.post_training.with_raw_response.preference_optimize(
algorithm_config={
"beta": 0,
"loss_type": "sigmoid",
@@ -105,7 +105,7 @@ def test_raw_response_preference_optimize(self, client: LlamaStackClient) -> Non
@parametrize
def test_streaming_response_preference_optimize(self, client: LlamaStackClient) -> None:
- with client.post_training.with_streaming_response.preference_optimize(
+ with client.alpha.post_training.with_streaming_response.preference_optimize(
algorithm_config={
"beta": 0,
"loss_type": "sigmoid",
@@ -130,7 +130,7 @@ def test_streaming_response_preference_optimize(self, client: LlamaStackClient)
@parametrize
def test_method_supervised_fine_tune(self, client: LlamaStackClient) -> None:
- post_training = client.post_training.supervised_fine_tune(
+ post_training = client.alpha.post_training.supervised_fine_tune(
hyperparam_search_config={"foo": True},
job_uuid="job_uuid",
logger_config={"foo": True},
@@ -144,7 +144,7 @@ def test_method_supervised_fine_tune(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_supervised_fine_tune_with_all_params(self, client: LlamaStackClient) -> None:
- post_training = client.post_training.supervised_fine_tune(
+ post_training = client.alpha.post_training.supervised_fine_tune(
hyperparam_search_config={"foo": True},
job_uuid="job_uuid",
logger_config={"foo": True},
@@ -193,7 +193,7 @@ def test_method_supervised_fine_tune_with_all_params(self, client: LlamaStackCli
@parametrize
def test_raw_response_supervised_fine_tune(self, client: LlamaStackClient) -> None:
- response = client.post_training.with_raw_response.supervised_fine_tune(
+ response = client.alpha.post_training.with_raw_response.supervised_fine_tune(
hyperparam_search_config={"foo": True},
job_uuid="job_uuid",
logger_config={"foo": True},
@@ -211,7 +211,7 @@ def test_raw_response_supervised_fine_tune(self, client: LlamaStackClient) -> No
@parametrize
def test_streaming_response_supervised_fine_tune(self, client: LlamaStackClient) -> None:
- with client.post_training.with_streaming_response.supervised_fine_tune(
+ with client.alpha.post_training.with_streaming_response.supervised_fine_tune(
hyperparam_search_config={"foo": True},
job_uuid="job_uuid",
logger_config={"foo": True},
@@ -237,7 +237,7 @@ class TestAsyncPostTraining:
@parametrize
async def test_method_preference_optimize(self, async_client: AsyncLlamaStackClient) -> None:
- post_training = await async_client.post_training.preference_optimize(
+ post_training = await async_client.alpha.post_training.preference_optimize(
algorithm_config={
"beta": 0,
"loss_type": "sigmoid",
@@ -256,7 +256,7 @@ async def test_method_preference_optimize(self, async_client: AsyncLlamaStackCli
@parametrize
async def test_method_preference_optimize_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- post_training = await async_client.post_training.preference_optimize(
+ post_training = await async_client.alpha.post_training.preference_optimize(
algorithm_config={
"beta": 0,
"loss_type": "sigmoid",
@@ -298,7 +298,7 @@ async def test_method_preference_optimize_with_all_params(self, async_client: As
@parametrize
async def test_raw_response_preference_optimize(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.post_training.with_raw_response.preference_optimize(
+ response = await async_client.alpha.post_training.with_raw_response.preference_optimize(
algorithm_config={
"beta": 0,
"loss_type": "sigmoid",
@@ -321,7 +321,7 @@ async def test_raw_response_preference_optimize(self, async_client: AsyncLlamaSt
@parametrize
async def test_streaming_response_preference_optimize(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.post_training.with_streaming_response.preference_optimize(
+ async with async_client.alpha.post_training.with_streaming_response.preference_optimize(
algorithm_config={
"beta": 0,
"loss_type": "sigmoid",
@@ -346,7 +346,7 @@ async def test_streaming_response_preference_optimize(self, async_client: AsyncL
@parametrize
async def test_method_supervised_fine_tune(self, async_client: AsyncLlamaStackClient) -> None:
- post_training = await async_client.post_training.supervised_fine_tune(
+ post_training = await async_client.alpha.post_training.supervised_fine_tune(
hyperparam_search_config={"foo": True},
job_uuid="job_uuid",
logger_config={"foo": True},
@@ -360,7 +360,7 @@ async def test_method_supervised_fine_tune(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_method_supervised_fine_tune_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- post_training = await async_client.post_training.supervised_fine_tune(
+ post_training = await async_client.alpha.post_training.supervised_fine_tune(
hyperparam_search_config={"foo": True},
job_uuid="job_uuid",
logger_config={"foo": True},
@@ -409,7 +409,7 @@ async def test_method_supervised_fine_tune_with_all_params(self, async_client: A
@parametrize
async def test_raw_response_supervised_fine_tune(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.post_training.with_raw_response.supervised_fine_tune(
+ response = await async_client.alpha.post_training.with_raw_response.supervised_fine_tune(
hyperparam_search_config={"foo": True},
job_uuid="job_uuid",
logger_config={"foo": True},
@@ -427,7 +427,7 @@ async def test_raw_response_supervised_fine_tune(self, async_client: AsyncLlamaS
@parametrize
async def test_streaming_response_supervised_fine_tune(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.post_training.with_streaming_response.supervised_fine_tune(
+ async with async_client.alpha.post_training.with_streaming_response.supervised_fine_tune(
hyperparam_search_config={"foo": True},
job_uuid="job_uuid",
logger_config={"foo": True},
diff --git a/tests/api_resources/vector_stores/test_file_batches.py b/tests/api_resources/vector_stores/test_file_batches.py
new file mode 100644
index 00000000..b92f31b2
--- /dev/null
+++ b/tests/api_resources/vector_stores/test_file_batches.py
@@ -0,0 +1,446 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from tests.utils import assert_matches_type
+from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
+from llama_stack_client.pagination import SyncOpenAICursorPage, AsyncOpenAICursorPage
+from llama_stack_client.types.vector_stores import (
+ VectorStoreFile,
+ VectorStoreFileBatches,
+)
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestFileBatches:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
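+    # "loose"/"strict" run each test with and without strict response validation on the client.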
+
+ @parametrize
+ def test_method_create(self, client: LlamaStackClient) -> None:
+ file_batch = client.vector_stores.file_batches.create(
+ vector_store_id="vector_store_id",
+ file_ids=["string"],
+ )
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ def test_method_create_with_all_params(self, client: LlamaStackClient) -> None:
+ file_batch = client.vector_stores.file_batches.create(
+ vector_store_id="vector_store_id",
+ file_ids=["string"],
+ attributes={"foo": True},
+ chunking_strategy={"type": "auto"},
+ )
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ def test_raw_response_create(self, client: LlamaStackClient) -> None:
+ response = client.vector_stores.file_batches.with_raw_response.create(
+ vector_store_id="vector_store_id",
+ file_ids=["string"],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ file_batch = response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ def test_streaming_response_create(self, client: LlamaStackClient) -> None:
+ with client.vector_stores.file_batches.with_streaming_response.create(
+ vector_store_id="vector_store_id",
+ file_ids=["string"],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ file_batch = response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_create(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
+ client.vector_stores.file_batches.with_raw_response.create(
+ vector_store_id="",
+ file_ids=["string"],
+ )
+
+ @parametrize
+ def test_method_retrieve(self, client: LlamaStackClient) -> None:
+ file_batch = client.vector_stores.file_batches.retrieve(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
+ response = client.vector_stores.file_batches.with_raw_response.retrieve(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ file_batch = response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
+ with client.vector_stores.file_batches.with_streaming_response.retrieve(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ file_batch = response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
+ client.vector_stores.file_batches.with_raw_response.retrieve(
+ batch_id="batch_id",
+ vector_store_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"):
+ client.vector_stores.file_batches.with_raw_response.retrieve(
+ batch_id="",
+ vector_store_id="vector_store_id",
+ )
+
+ @parametrize
+ def test_method_list(self, client: LlamaStackClient) -> None:
+ file_batch = client.vector_stores.file_batches.list(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+ assert_matches_type(SyncOpenAICursorPage[VectorStoreFile], file_batch, path=["response"])
+
+ @parametrize
+ def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
+ file_batch = client.vector_stores.file_batches.list(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ after="after",
+ before="before",
+ filter="filter",
+ limit=0,
+ order="order",
+ )
+ assert_matches_type(SyncOpenAICursorPage[VectorStoreFile], file_batch, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: LlamaStackClient) -> None:
+ response = client.vector_stores.file_batches.with_raw_response.list(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ file_batch = response.parse()
+ assert_matches_type(SyncOpenAICursorPage[VectorStoreFile], file_batch, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: LlamaStackClient) -> None:
+ with client.vector_stores.file_batches.with_streaming_response.list(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ file_batch = response.parse()
+ assert_matches_type(SyncOpenAICursorPage[VectorStoreFile], file_batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_list(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
+ client.vector_stores.file_batches.with_raw_response.list(
+ batch_id="batch_id",
+ vector_store_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"):
+ client.vector_stores.file_batches.with_raw_response.list(
+ batch_id="",
+ vector_store_id="vector_store_id",
+ )
+
+ @parametrize
+ def test_method_cancel(self, client: LlamaStackClient) -> None:
+ file_batch = client.vector_stores.file_batches.cancel(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ def test_raw_response_cancel(self, client: LlamaStackClient) -> None:
+ response = client.vector_stores.file_batches.with_raw_response.cancel(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ file_batch = response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ def test_streaming_response_cancel(self, client: LlamaStackClient) -> None:
+ with client.vector_stores.file_batches.with_streaming_response.cancel(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ file_batch = response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_cancel(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
+ client.vector_stores.file_batches.with_raw_response.cancel(
+ batch_id="batch_id",
+ vector_store_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"):
+ client.vector_stores.file_batches.with_raw_response.cancel(
+ batch_id="",
+ vector_store_id="vector_store_id",
+ )
+
+
+class TestAsyncFileBatches:
+ parametrize = pytest.mark.parametrize(
+ "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+ )
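+    # The extra "aiohttp" id also runs each test over the optional aiohttp-backed async transport.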
+
+ @parametrize
+ async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None:
+ file_batch = await async_client.vector_stores.file_batches.create(
+ vector_store_id="vector_store_id",
+ file_ids=["string"],
+ )
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ async def test_method_create_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ file_batch = await async_client.vector_stores.file_batches.create(
+ vector_store_id="vector_store_id",
+ file_ids=["string"],
+ attributes={"foo": True},
+ chunking_strategy={"type": "auto"},
+ )
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.vector_stores.file_batches.with_raw_response.create(
+ vector_store_id="vector_store_id",
+ file_ids=["string"],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ file_batch = await response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_create(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.vector_stores.file_batches.with_streaming_response.create(
+ vector_store_id="vector_store_id",
+ file_ids=["string"],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ file_batch = await response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_create(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
+ await async_client.vector_stores.file_batches.with_raw_response.create(
+ vector_store_id="",
+ file_ids=["string"],
+ )
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ file_batch = await async_client.vector_stores.file_batches.retrieve(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.vector_stores.file_batches.with_raw_response.retrieve(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ file_batch = await response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.vector_stores.file_batches.with_streaming_response.retrieve(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ file_batch = await response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
+ await async_client.vector_stores.file_batches.with_raw_response.retrieve(
+ batch_id="batch_id",
+ vector_store_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"):
+ await async_client.vector_stores.file_batches.with_raw_response.retrieve(
+ batch_id="",
+ vector_store_id="vector_store_id",
+ )
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
+ file_batch = await async_client.vector_stores.file_batches.list(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+ assert_matches_type(AsyncOpenAICursorPage[VectorStoreFile], file_batch, path=["response"])
+
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ file_batch = await async_client.vector_stores.file_batches.list(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ after="after",
+ before="before",
+ filter="filter",
+ limit=0,
+ order="order",
+ )
+ assert_matches_type(AsyncOpenAICursorPage[VectorStoreFile], file_batch, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.vector_stores.file_batches.with_raw_response.list(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ file_batch = await response.parse()
+ assert_matches_type(AsyncOpenAICursorPage[VectorStoreFile], file_batch, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.vector_stores.file_batches.with_streaming_response.list(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ file_batch = await response.parse()
+ assert_matches_type(AsyncOpenAICursorPage[VectorStoreFile], file_batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_list(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
+ await async_client.vector_stores.file_batches.with_raw_response.list(
+ batch_id="batch_id",
+ vector_store_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"):
+ await async_client.vector_stores.file_batches.with_raw_response.list(
+ batch_id="",
+ vector_store_id="vector_store_id",
+ )
+
+ @parametrize
+ async def test_method_cancel(self, async_client: AsyncLlamaStackClient) -> None:
+ file_batch = await async_client.vector_stores.file_batches.cancel(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ async def test_raw_response_cancel(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.vector_stores.file_batches.with_raw_response.cancel(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ file_batch = await response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_cancel(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.vector_stores.file_batches.with_streaming_response.cancel(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ file_batch = await response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_cancel(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
+ await async_client.vector_stores.file_batches.with_raw_response.cancel(
+ batch_id="batch_id",
+ vector_store_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"):
+ await async_client.vector_stores.file_batches.with_raw_response.cancel(
+ batch_id="",
+ vector_store_id="vector_store_id",
+ )
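
Taken together, the new tests cover the full file-batch lifecycle. A condensed sketch of the calls they exercise, assuming an existing vector store and that the batch object exposes an `id`:

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://127.0.0.1:4010")  # illustrative URL

    # Enqueue a batch of already-uploaded files into a vector store.
    batch = client.vector_stores.file_batches.create(
        vector_store_id="vector_store_id",
        file_ids=["string"],
        chunking_strategy={"type": "auto"},  # optional, as in the tests above
    )

    # Inspect the batch, page through its files (cursor pagination), or cancel it.
    client.vector_stores.file_batches.retrieve(batch_id=batch.id, vector_store_id="vector_store_id")
    for f in client.vector_stores.file_batches.list(batch_id=batch.id, vector_store_id="vector_store_id"):
        print(f.id)
    client.vector_stores.file_batches.cancel(batch_id=batch.id, vector_store_id="vector_store_id")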