From ee323a0e29892ef53b7d98bf8ad943cd865fb785 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Tue, 30 Sep 2025 17:55:56 +0000
Subject: [PATCH 1/7] feat(api): SDKs for vector store file batches
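
Regenerating against the updated spec removes three endpoints ahead of
the file batches work that lands in the next patch: `files.retrieve`,
`files.delete`, and `vector_stores.files.content`, together with their
response models (`DeleteFileResponse`, `FileContentResponse`) and tests.

The files surface that remains after this patch is create/list/content.
A minimal sketch of the surviving calls (the base_url is illustrative,
and the `id` field on `File` is assumed from the OpenAI-compatible
schema rather than shown in this diff):

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:8321")

    # list() returns a SyncOpenAICursorPage and can be iterated
    # directly; content() fetches the raw body for one file.
    for file in client.files.list():
        raw = client.files.content(file.id)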
---
.stats.yml | 8 +-
api.md | 9 +-
src/llama_stack_client/resources/files.py | 157 ------------------
.../resources/vector_stores/files.py | 85 ----------
src/llama_stack_client/types/__init__.py | 1 -
.../types/delete_file_response.py | 18 --
.../types/vector_stores/__init__.py | 1 -
.../vector_stores/file_content_response.py | 30 ----
tests/api_resources/test_files.py | 154 +----------------
.../api_resources/vector_stores/test_files.py | 97 -----------
10 files changed, 6 insertions(+), 554 deletions(-)
delete mode 100644 src/llama_stack_client/types/delete_file_response.py
delete mode 100644 src/llama_stack_client/types/vector_stores/file_content_response.py
diff --git a/.stats.yml b/.stats.yml
index 755df453..cbb0181e 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 105
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-d7bea816190382a93511491e33d1f37f707620926ab133ae8ce0883d763df741.yml
-openapi_spec_hash: f73b3af77108625edae3f25972b9e665
-config_hash: 548f336ac1b68ab1dfe385b79df764dd
+configured_endpoints: 102
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-3c569913f686c852ab392d470b9d905cc944d5a46a2324a21aa1376fb24e6714.yml
+openapi_spec_hash: 455f397c5f401ea425a4064bb39c6801
+config_hash: 53c09ba1fdae5045de1860c479a51dc7
diff --git a/api.md b/api.md
index c246f4c1..7d4b431a 100644
--- a/api.md
+++ b/api.md
@@ -351,11 +351,7 @@ Methods:
Types:
```python
-from llama_stack_client.types.vector_stores import (
- VectorStoreFile,
- FileDeleteResponse,
- FileContentResponse,
-)
+from llama_stack_client.types.vector_stores import VectorStoreFile, FileDeleteResponse
```
Methods:
@@ -365,7 +361,6 @@ Methods:
- client.vector_stores.files.update(file_id, \*, vector_store_id, \*\*params) -> VectorStoreFile
- client.vector_stores.files.list(vector_store_id, \*\*params) -> SyncOpenAICursorPage[VectorStoreFile]
- client.vector_stores.files.delete(file_id, \*, vector_store_id) -> FileDeleteResponse
-- client.vector_stores.files.content(file_id, \*, vector_store_id) -> FileContentResponse
# Models
@@ -589,7 +584,5 @@ from llama_stack_client.types import DeleteFileResponse, File, ListFilesResponse
Methods:
- client.files.create(\*\*params) -> File
-- client.files.retrieve(file_id) -> File
- client.files.list(\*\*params) -> SyncOpenAICursorPage[File]
-- client.files.delete(file_id) -> DeleteFileResponse
- client.files.content(file_id) -> object
diff --git a/src/llama_stack_client/resources/files.py b/src/llama_stack_client/resources/files.py
index 39add811..dfe1edea 100644
--- a/src/llama_stack_client/resources/files.py
+++ b/src/llama_stack_client/resources/files.py
@@ -21,7 +21,6 @@
from ..pagination import SyncOpenAICursorPage, AsyncOpenAICursorPage
from ..types.file import File
from .._base_client import AsyncPaginator, make_request_options
-from ..types.delete_file_response import DeleteFileResponse
__all__ = ["FilesResource", "AsyncFilesResource"]
@@ -107,39 +106,6 @@ def create(
cast_to=File,
)
- def retrieve(
- self,
- file_id: str,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> File:
- """
- Returns information about a specific file.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not file_id:
- raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
- return self._get(
- f"/v1/files/{file_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=File,
- )
-
def list(
self,
*,
@@ -200,39 +166,6 @@ def list(
model=File,
)
- def delete(
- self,
- file_id: str,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> DeleteFileResponse:
- """
- Delete a file.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not file_id:
- raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
- return self._delete(
- f"/v1/files/{file_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=DeleteFileResponse,
- )
-
def content(
self,
file_id: str,
@@ -348,39 +281,6 @@ async def create(
cast_to=File,
)
- async def retrieve(
- self,
- file_id: str,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> File:
- """
- Returns information about a specific file.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not file_id:
- raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
- return await self._get(
- f"/v1/files/{file_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=File,
- )
-
def list(
self,
*,
@@ -441,39 +341,6 @@ def list(
model=File,
)
- async def delete(
- self,
- file_id: str,
- *,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> DeleteFileResponse:
- """
- Delete a file.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not file_id:
- raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
- return await self._delete(
- f"/v1/files/{file_id}",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=DeleteFileResponse,
- )
-
async def content(
self,
file_id: str,
@@ -515,15 +382,9 @@ def __init__(self, files: FilesResource) -> None:
self.create = to_raw_response_wrapper(
files.create,
)
- self.retrieve = to_raw_response_wrapper(
- files.retrieve,
- )
self.list = to_raw_response_wrapper(
files.list,
)
- self.delete = to_raw_response_wrapper(
- files.delete,
- )
self.content = to_raw_response_wrapper(
files.content,
)
@@ -536,15 +397,9 @@ def __init__(self, files: AsyncFilesResource) -> None:
self.create = async_to_raw_response_wrapper(
files.create,
)
- self.retrieve = async_to_raw_response_wrapper(
- files.retrieve,
- )
self.list = async_to_raw_response_wrapper(
files.list,
)
- self.delete = async_to_raw_response_wrapper(
- files.delete,
- )
self.content = async_to_raw_response_wrapper(
files.content,
)
@@ -557,15 +412,9 @@ def __init__(self, files: FilesResource) -> None:
self.create = to_streamed_response_wrapper(
files.create,
)
- self.retrieve = to_streamed_response_wrapper(
- files.retrieve,
- )
self.list = to_streamed_response_wrapper(
files.list,
)
- self.delete = to_streamed_response_wrapper(
- files.delete,
- )
self.content = to_streamed_response_wrapper(
files.content,
)
@@ -578,15 +427,9 @@ def __init__(self, files: AsyncFilesResource) -> None:
self.create = async_to_streamed_response_wrapper(
files.create,
)
- self.retrieve = async_to_streamed_response_wrapper(
- files.retrieve,
- )
self.list = async_to_streamed_response_wrapper(
files.list,
)
- self.delete = async_to_streamed_response_wrapper(
- files.delete,
- )
self.content = async_to_streamed_response_wrapper(
files.content,
)
diff --git a/src/llama_stack_client/resources/vector_stores/files.py b/src/llama_stack_client/resources/vector_stores/files.py
index f9a1ef31..6d532dfb 100644
--- a/src/llama_stack_client/resources/vector_stores/files.py
+++ b/src/llama_stack_client/resources/vector_stores/files.py
@@ -22,7 +22,6 @@
from ...types.vector_stores import file_list_params, file_create_params, file_update_params
from ...types.vector_stores.vector_store_file import VectorStoreFile
from ...types.vector_stores.file_delete_response import FileDeleteResponse
-from ...types.vector_stores.file_content_response import FileContentResponse
__all__ = ["FilesResource", "AsyncFilesResource"]
@@ -275,42 +274,6 @@ def delete(
cast_to=FileDeleteResponse,
)
- def content(
- self,
- file_id: str,
- *,
- vector_store_id: str,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> FileContentResponse:
- """
- Retrieves the contents of a vector store file.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not vector_store_id:
- raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
- if not file_id:
- raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
- return self._get(
- f"/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=FileContentResponse,
- )
-
class AsyncFilesResource(AsyncAPIResource):
@cached_property
@@ -560,42 +523,6 @@ async def delete(
cast_to=FileDeleteResponse,
)
- async def content(
- self,
- file_id: str,
- *,
- vector_store_id: str,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> FileContentResponse:
- """
- Retrieves the contents of a vector store file.
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- if not vector_store_id:
- raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
- if not file_id:
- raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
- return await self._get(
- f"/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=FileContentResponse,
- )
-
class FilesResourceWithRawResponse:
def __init__(self, files: FilesResource) -> None:
@@ -616,9 +543,6 @@ def __init__(self, files: FilesResource) -> None:
self.delete = to_raw_response_wrapper(
files.delete,
)
- self.content = to_raw_response_wrapper(
- files.content,
- )
class AsyncFilesResourceWithRawResponse:
@@ -640,9 +564,6 @@ def __init__(self, files: AsyncFilesResource) -> None:
self.delete = async_to_raw_response_wrapper(
files.delete,
)
- self.content = async_to_raw_response_wrapper(
- files.content,
- )
class FilesResourceWithStreamingResponse:
@@ -664,9 +585,6 @@ def __init__(self, files: FilesResource) -> None:
self.delete = to_streamed_response_wrapper(
files.delete,
)
- self.content = to_streamed_response_wrapper(
- files.content,
- )
class AsyncFilesResourceWithStreamingResponse:
@@ -688,6 +606,3 @@ def __init__(self, files: AsyncFilesResource) -> None:
self.delete = async_to_streamed_response_wrapper(
files.delete,
)
- self.content = async_to_streamed_response_wrapper(
- files.content,
- )
diff --git a/src/llama_stack_client/types/__init__.py b/src/llama_stack_client/types/__init__.py
index f81ada61..7bc1b518 100644
--- a/src/llama_stack_client/types/__init__.py
+++ b/src/llama_stack_client/types/__init__.py
@@ -64,7 +64,6 @@
from .run_shield_response import RunShieldResponse as RunShieldResponse
from .tool_execution_step import ToolExecutionStep as ToolExecutionStep
from .tool_response_param import ToolResponseParam as ToolResponseParam
-from .delete_file_response import DeleteFileResponse as DeleteFileResponse
from .eval_run_eval_params import EvalRunEvalParams as EvalRunEvalParams
from .list_models_response import ListModelsResponse as ListModelsResponse
from .list_routes_response import ListRoutesResponse as ListRoutesResponse
diff --git a/src/llama_stack_client/types/delete_file_response.py b/src/llama_stack_client/types/delete_file_response.py
deleted file mode 100644
index 2188556f..00000000
--- a/src/llama_stack_client/types/delete_file_response.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from .._models import BaseModel
-
-__all__ = ["DeleteFileResponse"]
-
-
-class DeleteFileResponse(BaseModel):
- id: str
- """The file identifier that was deleted"""
-
- deleted: bool
- """Whether the file was successfully deleted"""
-
- object: Literal["file"]
- """The object type, which is always "file" """
diff --git a/src/llama_stack_client/types/vector_stores/__init__.py b/src/llama_stack_client/types/vector_stores/__init__.py
index 68bcf684..2f5fd33b 100644
--- a/src/llama_stack_client/types/vector_stores/__init__.py
+++ b/src/llama_stack_client/types/vector_stores/__init__.py
@@ -7,4 +7,3 @@
from .file_create_params import FileCreateParams as FileCreateParams
from .file_update_params import FileUpdateParams as FileUpdateParams
from .file_delete_response import FileDeleteResponse as FileDeleteResponse
-from .file_content_response import FileContentResponse as FileContentResponse
diff --git a/src/llama_stack_client/types/vector_stores/file_content_response.py b/src/llama_stack_client/types/vector_stores/file_content_response.py
deleted file mode 100644
index 035a34a8..00000000
--- a/src/llama_stack_client/types/vector_stores/file_content_response.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-
-__all__ = ["FileContentResponse", "Content"]
-
-
-class Content(BaseModel):
- text: str
- """The actual text content"""
-
- type: Literal["text"]
- """Content type, currently only "text" is supported"""
-
-
-class FileContentResponse(BaseModel):
- attributes: Dict[str, Union[bool, float, str, List[object], object, None]]
- """Key-value attributes associated with the file"""
-
- content: List[Content]
- """List of content items from the file"""
-
- file_id: str
- """Unique identifier for the file"""
-
- filename: str
- """Name of the file"""
diff --git a/tests/api_resources/test_files.py b/tests/api_resources/test_files.py
index 83b763ab..926347a5 100644
--- a/tests/api_resources/test_files.py
+++ b/tests/api_resources/test_files.py
@@ -9,7 +9,7 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types import File, DeleteFileResponse
+from llama_stack_client.types import File
from llama_stack_client.pagination import SyncOpenAICursorPage, AsyncOpenAICursorPage
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -64,44 +64,6 @@ def test_streaming_response_create(self, client: LlamaStackClient) -> None:
assert cast(Any, response.is_closed) is True
- @parametrize
- def test_method_retrieve(self, client: LlamaStackClient) -> None:
- file = client.files.retrieve(
- "file_id",
- )
- assert_matches_type(File, file, path=["response"])
-
- @parametrize
- def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
- response = client.files.with_raw_response.retrieve(
- "file_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- file = response.parse()
- assert_matches_type(File, file, path=["response"])
-
- @parametrize
- def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
- with client.files.with_streaming_response.retrieve(
- "file_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- file = response.parse()
- assert_matches_type(File, file, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"):
- client.files.with_raw_response.retrieve(
- "",
- )
-
@parametrize
def test_method_list(self, client: LlamaStackClient) -> None:
file = client.files.list()
@@ -137,44 +99,6 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None:
assert cast(Any, response.is_closed) is True
- @parametrize
- def test_method_delete(self, client: LlamaStackClient) -> None:
- file = client.files.delete(
- "file_id",
- )
- assert_matches_type(DeleteFileResponse, file, path=["response"])
-
- @parametrize
- def test_raw_response_delete(self, client: LlamaStackClient) -> None:
- response = client.files.with_raw_response.delete(
- "file_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- file = response.parse()
- assert_matches_type(DeleteFileResponse, file, path=["response"])
-
- @parametrize
- def test_streaming_response_delete(self, client: LlamaStackClient) -> None:
- with client.files.with_streaming_response.delete(
- "file_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- file = response.parse()
- assert_matches_type(DeleteFileResponse, file, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_delete(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"):
- client.files.with_raw_response.delete(
- "",
- )
-
@parametrize
def test_method_content(self, client: LlamaStackClient) -> None:
file = client.files.content(
@@ -265,44 +189,6 @@ async def test_streaming_response_create(self, async_client: AsyncLlamaStackClie
assert cast(Any, response.is_closed) is True
- @parametrize
- async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- file = await async_client.files.retrieve(
- "file_id",
- )
- assert_matches_type(File, file, path=["response"])
-
- @parametrize
- async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.files.with_raw_response.retrieve(
- "file_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- file = await response.parse()
- assert_matches_type(File, file, path=["response"])
-
- @parametrize
- async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.files.with_streaming_response.retrieve(
- "file_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- file = await response.parse()
- assert_matches_type(File, file, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"):
- await async_client.files.with_raw_response.retrieve(
- "",
- )
-
@parametrize
async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
file = await async_client.files.list()
@@ -338,44 +224,6 @@ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient
assert cast(Any, response.is_closed) is True
- @parametrize
- async def test_method_delete(self, async_client: AsyncLlamaStackClient) -> None:
- file = await async_client.files.delete(
- "file_id",
- )
- assert_matches_type(DeleteFileResponse, file, path=["response"])
-
- @parametrize
- async def test_raw_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.files.with_raw_response.delete(
- "file_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- file = await response.parse()
- assert_matches_type(DeleteFileResponse, file, path=["response"])
-
- @parametrize
- async def test_streaming_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.files.with_streaming_response.delete(
- "file_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- file = await response.parse()
- assert_matches_type(DeleteFileResponse, file, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_delete(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"):
- await async_client.files.with_raw_response.delete(
- "",
- )
-
@parametrize
async def test_method_content(self, async_client: AsyncLlamaStackClient) -> None:
file = await async_client.files.content(
diff --git a/tests/api_resources/vector_stores/test_files.py b/tests/api_resources/vector_stores/test_files.py
index cf38bc2b..5cc9f9c1 100644
--- a/tests/api_resources/vector_stores/test_files.py
+++ b/tests/api_resources/vector_stores/test_files.py
@@ -13,7 +13,6 @@
from llama_stack_client.types.vector_stores import (
VectorStoreFile,
FileDeleteResponse,
- FileContentResponse,
)
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -273,54 +272,6 @@ def test_path_params_delete(self, client: LlamaStackClient) -> None:
vector_store_id="vector_store_id",
)
- @parametrize
- def test_method_content(self, client: LlamaStackClient) -> None:
- file = client.vector_stores.files.content(
- file_id="file_id",
- vector_store_id="vector_store_id",
- )
- assert_matches_type(FileContentResponse, file, path=["response"])
-
- @parametrize
- def test_raw_response_content(self, client: LlamaStackClient) -> None:
- response = client.vector_stores.files.with_raw_response.content(
- file_id="file_id",
- vector_store_id="vector_store_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- file = response.parse()
- assert_matches_type(FileContentResponse, file, path=["response"])
-
- @parametrize
- def test_streaming_response_content(self, client: LlamaStackClient) -> None:
- with client.vector_stores.files.with_streaming_response.content(
- file_id="file_id",
- vector_store_id="vector_store_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- file = response.parse()
- assert_matches_type(FileContentResponse, file, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- def test_path_params_content(self, client: LlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
- client.vector_stores.files.with_raw_response.content(
- file_id="file_id",
- vector_store_id="",
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"):
- client.vector_stores.files.with_raw_response.content(
- file_id="",
- vector_store_id="vector_store_id",
- )
-
class TestAsyncFiles:
parametrize = pytest.mark.parametrize(
@@ -577,51 +528,3 @@ async def test_path_params_delete(self, async_client: AsyncLlamaStackClient) ->
file_id="",
vector_store_id="vector_store_id",
)
-
- @parametrize
- async def test_method_content(self, async_client: AsyncLlamaStackClient) -> None:
- file = await async_client.vector_stores.files.content(
- file_id="file_id",
- vector_store_id="vector_store_id",
- )
- assert_matches_type(FileContentResponse, file, path=["response"])
-
- @parametrize
- async def test_raw_response_content(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.vector_stores.files.with_raw_response.content(
- file_id="file_id",
- vector_store_id="vector_store_id",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- file = await response.parse()
- assert_matches_type(FileContentResponse, file, path=["response"])
-
- @parametrize
- async def test_streaming_response_content(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.vector_stores.files.with_streaming_response.content(
- file_id="file_id",
- vector_store_id="vector_store_id",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- file = await response.parse()
- assert_matches_type(FileContentResponse, file, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
- @parametrize
- async def test_path_params_content(self, async_client: AsyncLlamaStackClient) -> None:
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
- await async_client.vector_stores.files.with_raw_response.content(
- file_id="file_id",
- vector_store_id="",
- )
-
- with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"):
- await async_client.vector_stores.files.with_raw_response.content(
- file_id="",
- vector_store_id="vector_store_id",
- )
From 6e0dcff322ca2a62e027b0089c3bd6fdfec4f27a Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Tue, 30 Sep 2025 17:59:03 +0000
Subject: [PATCH 2/7] feat(api): SDKs for vector store file batches APIs
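
This restores the endpoints removed in the previous patch
(`files.retrieve`, `files.delete`, `vector_stores.files.content`) and
introduces the `vector_stores.file_batches` resource with create,
retrieve, list, and cancel methods in both sync and async variants.

A minimal usage sketch of the new batch surface, assuming a running
server and previously uploaded file IDs (base_url and IDs are
illustrative; the `id` and `status` fields are assumed from the
OpenAI-compatible batch schema, whose model file is added by this patch
but not excerpted here):

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:8321")

    # Index several already-uploaded files into one vector store in a
    # single batch.
    batch = client.vector_stores.file_batches.create(
        vector_store_id="vs_abc123",
        file_ids=["file_1", "file_2"],
    )

    # Poll the batch, page through its files, or cancel it; all three
    # take the batch ID plus the owning vector store ID.
    batch = client.vector_stores.file_batches.retrieve(
        batch.id, vector_store_id="vs_abc123"
    )
    for f in client.vector_stores.file_batches.list(
        batch.id, vector_store_id="vs_abc123", filter="completed"
    ):
        print(f.id, f.status)
    client.vector_stores.file_batches.cancel(
        batch.id, vector_store_id="vs_abc123"
    )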
---
.stats.yml | 6 +-
api.md | 27 +-
src/llama_stack_client/resources/files.py | 157 ++++++
.../resources/vector_stores/__init__.py | 14 +
.../resources/vector_stores/file_batches.py | 521 ++++++++++++++++++
.../resources/vector_stores/files.py | 85 +++
.../resources/vector_stores/vector_stores.py | 32 ++
src/llama_stack_client/types/__init__.py | 1 +
.../types/delete_file_response.py | 18 +
.../types/vector_stores/__init__.py | 7 +
.../vector_stores/file_batch_create_params.py | 53 ++
.../vector_stores/file_batch_list_params.py | 38 ++
.../vector_stores/file_content_response.py | 30 +
...st_vector_store_files_in_batch_response.py | 25 +
.../vector_store_file_batches.py | 44 ++
tests/api_resources/test_files.py | 154 +++++-
.../vector_stores/test_file_batches.py | 446 +++++++++++++++
.../api_resources/vector_stores/test_files.py | 97 ++++
18 files changed, 1750 insertions(+), 5 deletions(-)
create mode 100644 src/llama_stack_client/resources/vector_stores/file_batches.py
create mode 100644 src/llama_stack_client/types/delete_file_response.py
create mode 100644 src/llama_stack_client/types/vector_stores/file_batch_create_params.py
create mode 100644 src/llama_stack_client/types/vector_stores/file_batch_list_params.py
create mode 100644 src/llama_stack_client/types/vector_stores/file_content_response.py
create mode 100644 src/llama_stack_client/types/vector_stores/list_vector_store_files_in_batch_response.py
create mode 100644 src/llama_stack_client/types/vector_stores/vector_store_file_batches.py
create mode 100644 tests/api_resources/vector_stores/test_file_batches.py
diff --git a/.stats.yml b/.stats.yml
index cbb0181e..b453267a 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 102
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-3c569913f686c852ab392d470b9d905cc944d5a46a2324a21aa1376fb24e6714.yml
-openapi_spec_hash: 455f397c5f401ea425a4064bb39c6801
+configured_endpoints: 109
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-05bb7b0636a86ad0b485a5f2abfbd6b9e1873e802235f340af291f9ad9fb03b3.yml
+openapi_spec_hash: a78c30e308bc39473ea8e9ae9d0b726c
config_hash: 53c09ba1fdae5045de1860c479a51dc7
diff --git a/api.md b/api.md
index 7d4b431a..aa82dbe7 100644
--- a/api.md
+++ b/api.md
@@ -351,7 +351,11 @@ Methods:
Types:
```python
-from llama_stack_client.types.vector_stores import VectorStoreFile, FileDeleteResponse
+from llama_stack_client.types.vector_stores import (
+ VectorStoreFile,
+ FileDeleteResponse,
+ FileContentResponse,
+)
```
Methods:
@@ -361,6 +365,25 @@ Methods:
- client.vector_stores.files.update(file_id, \*, vector_store_id, \*\*params) -> VectorStoreFile
- client.vector_stores.files.list(vector_store_id, \*\*params) -> SyncOpenAICursorPage[VectorStoreFile]
- client.vector_stores.files.delete(file_id, \*, vector_store_id) -> FileDeleteResponse
+- client.vector_stores.files.content(file_id, \*, vector_store_id) -> FileContentResponse
+
+## FileBatches
+
+Types:
+
+```python
+from llama_stack_client.types.vector_stores import (
+ ListVectorStoreFilesInBatchResponse,
+ VectorStoreFileBatches,
+)
+```
+
+Methods:
+
+- client.vector_stores.file_batches.create(vector_store_id, \*\*params) -> VectorStoreFileBatches
+- client.vector_stores.file_batches.retrieve(batch_id, \*, vector_store_id) -> VectorStoreFileBatches
+- client.vector_stores.file_batches.list(batch_id, \*, vector_store_id, \*\*params) -> SyncOpenAICursorPage[VectorStoreFile]
+- client.vector_stores.file_batches.cancel(batch_id, \*, vector_store_id) -> VectorStoreFileBatches
# Models
@@ -584,5 +607,7 @@ from llama_stack_client.types import DeleteFileResponse, File, ListFilesResponse
Methods:
- client.files.create(\*\*params) -> File
+- client.files.retrieve(file_id) -> File
- client.files.list(\*\*params) -> SyncOpenAICursorPage[File]
+- client.files.delete(file_id) -> DeleteFileResponse
- client.files.content(file_id) -> object
diff --git a/src/llama_stack_client/resources/files.py b/src/llama_stack_client/resources/files.py
index dfe1edea..39add811 100644
--- a/src/llama_stack_client/resources/files.py
+++ b/src/llama_stack_client/resources/files.py
@@ -21,6 +21,7 @@
from ..pagination import SyncOpenAICursorPage, AsyncOpenAICursorPage
from ..types.file import File
from .._base_client import AsyncPaginator, make_request_options
+from ..types.delete_file_response import DeleteFileResponse
__all__ = ["FilesResource", "AsyncFilesResource"]
@@ -106,6 +107,39 @@ def create(
cast_to=File,
)
+ def retrieve(
+ self,
+ file_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> File:
+ """
+ Returns information about a specific file.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not file_id:
+ raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+ return self._get(
+ f"/v1/files/{file_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=File,
+ )
+
def list(
self,
*,
@@ -166,6 +200,39 @@ def list(
model=File,
)
+ def delete(
+ self,
+ file_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> DeleteFileResponse:
+ """
+ Delete a file.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not file_id:
+ raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+ return self._delete(
+ f"/v1/files/{file_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=DeleteFileResponse,
+ )
+
def content(
self,
file_id: str,
@@ -281,6 +348,39 @@ async def create(
cast_to=File,
)
+ async def retrieve(
+ self,
+ file_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> File:
+ """
+ Returns information about a specific file.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not file_id:
+ raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+ return await self._get(
+ f"/v1/files/{file_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=File,
+ )
+
def list(
self,
*,
@@ -341,6 +441,39 @@ def list(
model=File,
)
+ async def delete(
+ self,
+ file_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> DeleteFileResponse:
+ """
+ Delete a file.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not file_id:
+ raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+ return await self._delete(
+ f"/v1/files/{file_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=DeleteFileResponse,
+ )
+
async def content(
self,
file_id: str,
@@ -382,9 +515,15 @@ def __init__(self, files: FilesResource) -> None:
self.create = to_raw_response_wrapper(
files.create,
)
+ self.retrieve = to_raw_response_wrapper(
+ files.retrieve,
+ )
self.list = to_raw_response_wrapper(
files.list,
)
+ self.delete = to_raw_response_wrapper(
+ files.delete,
+ )
self.content = to_raw_response_wrapper(
files.content,
)
@@ -397,9 +536,15 @@ def __init__(self, files: AsyncFilesResource) -> None:
self.create = async_to_raw_response_wrapper(
files.create,
)
+ self.retrieve = async_to_raw_response_wrapper(
+ files.retrieve,
+ )
self.list = async_to_raw_response_wrapper(
files.list,
)
+ self.delete = async_to_raw_response_wrapper(
+ files.delete,
+ )
self.content = async_to_raw_response_wrapper(
files.content,
)
@@ -412,9 +557,15 @@ def __init__(self, files: FilesResource) -> None:
self.create = to_streamed_response_wrapper(
files.create,
)
+ self.retrieve = to_streamed_response_wrapper(
+ files.retrieve,
+ )
self.list = to_streamed_response_wrapper(
files.list,
)
+ self.delete = to_streamed_response_wrapper(
+ files.delete,
+ )
self.content = to_streamed_response_wrapper(
files.content,
)
@@ -427,9 +578,15 @@ def __init__(self, files: AsyncFilesResource) -> None:
self.create = async_to_streamed_response_wrapper(
files.create,
)
+ self.retrieve = async_to_streamed_response_wrapper(
+ files.retrieve,
+ )
self.list = async_to_streamed_response_wrapper(
files.list,
)
+ self.delete = async_to_streamed_response_wrapper(
+ files.delete,
+ )
self.content = async_to_streamed_response_wrapper(
files.content,
)
diff --git a/src/llama_stack_client/resources/vector_stores/__init__.py b/src/llama_stack_client/resources/vector_stores/__init__.py
index 85d202da..d83a42b6 100644
--- a/src/llama_stack_client/resources/vector_stores/__init__.py
+++ b/src/llama_stack_client/resources/vector_stores/__init__.py
@@ -8,6 +8,14 @@
FilesResourceWithStreamingResponse,
AsyncFilesResourceWithStreamingResponse,
)
+from .file_batches import (
+ FileBatchesResource,
+ AsyncFileBatchesResource,
+ FileBatchesResourceWithRawResponse,
+ AsyncFileBatchesResourceWithRawResponse,
+ FileBatchesResourceWithStreamingResponse,
+ AsyncFileBatchesResourceWithStreamingResponse,
+)
from .vector_stores import (
VectorStoresResource,
AsyncVectorStoresResource,
@@ -24,6 +32,12 @@
"AsyncFilesResourceWithRawResponse",
"FilesResourceWithStreamingResponse",
"AsyncFilesResourceWithStreamingResponse",
+ "FileBatchesResource",
+ "AsyncFileBatchesResource",
+ "FileBatchesResourceWithRawResponse",
+ "AsyncFileBatchesResourceWithRawResponse",
+ "FileBatchesResourceWithStreamingResponse",
+ "AsyncFileBatchesResourceWithStreamingResponse",
"VectorStoresResource",
"AsyncVectorStoresResource",
"VectorStoresResourceWithRawResponse",
diff --git a/src/llama_stack_client/resources/vector_stores/file_batches.py b/src/llama_stack_client/resources/vector_stores/file_batches.py
new file mode 100644
index 00000000..654fdd79
--- /dev/null
+++ b/src/llama_stack_client/resources/vector_stores/file_batches.py
@@ -0,0 +1,521 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable
+
+import httpx
+
+from ..._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
+from ..._utils import maybe_transform, async_maybe_transform
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+ to_raw_response_wrapper,
+ to_streamed_response_wrapper,
+ async_to_raw_response_wrapper,
+ async_to_streamed_response_wrapper,
+)
+from ...pagination import SyncOpenAICursorPage, AsyncOpenAICursorPage
+from ..._base_client import AsyncPaginator, make_request_options
+from ...types.vector_stores import file_batch_list_params, file_batch_create_params
+from ...types.vector_stores.vector_store_file import VectorStoreFile
+from ...types.vector_stores.vector_store_file_batches import VectorStoreFileBatches
+
+__all__ = ["FileBatchesResource", "AsyncFileBatchesResource"]
+
+
+class FileBatchesResource(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> FileBatchesResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return FileBatchesResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> FileBatchesResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return FileBatchesResourceWithStreamingResponse(self)
+
+ def create(
+ self,
+ vector_store_id: str,
+ *,
+ file_ids: SequenceNotStr[str],
+ attributes: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | Omit = omit,
+ chunking_strategy: file_batch_create_params.ChunkingStrategy | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> VectorStoreFileBatches:
+ """
+ Create a vector store file batch.
+
+ Args:
+ file_ids: A list of File IDs that the vector store should use.
+
+ attributes: (Optional) Key-value attributes to store with the files.
+
+ chunking_strategy: (Optional) The chunking strategy used to chunk the file(s). Defaults to auto.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not vector_store_id:
+ raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+ return self._post(
+ f"/v1/vector_stores/{vector_store_id}/file_batches",
+ body=maybe_transform(
+ {
+ "file_ids": file_ids,
+ "attributes": attributes,
+ "chunking_strategy": chunking_strategy,
+ },
+ file_batch_create_params.FileBatchCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=VectorStoreFileBatches,
+ )
+
+ def retrieve(
+ self,
+ batch_id: str,
+ *,
+ vector_store_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> VectorStoreFileBatches:
+ """
+ Retrieve a vector store file batch.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not vector_store_id:
+ raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+ if not batch_id:
+ raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+ return self._get(
+ f"/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=VectorStoreFileBatches,
+ )
+
+ def list(
+ self,
+ batch_id: str,
+ *,
+ vector_store_id: str,
+ after: str | Omit = omit,
+ before: str | Omit = omit,
+ filter: str | Omit = omit,
+ limit: int | Omit = omit,
+ order: str | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> SyncOpenAICursorPage[VectorStoreFile]:
+ """
+ Returns a list of vector store files in a batch.
+
+ Args:
+ after: A cursor for use in pagination. `after` is an object ID that defines your place
+ in the list.
+
+ before: A cursor for use in pagination. `before` is an object ID that defines your place
+ in the list.
+
+ filter: Filter by file status. One of in_progress, completed, failed, cancelled.
+
+ limit: A limit on the number of objects to be returned. Limit can range between 1 and
+ 100, and the default is 20.
+
+ order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+ order and `desc` for descending order.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not vector_store_id:
+ raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+ if not batch_id:
+ raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+ return self._get_api_list(
+ f"/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
+ page=SyncOpenAICursorPage[VectorStoreFile],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "before": before,
+ "filter": filter,
+ "limit": limit,
+ "order": order,
+ },
+ file_batch_list_params.FileBatchListParams,
+ ),
+ ),
+ model=VectorStoreFile,
+ )
+
+ def cancel(
+ self,
+ batch_id: str,
+ *,
+ vector_store_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> VectorStoreFileBatches:
+ """
+ Cancels a vector store file batch.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not vector_store_id:
+ raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+ if not batch_id:
+ raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+ return self._post(
+ f"/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=VectorStoreFileBatches,
+ )
+
+
+class AsyncFileBatchesResource(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncFileBatchesResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncFileBatchesResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncFileBatchesResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return AsyncFileBatchesResourceWithStreamingResponse(self)
+
+ async def create(
+ self,
+ vector_store_id: str,
+ *,
+ file_ids: SequenceNotStr[str],
+ attributes: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | Omit = omit,
+ chunking_strategy: file_batch_create_params.ChunkingStrategy | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> VectorStoreFileBatches:
+ """
+ Create a vector store file batch.
+
+ Args:
+ file_ids: A list of File IDs that the vector store should use.
+
+ attributes: (Optional) Key-value attributes to store with the files.
+
+ chunking_strategy: (Optional) The chunking strategy used to chunk the file(s). Defaults to auto.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not vector_store_id:
+ raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+ return await self._post(
+ f"/v1/vector_stores/{vector_store_id}/file_batches",
+ body=await async_maybe_transform(
+ {
+ "file_ids": file_ids,
+ "attributes": attributes,
+ "chunking_strategy": chunking_strategy,
+ },
+ file_batch_create_params.FileBatchCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=VectorStoreFileBatches,
+ )
+
+ async def retrieve(
+ self,
+ batch_id: str,
+ *,
+ vector_store_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> VectorStoreFileBatches:
+ """
+ Retrieve a vector store file batch.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not vector_store_id:
+ raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+ if not batch_id:
+ raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+ return await self._get(
+ f"/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=VectorStoreFileBatches,
+ )
+
+ def list(
+ self,
+ batch_id: str,
+ *,
+ vector_store_id: str,
+ after: str | Omit = omit,
+ before: str | Omit = omit,
+ filter: str | Omit = omit,
+ limit: int | Omit = omit,
+ order: str | Omit = omit,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> AsyncPaginator[VectorStoreFile, AsyncOpenAICursorPage[VectorStoreFile]]:
+ """
+ Returns a list of vector store files in a batch.
+
+ Args:
+ after: A cursor for use in pagination. `after` is an object ID that defines your place
+ in the list.
+
+ before: A cursor for use in pagination. `before` is an object ID that defines your place
+ in the list.
+
+ filter: Filter by file status. One of in_progress, completed, failed, cancelled.
+
+ limit: A limit on the number of objects to be returned. Limit can range between 1 and
+ 100, and the default is 20.
+
+ order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
+ order and `desc` for descending order.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not vector_store_id:
+ raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+ if not batch_id:
+ raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+ return self._get_api_list(
+ f"/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
+ page=AsyncOpenAICursorPage[VectorStoreFile],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "before": before,
+ "filter": filter,
+ "limit": limit,
+ "order": order,
+ },
+ file_batch_list_params.FileBatchListParams,
+ ),
+ ),
+ model=VectorStoreFile,
+ )
+
+ async def cancel(
+ self,
+ batch_id: str,
+ *,
+ vector_store_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> VectorStoreFileBatches:
+ """
+ Cancels a vector store file batch.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not vector_store_id:
+ raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+ if not batch_id:
+ raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+ return await self._post(
+ f"/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=VectorStoreFileBatches,
+ )
+
+
+class FileBatchesResourceWithRawResponse:
+ def __init__(self, file_batches: FileBatchesResource) -> None:
+ self._file_batches = file_batches
+
+ self.create = to_raw_response_wrapper(
+ file_batches.create,
+ )
+ self.retrieve = to_raw_response_wrapper(
+ file_batches.retrieve,
+ )
+ self.list = to_raw_response_wrapper(
+ file_batches.list,
+ )
+ self.cancel = to_raw_response_wrapper(
+ file_batches.cancel,
+ )
+
+
+class AsyncFileBatchesResourceWithRawResponse:
+ def __init__(self, file_batches: AsyncFileBatchesResource) -> None:
+ self._file_batches = file_batches
+
+ self.create = async_to_raw_response_wrapper(
+ file_batches.create,
+ )
+ self.retrieve = async_to_raw_response_wrapper(
+ file_batches.retrieve,
+ )
+ self.list = async_to_raw_response_wrapper(
+ file_batches.list,
+ )
+ self.cancel = async_to_raw_response_wrapper(
+ file_batches.cancel,
+ )
+
+
+class FileBatchesResourceWithStreamingResponse:
+ def __init__(self, file_batches: FileBatchesResource) -> None:
+ self._file_batches = file_batches
+
+ self.create = to_streamed_response_wrapper(
+ file_batches.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ file_batches.retrieve,
+ )
+ self.list = to_streamed_response_wrapper(
+ file_batches.list,
+ )
+ self.cancel = to_streamed_response_wrapper(
+ file_batches.cancel,
+ )
+
+
+class AsyncFileBatchesResourceWithStreamingResponse:
+ def __init__(self, file_batches: AsyncFileBatchesResource) -> None:
+ self._file_batches = file_batches
+
+ self.create = async_to_streamed_response_wrapper(
+ file_batches.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ file_batches.retrieve,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ file_batches.list,
+ )
+ self.cancel = async_to_streamed_response_wrapper(
+ file_batches.cancel,
+ )
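
For orientation, a minimal sketch of how the file-batch surface added above is meant to be called (base URL and IDs are hypothetical; the signatures follow the resource methods and tests in this patch):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # hypothetical local server

# Enqueue previously uploaded files into a vector store as one batch.
batch = client.vector_stores.file_batches.create(
    vector_store_id="vs_123",           # hypothetical IDs throughout
    file_ids=["file_abc", "file_def"],
)

# Re-fetch the batch, and cancel it if it is still running.
batch = client.vector_stores.file_batches.retrieve(
    batch_id=batch.id,
    vector_store_id="vs_123",
)
if batch.status == "in_progress":
    batch = client.vector_stores.file_batches.cancel(
        batch_id=batch.id,
        vector_store_id="vs_123",
    )
```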
diff --git a/src/llama_stack_client/resources/vector_stores/files.py b/src/llama_stack_client/resources/vector_stores/files.py
index 6d532dfb..f9a1ef31 100644
--- a/src/llama_stack_client/resources/vector_stores/files.py
+++ b/src/llama_stack_client/resources/vector_stores/files.py
@@ -22,6 +22,7 @@
from ...types.vector_stores import file_list_params, file_create_params, file_update_params
from ...types.vector_stores.vector_store_file import VectorStoreFile
from ...types.vector_stores.file_delete_response import FileDeleteResponse
+from ...types.vector_stores.file_content_response import FileContentResponse
__all__ = ["FilesResource", "AsyncFilesResource"]
@@ -274,6 +275,42 @@ def delete(
cast_to=FileDeleteResponse,
)
+ def content(
+ self,
+ file_id: str,
+ *,
+ vector_store_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> FileContentResponse:
+ """
+ Retrieves the contents of a vector store file.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not vector_store_id:
+ raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+ if not file_id:
+ raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+ return self._get(
+ f"/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=FileContentResponse,
+ )
+
class AsyncFilesResource(AsyncAPIResource):
@cached_property
@@ -523,6 +560,42 @@ async def delete(
cast_to=FileDeleteResponse,
)
+ async def content(
+ self,
+ file_id: str,
+ *,
+ vector_store_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
+ ) -> FileContentResponse:
+ """
+ Retrieves the contents of a vector store file.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not vector_store_id:
+ raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
+ if not file_id:
+ raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+ return await self._get(
+ f"/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=FileContentResponse,
+ )
+
class FilesResourceWithRawResponse:
def __init__(self, files: FilesResource) -> None:
@@ -543,6 +616,9 @@ def __init__(self, files: FilesResource) -> None:
self.delete = to_raw_response_wrapper(
files.delete,
)
+ self.content = to_raw_response_wrapper(
+ files.content,
+ )
class AsyncFilesResourceWithRawResponse:
@@ -564,6 +640,9 @@ def __init__(self, files: AsyncFilesResource) -> None:
self.delete = async_to_raw_response_wrapper(
files.delete,
)
+ self.content = async_to_raw_response_wrapper(
+ files.content,
+ )
class FilesResourceWithStreamingResponse:
@@ -585,6 +664,9 @@ def __init__(self, files: FilesResource) -> None:
self.delete = to_streamed_response_wrapper(
files.delete,
)
+ self.content = to_streamed_response_wrapper(
+ files.content,
+ )
class AsyncFilesResourceWithStreamingResponse:
@@ -606,3 +688,6 @@ def __init__(self, files: AsyncFilesResource) -> None:
self.delete = async_to_streamed_response_wrapper(
files.delete,
)
+ self.content = async_to_streamed_response_wrapper(
+ files.content,
+ )
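
The restored `content` method can be exercised like so (a sketch with hypothetical IDs; `FileContentResponse` is the model re-added later in this patch):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # hypothetical

resp = client.vector_stores.files.content(
    file_id="file_abc",          # hypothetical IDs
    vector_store_id="vs_123",
)
for item in resp.content:        # list of text chunks extracted from the file
    if item.type == "text":
        print(item.text)
```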
diff --git a/src/llama_stack_client/resources/vector_stores/vector_stores.py b/src/llama_stack_client/resources/vector_stores/vector_stores.py
index f858100b..86bb0e06 100644
--- a/src/llama_stack_client/resources/vector_stores/vector_stores.py
+++ b/src/llama_stack_client/resources/vector_stores/vector_stores.py
@@ -31,6 +31,14 @@
async_to_streamed_response_wrapper,
)
from ...pagination import SyncOpenAICursorPage, AsyncOpenAICursorPage
+from .file_batches import (
+ FileBatchesResource,
+ AsyncFileBatchesResource,
+ FileBatchesResourceWithRawResponse,
+ AsyncFileBatchesResourceWithRawResponse,
+ FileBatchesResourceWithStreamingResponse,
+ AsyncFileBatchesResourceWithStreamingResponse,
+)
from ..._base_client import AsyncPaginator, make_request_options
from ...types.vector_store import VectorStore
from ...types.vector_store_delete_response import VectorStoreDeleteResponse
@@ -44,6 +52,10 @@ class VectorStoresResource(SyncAPIResource):
def files(self) -> FilesResource:
return FilesResource(self._client)
+ @cached_property
+ def file_batches(self) -> FileBatchesResource:
+ return FileBatchesResource(self._client)
+
@cached_property
def with_raw_response(self) -> VectorStoresResourceWithRawResponse:
"""
@@ -378,6 +390,10 @@ class AsyncVectorStoresResource(AsyncAPIResource):
def files(self) -> AsyncFilesResource:
return AsyncFilesResource(self._client)
+ @cached_property
+ def file_batches(self) -> AsyncFileBatchesResource:
+ return AsyncFileBatchesResource(self._client)
+
@cached_property
def with_raw_response(self) -> AsyncVectorStoresResourceWithRawResponse:
"""
@@ -734,6 +750,10 @@ def __init__(self, vector_stores: VectorStoresResource) -> None:
def files(self) -> FilesResourceWithRawResponse:
return FilesResourceWithRawResponse(self._vector_stores.files)
+ @cached_property
+ def file_batches(self) -> FileBatchesResourceWithRawResponse:
+ return FileBatchesResourceWithRawResponse(self._vector_stores.file_batches)
+
class AsyncVectorStoresResourceWithRawResponse:
def __init__(self, vector_stores: AsyncVectorStoresResource) -> None:
@@ -762,6 +782,10 @@ def __init__(self, vector_stores: AsyncVectorStoresResource) -> None:
def files(self) -> AsyncFilesResourceWithRawResponse:
return AsyncFilesResourceWithRawResponse(self._vector_stores.files)
+ @cached_property
+ def file_batches(self) -> AsyncFileBatchesResourceWithRawResponse:
+ return AsyncFileBatchesResourceWithRawResponse(self._vector_stores.file_batches)
+
class VectorStoresResourceWithStreamingResponse:
def __init__(self, vector_stores: VectorStoresResource) -> None:
@@ -790,6 +814,10 @@ def __init__(self, vector_stores: VectorStoresResource) -> None:
def files(self) -> FilesResourceWithStreamingResponse:
return FilesResourceWithStreamingResponse(self._vector_stores.files)
+ @cached_property
+ def file_batches(self) -> FileBatchesResourceWithStreamingResponse:
+ return FileBatchesResourceWithStreamingResponse(self._vector_stores.file_batches)
+
class AsyncVectorStoresResourceWithStreamingResponse:
def __init__(self, vector_stores: AsyncVectorStoresResource) -> None:
@@ -817,3 +845,7 @@ def __init__(self, vector_stores: AsyncVectorStoresResource) -> None:
@cached_property
def files(self) -> AsyncFilesResourceWithStreamingResponse:
return AsyncFilesResourceWithStreamingResponse(self._vector_stores.files)
+
+ @cached_property
+ def file_batches(self) -> AsyncFileBatchesResourceWithStreamingResponse:
+ return AsyncFileBatchesResourceWithStreamingResponse(self._vector_stores.file_batches)
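
The wrapper wiring above gives the new sub-resource the same raw-response escape hatch as its siblings; a sketch (hypothetical IDs, access pattern as exercised in the tests below):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # hypothetical

response = client.vector_stores.file_batches.with_raw_response.create(
    vector_store_id="vs_123",  # hypothetical IDs
    file_ids=["file_abc"],
)
print(response.http_request.headers.get("X-Stainless-Lang"))  # "python"
batch = response.parse()  # parse the body into VectorStoreFileBatches
```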
diff --git a/src/llama_stack_client/types/__init__.py b/src/llama_stack_client/types/__init__.py
index 7bc1b518..f81ada61 100644
--- a/src/llama_stack_client/types/__init__.py
+++ b/src/llama_stack_client/types/__init__.py
@@ -64,6 +64,7 @@
from .run_shield_response import RunShieldResponse as RunShieldResponse
from .tool_execution_step import ToolExecutionStep as ToolExecutionStep
from .tool_response_param import ToolResponseParam as ToolResponseParam
+from .delete_file_response import DeleteFileResponse as DeleteFileResponse
from .eval_run_eval_params import EvalRunEvalParams as EvalRunEvalParams
from .list_models_response import ListModelsResponse as ListModelsResponse
from .list_routes_response import ListRoutesResponse as ListRoutesResponse
diff --git a/src/llama_stack_client/types/delete_file_response.py b/src/llama_stack_client/types/delete_file_response.py
new file mode 100644
index 00000000..2188556f
--- /dev/null
+++ b/src/llama_stack_client/types/delete_file_response.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["DeleteFileResponse"]
+
+
+class DeleteFileResponse(BaseModel):
+ id: str
+ """The file identifier that was deleted"""
+
+ deleted: bool
+ """Whether the file was successfully deleted"""
+
+ object: Literal["file"]
+ """The object type, which is always "file" """
diff --git a/src/llama_stack_client/types/vector_stores/__init__.py b/src/llama_stack_client/types/vector_stores/__init__.py
index 2f5fd33b..677030d9 100644
--- a/src/llama_stack_client/types/vector_stores/__init__.py
+++ b/src/llama_stack_client/types/vector_stores/__init__.py
@@ -7,3 +7,10 @@
from .file_create_params import FileCreateParams as FileCreateParams
from .file_update_params import FileUpdateParams as FileUpdateParams
from .file_delete_response import FileDeleteResponse as FileDeleteResponse
+from .file_content_response import FileContentResponse as FileContentResponse
+from .file_batch_list_params import FileBatchListParams as FileBatchListParams
+from .file_batch_create_params import FileBatchCreateParams as FileBatchCreateParams
+from .vector_store_file_batches import VectorStoreFileBatches as VectorStoreFileBatches
+from .list_vector_store_files_in_batch_response import (
+ ListVectorStoreFilesInBatchResponse as ListVectorStoreFilesInBatchResponse,
+)
diff --git a/src/llama_stack_client/types/vector_stores/file_batch_create_params.py b/src/llama_stack_client/types/vector_stores/file_batch_create_params.py
new file mode 100644
index 00000000..e6eba90a
--- /dev/null
+++ b/src/llama_stack_client/types/vector_stores/file_batch_create_params.py
@@ -0,0 +1,53 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from ..._types import SequenceNotStr
+
+__all__ = [
+ "FileBatchCreateParams",
+ "ChunkingStrategy",
+ "ChunkingStrategyVectorStoreChunkingStrategyAuto",
+ "ChunkingStrategyVectorStoreChunkingStrategyStatic",
+ "ChunkingStrategyVectorStoreChunkingStrategyStaticStatic",
+]
+
+
+class FileBatchCreateParams(TypedDict, total=False):
+ file_ids: Required[SequenceNotStr[str]]
+ """A list of File IDs that the vector store should use."""
+
+ attributes: Dict[str, Union[bool, float, str, Iterable[object], object, None]]
+ """(Optional) Key-value attributes to store with the files."""
+
+ chunking_strategy: ChunkingStrategy
+ """(Optional) The chunking strategy used to chunk the file(s). Defaults to auto."""
+
+
+class ChunkingStrategyVectorStoreChunkingStrategyAuto(TypedDict, total=False):
+ type: Required[Literal["auto"]]
+ """Strategy type, always "auto" for automatic chunking"""
+
+
+class ChunkingStrategyVectorStoreChunkingStrategyStaticStatic(TypedDict, total=False):
+ chunk_overlap_tokens: Required[int]
+ """Number of tokens to overlap between adjacent chunks"""
+
+ max_chunk_size_tokens: Required[int]
+ """Maximum number of tokens per chunk, must be between 100 and 4096"""
+
+
+class ChunkingStrategyVectorStoreChunkingStrategyStatic(TypedDict, total=False):
+ static: Required[ChunkingStrategyVectorStoreChunkingStrategyStaticStatic]
+ """Configuration parameters for the static chunking strategy"""
+
+ type: Required[Literal["static"]]
+ """Strategy type, always "static" for static chunking"""
+
+
+ChunkingStrategy: TypeAlias = Union[
+ ChunkingStrategyVectorStoreChunkingStrategyAuto, ChunkingStrategyVectorStoreChunkingStrategyStatic
+]
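
Because these are `TypedDict`s, call sites pass plain dicts; e.g. a static strategy (token counts are illustrative and must respect the 100-4096 bound documented above):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # hypothetical

batch = client.vector_stores.file_batches.create(
    vector_store_id="vs_123",   # hypothetical IDs
    file_ids=["file_abc"],
    chunking_strategy={
        "type": "static",
        "static": {
            "max_chunk_size_tokens": 800,
            "chunk_overlap_tokens": 100,
        },
    },
)
```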
diff --git a/src/llama_stack_client/types/vector_stores/file_batch_list_params.py b/src/llama_stack_client/types/vector_stores/file_batch_list_params.py
new file mode 100644
index 00000000..79e67eb1
--- /dev/null
+++ b/src/llama_stack_client/types/vector_stores/file_batch_list_params.py
@@ -0,0 +1,38 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Required, TypedDict
+
+__all__ = ["FileBatchListParams"]
+
+
+class FileBatchListParams(TypedDict, total=False):
+ vector_store_id: Required[str]
+
+ after: str
+ """A cursor for use in pagination.
+
+ `after` is an object ID that defines your place in the list.
+ """
+
+ before: str
+ """A cursor for use in pagination.
+
+ `before` is an object ID that defines your place in the list.
+ """
+
+ filter: str
+ """Filter by file status. One of in_progress, completed, failed, cancelled."""
+
+ limit: int
+ """A limit on the number of objects to be returned.
+
+ Limit can range between 1 and 100, and the default is 20.
+ """
+
+ order: str
+ """Sort order by the `created_at` timestamp of the objects.
+
+ `asc` for ascending order and `desc` for descending order.
+ """
diff --git a/src/llama_stack_client/types/vector_stores/file_content_response.py b/src/llama_stack_client/types/vector_stores/file_content_response.py
new file mode 100644
index 00000000..035a34a8
--- /dev/null
+++ b/src/llama_stack_client/types/vector_stores/file_content_response.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["FileContentResponse", "Content"]
+
+
+class Content(BaseModel):
+ text: str
+ """The actual text content"""
+
+ type: Literal["text"]
+ """Content type, currently only "text" is supported"""
+
+
+class FileContentResponse(BaseModel):
+ attributes: Dict[str, Union[bool, float, str, List[object], object, None]]
+ """Key-value attributes associated with the file"""
+
+ content: List[Content]
+ """List of content items from the file"""
+
+ file_id: str
+ """Unique identifier for the file"""
+
+ filename: str
+ """Name of the file"""
diff --git a/src/llama_stack_client/types/vector_stores/list_vector_store_files_in_batch_response.py b/src/llama_stack_client/types/vector_stores/list_vector_store_files_in_batch_response.py
new file mode 100644
index 00000000..34ca9e46
--- /dev/null
+++ b/src/llama_stack_client/types/vector_stores/list_vector_store_files_in_batch_response.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+
+from ..._models import BaseModel
+from .vector_store_file import VectorStoreFile
+
+__all__ = ["ListVectorStoreFilesInBatchResponse"]
+
+
+class ListVectorStoreFilesInBatchResponse(BaseModel):
+ data: List[VectorStoreFile]
+ """List of vector store file objects in the batch"""
+
+ has_more: bool
+ """Whether there are more files available beyond this page"""
+
+ object: str
+ """Object type identifier, always "list" """
+
+ first_id: Optional[str] = None
+ """(Optional) ID of the first file in the list for pagination"""
+
+ last_id: Optional[str] = None
+ """(Optional) ID of the last file in the list for pagination"""
diff --git a/src/llama_stack_client/types/vector_stores/vector_store_file_batches.py b/src/llama_stack_client/types/vector_stores/vector_store_file_batches.py
new file mode 100644
index 00000000..738f7edc
--- /dev/null
+++ b/src/llama_stack_client/types/vector_stores/vector_store_file_batches.py
@@ -0,0 +1,44 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["VectorStoreFileBatches", "FileCounts"]
+
+
+class FileCounts(BaseModel):
+ cancelled: int
+ """Number of files that had their processing cancelled"""
+
+ completed: int
+ """Number of files that have been successfully processed"""
+
+ failed: int
+ """Number of files that failed to process"""
+
+ in_progress: int
+ """Number of files currently being processed"""
+
+ total: int
+ """Total number of files in the vector store"""
+
+
+class VectorStoreFileBatches(BaseModel):
+ id: str
+ """Unique identifier for the file batch"""
+
+ created_at: int
+ """Timestamp when the file batch was created"""
+
+ file_counts: FileCounts
+ """File processing status counts for the batch"""
+
+ object: str
+ """Object type identifier, always "vector_store.file_batch" """
+
+ status: Literal["completed", "in_progress", "cancelled", "failed"]
+ """Current processing status of the file batch"""
+
+ vector_store_id: str
+ """ID of the vector store containing the file batch"""
diff --git a/tests/api_resources/test_files.py b/tests/api_resources/test_files.py
index 926347a5..83b763ab 100644
--- a/tests/api_resources/test_files.py
+++ b/tests/api_resources/test_files.py
@@ -9,7 +9,7 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types import File
+from llama_stack_client.types import File, DeleteFileResponse
from llama_stack_client.pagination import SyncOpenAICursorPage, AsyncOpenAICursorPage
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -64,6 +64,44 @@ def test_streaming_response_create(self, client: LlamaStackClient) -> None:
assert cast(Any, response.is_closed) is True
+ @parametrize
+ def test_method_retrieve(self, client: LlamaStackClient) -> None:
+ file = client.files.retrieve(
+ "file_id",
+ )
+ assert_matches_type(File, file, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
+ response = client.files.with_raw_response.retrieve(
+ "file_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ file = response.parse()
+ assert_matches_type(File, file, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
+ with client.files.with_streaming_response.retrieve(
+ "file_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ file = response.parse()
+ assert_matches_type(File, file, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"):
+ client.files.with_raw_response.retrieve(
+ "",
+ )
+
@parametrize
def test_method_list(self, client: LlamaStackClient) -> None:
file = client.files.list()
@@ -99,6 +137,44 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None:
assert cast(Any, response.is_closed) is True
+ @parametrize
+ def test_method_delete(self, client: LlamaStackClient) -> None:
+ file = client.files.delete(
+ "file_id",
+ )
+ assert_matches_type(DeleteFileResponse, file, path=["response"])
+
+ @parametrize
+ def test_raw_response_delete(self, client: LlamaStackClient) -> None:
+ response = client.files.with_raw_response.delete(
+ "file_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ file = response.parse()
+ assert_matches_type(DeleteFileResponse, file, path=["response"])
+
+ @parametrize
+ def test_streaming_response_delete(self, client: LlamaStackClient) -> None:
+ with client.files.with_streaming_response.delete(
+ "file_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ file = response.parse()
+ assert_matches_type(DeleteFileResponse, file, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_delete(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"):
+ client.files.with_raw_response.delete(
+ "",
+ )
+
@parametrize
def test_method_content(self, client: LlamaStackClient) -> None:
file = client.files.content(
@@ -189,6 +265,44 @@ async def test_streaming_response_create(self, async_client: AsyncLlamaStackClie
assert cast(Any, response.is_closed) is True
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ file = await async_client.files.retrieve(
+ "file_id",
+ )
+ assert_matches_type(File, file, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.files.with_raw_response.retrieve(
+ "file_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ file = await response.parse()
+ assert_matches_type(File, file, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.files.with_streaming_response.retrieve(
+ "file_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ file = await response.parse()
+ assert_matches_type(File, file, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"):
+ await async_client.files.with_raw_response.retrieve(
+ "",
+ )
+
@parametrize
async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
file = await async_client.files.list()
@@ -224,6 +338,44 @@ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient
assert cast(Any, response.is_closed) is True
+ @parametrize
+ async def test_method_delete(self, async_client: AsyncLlamaStackClient) -> None:
+ file = await async_client.files.delete(
+ "file_id",
+ )
+ assert_matches_type(DeleteFileResponse, file, path=["response"])
+
+ @parametrize
+ async def test_raw_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.files.with_raw_response.delete(
+ "file_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ file = await response.parse()
+ assert_matches_type(DeleteFileResponse, file, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.files.with_streaming_response.delete(
+ "file_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ file = await response.parse()
+ assert_matches_type(DeleteFileResponse, file, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_delete(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"):
+ await async_client.files.with_raw_response.delete(
+ "",
+ )
+
@parametrize
async def test_method_content(self, async_client: AsyncLlamaStackClient) -> None:
file = await async_client.files.content(
diff --git a/tests/api_resources/vector_stores/test_file_batches.py b/tests/api_resources/vector_stores/test_file_batches.py
new file mode 100644
index 00000000..b92f31b2
--- /dev/null
+++ b/tests/api_resources/vector_stores/test_file_batches.py
@@ -0,0 +1,446 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from tests.utils import assert_matches_type
+from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
+from llama_stack_client.pagination import SyncOpenAICursorPage, AsyncOpenAICursorPage
+from llama_stack_client.types.vector_stores import (
+ VectorStoreFile,
+ VectorStoreFileBatches,
+)
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
+
+
+class TestFileBatches:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_create(self, client: LlamaStackClient) -> None:
+ file_batch = client.vector_stores.file_batches.create(
+ vector_store_id="vector_store_id",
+ file_ids=["string"],
+ )
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ def test_method_create_with_all_params(self, client: LlamaStackClient) -> None:
+ file_batch = client.vector_stores.file_batches.create(
+ vector_store_id="vector_store_id",
+ file_ids=["string"],
+ attributes={"foo": True},
+ chunking_strategy={"type": "auto"},
+ )
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ def test_raw_response_create(self, client: LlamaStackClient) -> None:
+ response = client.vector_stores.file_batches.with_raw_response.create(
+ vector_store_id="vector_store_id",
+ file_ids=["string"],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ file_batch = response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ def test_streaming_response_create(self, client: LlamaStackClient) -> None:
+ with client.vector_stores.file_batches.with_streaming_response.create(
+ vector_store_id="vector_store_id",
+ file_ids=["string"],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ file_batch = response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_create(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
+ client.vector_stores.file_batches.with_raw_response.create(
+ vector_store_id="",
+ file_ids=["string"],
+ )
+
+ @parametrize
+ def test_method_retrieve(self, client: LlamaStackClient) -> None:
+ file_batch = client.vector_stores.file_batches.retrieve(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
+ response = client.vector_stores.file_batches.with_raw_response.retrieve(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ file_batch = response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
+ with client.vector_stores.file_batches.with_streaming_response.retrieve(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ file_batch = response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
+ client.vector_stores.file_batches.with_raw_response.retrieve(
+ batch_id="batch_id",
+ vector_store_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"):
+ client.vector_stores.file_batches.with_raw_response.retrieve(
+ batch_id="",
+ vector_store_id="vector_store_id",
+ )
+
+ @parametrize
+ def test_method_list(self, client: LlamaStackClient) -> None:
+ file_batch = client.vector_stores.file_batches.list(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+ assert_matches_type(SyncOpenAICursorPage[VectorStoreFile], file_batch, path=["response"])
+
+ @parametrize
+ def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
+ file_batch = client.vector_stores.file_batches.list(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ after="after",
+ before="before",
+ filter="filter",
+ limit=0,
+ order="order",
+ )
+ assert_matches_type(SyncOpenAICursorPage[VectorStoreFile], file_batch, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: LlamaStackClient) -> None:
+ response = client.vector_stores.file_batches.with_raw_response.list(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ file_batch = response.parse()
+ assert_matches_type(SyncOpenAICursorPage[VectorStoreFile], file_batch, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: LlamaStackClient) -> None:
+ with client.vector_stores.file_batches.with_streaming_response.list(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ file_batch = response.parse()
+ assert_matches_type(SyncOpenAICursorPage[VectorStoreFile], file_batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_list(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
+ client.vector_stores.file_batches.with_raw_response.list(
+ batch_id="batch_id",
+ vector_store_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"):
+ client.vector_stores.file_batches.with_raw_response.list(
+ batch_id="",
+ vector_store_id="vector_store_id",
+ )
+
+ @parametrize
+ def test_method_cancel(self, client: LlamaStackClient) -> None:
+ file_batch = client.vector_stores.file_batches.cancel(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ def test_raw_response_cancel(self, client: LlamaStackClient) -> None:
+ response = client.vector_stores.file_batches.with_raw_response.cancel(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ file_batch = response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ def test_streaming_response_cancel(self, client: LlamaStackClient) -> None:
+ with client.vector_stores.file_batches.with_streaming_response.cancel(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ file_batch = response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_cancel(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
+ client.vector_stores.file_batches.with_raw_response.cancel(
+ batch_id="batch_id",
+ vector_store_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"):
+ client.vector_stores.file_batches.with_raw_response.cancel(
+ batch_id="",
+ vector_store_id="vector_store_id",
+ )
+
+
+class TestAsyncFileBatches:
+ parametrize = pytest.mark.parametrize(
+ "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
+ )
+
+ @parametrize
+ async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None:
+ file_batch = await async_client.vector_stores.file_batches.create(
+ vector_store_id="vector_store_id",
+ file_ids=["string"],
+ )
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ async def test_method_create_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ file_batch = await async_client.vector_stores.file_batches.create(
+ vector_store_id="vector_store_id",
+ file_ids=["string"],
+ attributes={"foo": True},
+ chunking_strategy={"type": "auto"},
+ )
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.vector_stores.file_batches.with_raw_response.create(
+ vector_store_id="vector_store_id",
+ file_ids=["string"],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ file_batch = await response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_create(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.vector_stores.file_batches.with_streaming_response.create(
+ vector_store_id="vector_store_id",
+ file_ids=["string"],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ file_batch = await response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_create(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
+ await async_client.vector_stores.file_batches.with_raw_response.create(
+ vector_store_id="",
+ file_ids=["string"],
+ )
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ file_batch = await async_client.vector_stores.file_batches.retrieve(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.vector_stores.file_batches.with_raw_response.retrieve(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ file_batch = await response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.vector_stores.file_batches.with_streaming_response.retrieve(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ file_batch = await response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
+ await async_client.vector_stores.file_batches.with_raw_response.retrieve(
+ batch_id="batch_id",
+ vector_store_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"):
+ await async_client.vector_stores.file_batches.with_raw_response.retrieve(
+ batch_id="",
+ vector_store_id="vector_store_id",
+ )
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
+ file_batch = await async_client.vector_stores.file_batches.list(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+ assert_matches_type(AsyncOpenAICursorPage[VectorStoreFile], file_batch, path=["response"])
+
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
+ file_batch = await async_client.vector_stores.file_batches.list(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ after="after",
+ before="before",
+ filter="filter",
+ limit=0,
+ order="order",
+ )
+ assert_matches_type(AsyncOpenAICursorPage[VectorStoreFile], file_batch, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.vector_stores.file_batches.with_raw_response.list(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ file_batch = await response.parse()
+ assert_matches_type(AsyncOpenAICursorPage[VectorStoreFile], file_batch, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.vector_stores.file_batches.with_streaming_response.list(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ file_batch = await response.parse()
+ assert_matches_type(AsyncOpenAICursorPage[VectorStoreFile], file_batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_list(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
+ await async_client.vector_stores.file_batches.with_raw_response.list(
+ batch_id="batch_id",
+ vector_store_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"):
+ await async_client.vector_stores.file_batches.with_raw_response.list(
+ batch_id="",
+ vector_store_id="vector_store_id",
+ )
+
+ @parametrize
+ async def test_method_cancel(self, async_client: AsyncLlamaStackClient) -> None:
+ file_batch = await async_client.vector_stores.file_batches.cancel(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ async def test_raw_response_cancel(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.vector_stores.file_batches.with_raw_response.cancel(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ file_batch = await response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_cancel(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.vector_stores.file_batches.with_streaming_response.cancel(
+ batch_id="batch_id",
+ vector_store_id="vector_store_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ file_batch = await response.parse()
+ assert_matches_type(VectorStoreFileBatches, file_batch, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_cancel(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
+ await async_client.vector_stores.file_batches.with_raw_response.cancel(
+ batch_id="batch_id",
+ vector_store_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"):
+ await async_client.vector_stores.file_batches.with_raw_response.cancel(
+ batch_id="",
+ vector_store_id="vector_store_id",
+ )
diff --git a/tests/api_resources/vector_stores/test_files.py b/tests/api_resources/vector_stores/test_files.py
index 5cc9f9c1..cf38bc2b 100644
--- a/tests/api_resources/vector_stores/test_files.py
+++ b/tests/api_resources/vector_stores/test_files.py
@@ -13,6 +13,7 @@
from llama_stack_client.types.vector_stores import (
VectorStoreFile,
FileDeleteResponse,
+ FileContentResponse,
)
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -272,6 +273,54 @@ def test_path_params_delete(self, client: LlamaStackClient) -> None:
vector_store_id="vector_store_id",
)
+ @parametrize
+ def test_method_content(self, client: LlamaStackClient) -> None:
+ file = client.vector_stores.files.content(
+ file_id="file_id",
+ vector_store_id="vector_store_id",
+ )
+ assert_matches_type(FileContentResponse, file, path=["response"])
+
+ @parametrize
+ def test_raw_response_content(self, client: LlamaStackClient) -> None:
+ response = client.vector_stores.files.with_raw_response.content(
+ file_id="file_id",
+ vector_store_id="vector_store_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ file = response.parse()
+ assert_matches_type(FileContentResponse, file, path=["response"])
+
+ @parametrize
+ def test_streaming_response_content(self, client: LlamaStackClient) -> None:
+ with client.vector_stores.files.with_streaming_response.content(
+ file_id="file_id",
+ vector_store_id="vector_store_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ file = response.parse()
+ assert_matches_type(FileContentResponse, file, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_content(self, client: LlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
+ client.vector_stores.files.with_raw_response.content(
+ file_id="file_id",
+ vector_store_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"):
+ client.vector_stores.files.with_raw_response.content(
+ file_id="",
+ vector_store_id="vector_store_id",
+ )
+
class TestAsyncFiles:
parametrize = pytest.mark.parametrize(
@@ -528,3 +577,51 @@ async def test_path_params_delete(self, async_client: AsyncLlamaStackClient) ->
file_id="",
vector_store_id="vector_store_id",
)
+
+ @parametrize
+ async def test_method_content(self, async_client: AsyncLlamaStackClient) -> None:
+ file = await async_client.vector_stores.files.content(
+ file_id="file_id",
+ vector_store_id="vector_store_id",
+ )
+ assert_matches_type(FileContentResponse, file, path=["response"])
+
+ @parametrize
+ async def test_raw_response_content(self, async_client: AsyncLlamaStackClient) -> None:
+ response = await async_client.vector_stores.files.with_raw_response.content(
+ file_id="file_id",
+ vector_store_id="vector_store_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ file = await response.parse()
+ assert_matches_type(FileContentResponse, file, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_content(self, async_client: AsyncLlamaStackClient) -> None:
+ async with async_client.vector_stores.files.with_streaming_response.content(
+ file_id="file_id",
+ vector_store_id="vector_store_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ file = await response.parse()
+ assert_matches_type(FileContentResponse, file, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_content(self, async_client: AsyncLlamaStackClient) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"):
+ await async_client.vector_stores.files.with_raw_response.content(
+ file_id="file_id",
+ vector_store_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"):
+ await async_client.vector_stores.files.with_raw_response.content(
+ file_id="",
+ vector_store_id="vector_store_id",
+ )
From 04caaad5e2631d66f5a0d30c07e1872d3aded894 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Tue, 30 Sep 2025 19:35:48 +0000
Subject: [PATCH 3/7] feat(api): moving { rerank, agents } to `client.alpha.`
---
.stats.yml | 6 +-
api.md | 167 +++++++++---------
src/llama_stack_client/_client.py | 114 ++++--------
src/llama_stack_client/resources/__init__.py | 42 ++---
.../resources/alpha/__init__.py | 47 +++++
.../resources/{ => alpha}/agents/__init__.py | 0
.../resources/{ => alpha}/agents/agents.py | 22 +--
.../resources/{ => alpha}/agents/session.py | 20 +--
.../resources/{ => alpha}/agents/steps.py | 12 +-
.../resources/{ => alpha}/agents/turn.py | 53 +++---
.../resources/alpha/alpha.py | 134 ++++++++++++++
.../resources/{ => alpha}/inference.py | 18 +-
src/llama_stack_client/types/__init__.py | 16 --
.../agent_turn_response_stream_chunk.py | 11 --
.../types/agents/turn_response_event.py | 160 -----------------
.../types/alpha/__init__.py | 17 ++
.../types/{ => alpha}/agent_create_params.py | 2 +-
.../{ => alpha}/agent_create_response.py | 2 +-
.../types/{ => alpha}/agent_list_params.py | 0
.../types/{ => alpha}/agent_list_response.py | 2 +-
.../{ => alpha}/agent_retrieve_response.py | 4 +-
.../types/{ => alpha}/agents/__init__.py | 2 -
.../types/{ => alpha}/agents/session.py | 2 +-
.../agents/session_create_params.py | 0
.../agents/session_create_response.py | 2 +-
.../{ => alpha}/agents/session_list_params.py | 0
.../agents/session_list_response.py | 2 +-
.../agents/session_retrieve_params.py | 2 +-
.../agents/step_retrieve_response.py | 4 +-
.../types/{ => alpha}/agents/turn.py | 12 +-
.../{ => alpha}/agents/turn_create_params.py | 8 +-
.../{ => alpha}/agents/turn_resume_params.py | 0
.../{ => alpha}/inference_rerank_params.py | 2 +-
.../{ => alpha}/inference_rerank_response.py | 2 +-
.../types/{ => alpha}/inference_step.py | 4 +-
.../{ => alpha}/memory_retrieval_step.py | 4 +-
.../types/{ => alpha}/shield_call_step.py | 4 +-
.../types/{ => alpha}/tool_execution_step.py | 4 +-
.../types/{ => alpha}/tool_response.py | 4 +-
.../types/{ => alpha}/tool_response_param.py | 2 +-
.../types/shared/__init__.py | 3 -
.../types/shared/chat_completion_response.py | 25 ---
src/llama_stack_client/types/shared/metric.py | 18 --
.../types/shared/tool_param_definition.py | 21 ---
.../{agents => alpha}/__init__.py | 0
tests/api_resources/alpha/agents/__init__.py | 1 +
.../{ => alpha}/agents/test_session.py | 82 ++++-----
.../{ => alpha}/agents/test_steps.py | 30 ++--
.../{ => alpha}/agents/test_turn.py | 126 ++++++-------
.../api_resources/{ => alpha}/test_agents.py | 66 +++----
.../{ => alpha}/test_inference.py | 18 +-
51 files changed, 598 insertions(+), 701 deletions(-)
create mode 100644 src/llama_stack_client/resources/alpha/__init__.py
rename src/llama_stack_client/resources/{ => alpha}/agents/__init__.py (100%)
rename src/llama_stack_client/resources/{ => alpha}/agents/agents.py (96%)
rename src/llama_stack_client/resources/{ => alpha}/agents/session.py (96%)
rename src/llama_stack_client/resources/{ => alpha}/agents/steps.py (95%)
rename src/llama_stack_client/resources/{ => alpha}/agents/turn.py (95%)
create mode 100644 src/llama_stack_client/resources/alpha/alpha.py
rename src/llama_stack_client/resources/{ => alpha}/inference.py (94%)
delete mode 100644 src/llama_stack_client/types/agents/agent_turn_response_stream_chunk.py
delete mode 100644 src/llama_stack_client/types/agents/turn_response_event.py
create mode 100644 src/llama_stack_client/types/alpha/__init__.py
rename src/llama_stack_client/types/{ => alpha}/agent_create_params.py (86%)
rename src/llama_stack_client/types/{ => alpha}/agent_create_response.py (87%)
rename src/llama_stack_client/types/{ => alpha}/agent_list_params.py (100%)
rename src/llama_stack_client/types/{ => alpha}/agent_list_response.py (93%)
rename src/llama_stack_client/types/{ => alpha}/agent_retrieve_response.py (83%)
rename src/llama_stack_client/types/{ => alpha}/agents/__init__.py (81%)
rename src/llama_stack_client/types/{ => alpha}/agents/session.py (93%)
rename src/llama_stack_client/types/{ => alpha}/agents/session_create_params.py (100%)
rename src/llama_stack_client/types/{ => alpha}/agents/session_create_response.py (87%)
rename src/llama_stack_client/types/{ => alpha}/agents/session_list_params.py (100%)
rename src/llama_stack_client/types/{ => alpha}/agents/session_list_response.py (93%)
rename src/llama_stack_client/types/{ => alpha}/agents/session_retrieve_params.py (91%)
rename src/llama_stack_client/types/{ => alpha}/agents/step_retrieve_response.py (90%)
rename src/llama_stack_client/types/{ => alpha}/agents/turn.py (91%)
rename src/llama_stack_client/types/{ => alpha}/agents/turn_create_params.py (95%)
rename src/llama_stack_client/types/{ => alpha}/agents/turn_resume_params.py (100%)
rename src/llama_stack_client/types/{ => alpha}/inference_rerank_params.py (98%)
rename src/llama_stack_client/types/{ => alpha}/inference_rerank_response.py (94%)
rename src/llama_stack_client/types/{ => alpha}/inference_step.py (89%)
rename src/llama_stack_client/types/{ => alpha}/memory_retrieval_step.py (89%)
rename src/llama_stack_client/types/{ => alpha}/shield_call_step.py (88%)
rename src/llama_stack_client/types/{ => alpha}/tool_execution_step.py (91%)
rename src/llama_stack_client/types/{ => alpha}/tool_response.py (88%)
rename src/llama_stack_client/types/{ => alpha}/tool_response_param.py (92%)
delete mode 100644 src/llama_stack_client/types/shared/chat_completion_response.py
delete mode 100644 src/llama_stack_client/types/shared/metric.py
delete mode 100644 src/llama_stack_client/types/shared/tool_param_definition.py
rename tests/api_resources/{agents => alpha}/__init__.py (100%)
create mode 100644 tests/api_resources/alpha/agents/__init__.py
rename tests/api_resources/{ => alpha}/agents/test_session.py (82%)
rename tests/api_resources/{ => alpha}/agents/test_steps.py (84%)
rename tests/api_resources/{ => alpha}/agents/test_turn.py (88%)
rename tests/api_resources/{ => alpha}/test_agents.py (87%)
rename tests/api_resources/{ => alpha}/test_inference.py (86%)
diff --git a/.stats.yml b/.stats.yml
index b453267a..35d70772 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 109
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-05bb7b0636a86ad0b485a5f2abfbd6b9e1873e802235f340af291f9ad9fb03b3.yml
-openapi_spec_hash: a78c30e308bc39473ea8e9ae9d0b726c
-config_hash: 53c09ba1fdae5045de1860c479a51dc7
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-4337a6181c2db17737133e944b4b660a5e00ea10dce6be3252918e39451e9b5f.yml
+openapi_spec_hash: a0bc8f4b5f45bc5741fed8eaa61171c3
+config_hash: 03aab396899c7d9aa3fba867ce54824b
diff --git a/api.md b/api.md
index aa82dbe7..00db1cb8 100644
--- a/api.md
+++ b/api.md
@@ -3,13 +3,11 @@
```python
from llama_stack_client.types import (
AgentConfig,
- ChatCompletionResponse,
CompletionMessage,
Document,
InterleavedContent,
InterleavedContentItem,
Message,
- Metric,
ParamType,
QueryConfig,
QueryResult,
@@ -19,7 +17,6 @@ from llama_stack_client.types import (
ScoringResult,
SystemMessage,
ToolCall,
- ToolParamDefinition,
ToolResponseMessage,
UserMessage,
)
@@ -105,71 +102,6 @@ Methods:
- client.responses.input_items.list(response_id, \*\*params) -> InputItemListResponse
-# Agents
-
-Types:
-
-```python
-from llama_stack_client.types import (
- InferenceStep,
- MemoryRetrievalStep,
- ShieldCallStep,
- ToolExecutionStep,
- ToolResponse,
- AgentCreateResponse,
- AgentRetrieveResponse,
- AgentListResponse,
-)
-```
-
-Methods:
-
-- client.agents.create(\*\*params) -> AgentCreateResponse
-- client.agents.retrieve(agent_id) -> AgentRetrieveResponse
-- client.agents.list(\*\*params) -> AgentListResponse
-- client.agents.delete(agent_id) -> None
-
-## Session
-
-Types:
-
-```python
-from llama_stack_client.types.agents import Session, SessionCreateResponse, SessionListResponse
-```
-
-Methods:
-
-- client.agents.session.create(agent_id, \*\*params) -> SessionCreateResponse
-- client.agents.session.retrieve(session_id, \*, agent_id, \*\*params) -> Session
-- client.agents.session.list(agent_id, \*\*params) -> SessionListResponse
-- client.agents.session.delete(session_id, \*, agent_id) -> None
-
-## Steps
-
-Types:
-
-```python
-from llama_stack_client.types.agents import StepRetrieveResponse
-```
-
-Methods:
-
-- client.agents.steps.retrieve(step_id, \*, agent_id, session_id, turn_id) -> StepRetrieveResponse
-
-## Turn
-
-Types:
-
-```python
-from llama_stack_client.types.agents import AgentTurnResponseStreamChunk, Turn, TurnResponseEvent
-```
-
-Methods:
-
-- client.agents.turn.create(session_id, \*, agent_id, \*\*params) -> Turn
-- client.agents.turn.retrieve(turn_id, \*, agent_id, session_id) -> Turn
-- client.agents.turn.resume(turn_id, \*, agent_id, session_id, \*\*params) -> Turn
-
# Datasets
Types:
@@ -229,18 +161,6 @@ Methods:
- client.inspect.health() -> HealthInfo
- client.inspect.version() -> VersionInfo
-# Inference
-
-Types:
-
-```python
-from llama_stack_client.types import InferenceRerankResponse
-```
-
-Methods:
-
-- client.inference.rerank(\*\*params) -> InferenceRerankResponse
-
# Embeddings
Types:
@@ -611,3 +531,90 @@ Methods:
- client.files.list(\*\*params) -> SyncOpenAICursorPage[File]
- client.files.delete(file_id) -> DeleteFileResponse
- client.files.content(file_id) -> object
+
+# Alpha
+
+## Inference
+
+Types:
+
+```python
+from llama_stack_client.types.alpha import InferenceRerankResponse
+```
+
+Methods:
+
+- client.alpha.inference.rerank(\*\*params) -> InferenceRerankResponse
+
+## Agents
+
+Types:
+
+```python
+from llama_stack_client.types.alpha import (
+ InferenceStep,
+ MemoryRetrievalStep,
+ ShieldCallStep,
+ ToolExecutionStep,
+ ToolResponse,
+ AgentCreateResponse,
+ AgentRetrieveResponse,
+ AgentListResponse,
+)
+```
+
+Methods:
+
+- client.alpha.agents.create(\*\*params) -> AgentCreateResponse
+- client.alpha.agents.retrieve(agent_id) -> AgentRetrieveResponse
+- client.alpha.agents.list(\*\*params) -> AgentListResponse
+- client.alpha.agents.delete(agent_id) -> None
+
+### Session
+
+Types:
+
+```python
+from llama_stack_client.types.alpha.agents import (
+ Session,
+ SessionCreateResponse,
+ SessionListResponse,
+)
+```
+
+Methods:
+
+- client.alpha.agents.session.create(agent_id, \*\*params) -> SessionCreateResponse
+- client.alpha.agents.session.retrieve(session_id, \*, agent_id, \*\*params) -> Session
+- client.alpha.agents.session.list(agent_id, \*\*params) -> SessionListResponse
+- client.alpha.agents.session.delete(session_id, \*, agent_id) -> None
+
+### Steps
+
+Types:
+
+```python
+from llama_stack_client.types.alpha.agents import StepRetrieveResponse
+```
+
+Methods:
+
+- client.alpha.agents.steps.retrieve(step_id, \*, agent_id, session_id, turn_id) -> StepRetrieveResponse
+
+### Turn
+
+Types:
+
+```python
+from llama_stack_client.types.alpha.agents import (
+ AgentTurnResponseStreamChunk,
+ Turn,
+ TurnResponseEvent,
+)
+```
+
+Methods:
+
+- client.alpha.agents.turn.create(session_id, \*, agent_id, \*\*params) -> Turn
+- client.alpha.agents.turn.retrieve(turn_id, \*, agent_id, session_id) -> Turn
+- client.alpha.agents.turn.resume(turn_id, \*, agent_id, session_id, \*\*params) -> Turn
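+
+Taken together, the api.md changes above relocate the agents and inference surfaces under a single `client.alpha` namespace. A minimal usage sketch of the relocated endpoints, assuming a locally running Llama Stack server; the URL, model ID, instructions, and session name below are placeholders, not values from this patch:
+
+```python
+from llama_stack_client import LlamaStackClient
+
+# Placeholder server URL and model ID; substitute your deployment's values.
+client = LlamaStackClient(base_url="http://localhost:8321")
+
+# Previously client.agents.create(...); now namespaced under alpha.
+agent = client.alpha.agents.create(
+    agent_config={
+        "model": "meta-llama/Llama-3.2-3B-Instruct",
+        "instructions": "You are a helpful assistant.",
+    }
+)
+
+# Sub-resources follow the same pattern: client.alpha.agents.session, .steps, .turn.
+session = client.alpha.agents.session.create(agent.agent_id, session_name="demo")
+```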
diff --git a/src/llama_stack_client/_client.py b/src/llama_stack_client/_client.py
index 3feccb40..5c106b37 100644
--- a/src/llama_stack_client/_client.py
+++ b/src/llama_stack_client/_client.py
@@ -35,9 +35,9 @@
from .resources import (
chat,
eval,
+ alpha,
files,
tools,
- agents,
models,
routes,
safety,
@@ -45,7 +45,6 @@
scoring,
shields,
datasets,
- inference,
providers,
responses,
telemetry,
@@ -72,7 +71,6 @@
from .resources.datasets import DatasetsResource, AsyncDatasetsResource
from .resources.chat.chat import ChatResource, AsyncChatResource
from .resources.eval.eval import EvalResource, AsyncEvalResource
- from .resources.inference import InferenceResource, AsyncInferenceResource
from .resources.providers import ProvidersResource, AsyncProvidersResource
from .resources.telemetry import TelemetryResource, AsyncTelemetryResource
from .resources.vector_io import VectorIoResource, AsyncVectorIoResource
@@ -80,9 +78,9 @@
from .resources.embeddings import EmbeddingsResource, AsyncEmbeddingsResource
from .resources.toolgroups import ToolgroupsResource, AsyncToolgroupsResource
from .resources.vector_dbs import VectorDBsResource, AsyncVectorDBsResource
+ from .resources.alpha.alpha import AlphaResource, AsyncAlphaResource
from .resources.completions import CompletionsResource, AsyncCompletionsResource
from .resources.moderations import ModerationsResource, AsyncModerationsResource
- from .resources.agents.agents import AgentsResource, AsyncAgentsResource
from .resources.models.models import ModelsResource, AsyncModelsResource
from .resources.scoring_functions import ScoringFunctionsResource, AsyncScoringFunctionsResource
from .resources.responses.responses import ResponsesResource, AsyncResponsesResource
@@ -187,12 +185,6 @@ def responses(self) -> ResponsesResource:
return ResponsesResource(self)
- @cached_property
- def agents(self) -> AgentsResource:
- from .resources.agents import AgentsResource
-
- return AgentsResource(self)
-
@cached_property
def datasets(self) -> DatasetsResource:
from .resources.datasets import DatasetsResource
@@ -211,12 +203,6 @@ def inspect(self) -> InspectResource:
return InspectResource(self)
- @cached_property
- def inference(self) -> InferenceResource:
- from .resources.inference import InferenceResource
-
- return InferenceResource(self)
-
@cached_property
def embeddings(self) -> EmbeddingsResource:
from .resources.embeddings import EmbeddingsResource
@@ -331,6 +317,12 @@ def files(self) -> FilesResource:
return FilesResource(self)
+ @cached_property
+ def alpha(self) -> AlphaResource:
+ from .resources.alpha import AlphaResource
+
+ return AlphaResource(self)
+
@cached_property
def with_raw_response(self) -> LlamaStackClientWithRawResponse:
return LlamaStackClientWithRawResponse(self)
@@ -527,12 +519,6 @@ def responses(self) -> AsyncResponsesResource:
return AsyncResponsesResource(self)
- @cached_property
- def agents(self) -> AsyncAgentsResource:
- from .resources.agents import AsyncAgentsResource
-
- return AsyncAgentsResource(self)
-
@cached_property
def datasets(self) -> AsyncDatasetsResource:
from .resources.datasets import AsyncDatasetsResource
@@ -551,12 +537,6 @@ def inspect(self) -> AsyncInspectResource:
return AsyncInspectResource(self)
- @cached_property
- def inference(self) -> AsyncInferenceResource:
- from .resources.inference import AsyncInferenceResource
-
- return AsyncInferenceResource(self)
-
@cached_property
def embeddings(self) -> AsyncEmbeddingsResource:
from .resources.embeddings import AsyncEmbeddingsResource
@@ -671,6 +651,12 @@ def files(self) -> AsyncFilesResource:
return AsyncFilesResource(self)
+ @cached_property
+ def alpha(self) -> AsyncAlphaResource:
+ from .resources.alpha import AsyncAlphaResource
+
+ return AsyncAlphaResource(self)
+
@cached_property
def with_raw_response(self) -> AsyncLlamaStackClientWithRawResponse:
return AsyncLlamaStackClientWithRawResponse(self)
@@ -816,12 +802,6 @@ def responses(self) -> responses.ResponsesResourceWithRawResponse:
return ResponsesResourceWithRawResponse(self._client.responses)
- @cached_property
- def agents(self) -> agents.AgentsResourceWithRawResponse:
- from .resources.agents import AgentsResourceWithRawResponse
-
- return AgentsResourceWithRawResponse(self._client.agents)
-
@cached_property
def datasets(self) -> datasets.DatasetsResourceWithRawResponse:
from .resources.datasets import DatasetsResourceWithRawResponse
@@ -840,12 +820,6 @@ def inspect(self) -> inspect.InspectResourceWithRawResponse:
return InspectResourceWithRawResponse(self._client.inspect)
- @cached_property
- def inference(self) -> inference.InferenceResourceWithRawResponse:
- from .resources.inference import InferenceResourceWithRawResponse
-
- return InferenceResourceWithRawResponse(self._client.inference)
-
@cached_property
def embeddings(self) -> embeddings.EmbeddingsResourceWithRawResponse:
from .resources.embeddings import EmbeddingsResourceWithRawResponse
@@ -960,6 +934,12 @@ def files(self) -> files.FilesResourceWithRawResponse:
return FilesResourceWithRawResponse(self._client.files)
+ @cached_property
+ def alpha(self) -> alpha.AlphaResourceWithRawResponse:
+ from .resources.alpha import AlphaResourceWithRawResponse
+
+ return AlphaResourceWithRawResponse(self._client.alpha)
+
class AsyncLlamaStackClientWithRawResponse:
_client: AsyncLlamaStackClient
@@ -991,12 +971,6 @@ def responses(self) -> responses.AsyncResponsesResourceWithRawResponse:
return AsyncResponsesResourceWithRawResponse(self._client.responses)
- @cached_property
- def agents(self) -> agents.AsyncAgentsResourceWithRawResponse:
- from .resources.agents import AsyncAgentsResourceWithRawResponse
-
- return AsyncAgentsResourceWithRawResponse(self._client.agents)
-
@cached_property
def datasets(self) -> datasets.AsyncDatasetsResourceWithRawResponse:
from .resources.datasets import AsyncDatasetsResourceWithRawResponse
@@ -1015,12 +989,6 @@ def inspect(self) -> inspect.AsyncInspectResourceWithRawResponse:
return AsyncInspectResourceWithRawResponse(self._client.inspect)
- @cached_property
- def inference(self) -> inference.AsyncInferenceResourceWithRawResponse:
- from .resources.inference import AsyncInferenceResourceWithRawResponse
-
- return AsyncInferenceResourceWithRawResponse(self._client.inference)
-
@cached_property
def embeddings(self) -> embeddings.AsyncEmbeddingsResourceWithRawResponse:
from .resources.embeddings import AsyncEmbeddingsResourceWithRawResponse
@@ -1137,6 +1105,12 @@ def files(self) -> files.AsyncFilesResourceWithRawResponse:
return AsyncFilesResourceWithRawResponse(self._client.files)
+ @cached_property
+ def alpha(self) -> alpha.AsyncAlphaResourceWithRawResponse:
+ from .resources.alpha import AsyncAlphaResourceWithRawResponse
+
+ return AsyncAlphaResourceWithRawResponse(self._client.alpha)
+
class LlamaStackClientWithStreamedResponse:
_client: LlamaStackClient
@@ -1168,12 +1142,6 @@ def responses(self) -> responses.ResponsesResourceWithStreamingResponse:
return ResponsesResourceWithStreamingResponse(self._client.responses)
- @cached_property
- def agents(self) -> agents.AgentsResourceWithStreamingResponse:
- from .resources.agents import AgentsResourceWithStreamingResponse
-
- return AgentsResourceWithStreamingResponse(self._client.agents)
-
@cached_property
def datasets(self) -> datasets.DatasetsResourceWithStreamingResponse:
from .resources.datasets import DatasetsResourceWithStreamingResponse
@@ -1192,12 +1160,6 @@ def inspect(self) -> inspect.InspectResourceWithStreamingResponse:
return InspectResourceWithStreamingResponse(self._client.inspect)
- @cached_property
- def inference(self) -> inference.InferenceResourceWithStreamingResponse:
- from .resources.inference import InferenceResourceWithStreamingResponse
-
- return InferenceResourceWithStreamingResponse(self._client.inference)
-
@cached_property
def embeddings(self) -> embeddings.EmbeddingsResourceWithStreamingResponse:
from .resources.embeddings import EmbeddingsResourceWithStreamingResponse
@@ -1314,6 +1276,12 @@ def files(self) -> files.FilesResourceWithStreamingResponse:
return FilesResourceWithStreamingResponse(self._client.files)
+ @cached_property
+ def alpha(self) -> alpha.AlphaResourceWithStreamingResponse:
+ from .resources.alpha import AlphaResourceWithStreamingResponse
+
+ return AlphaResourceWithStreamingResponse(self._client.alpha)
+
class AsyncLlamaStackClientWithStreamedResponse:
_client: AsyncLlamaStackClient
@@ -1345,12 +1313,6 @@ def responses(self) -> responses.AsyncResponsesResourceWithStreamingResponse:
return AsyncResponsesResourceWithStreamingResponse(self._client.responses)
- @cached_property
- def agents(self) -> agents.AsyncAgentsResourceWithStreamingResponse:
- from .resources.agents import AsyncAgentsResourceWithStreamingResponse
-
- return AsyncAgentsResourceWithStreamingResponse(self._client.agents)
-
@cached_property
def datasets(self) -> datasets.AsyncDatasetsResourceWithStreamingResponse:
from .resources.datasets import AsyncDatasetsResourceWithStreamingResponse
@@ -1369,12 +1331,6 @@ def inspect(self) -> inspect.AsyncInspectResourceWithStreamingResponse:
return AsyncInspectResourceWithStreamingResponse(self._client.inspect)
- @cached_property
- def inference(self) -> inference.AsyncInferenceResourceWithStreamingResponse:
- from .resources.inference import AsyncInferenceResourceWithStreamingResponse
-
- return AsyncInferenceResourceWithStreamingResponse(self._client.inference)
-
@cached_property
def embeddings(self) -> embeddings.AsyncEmbeddingsResourceWithStreamingResponse:
from .resources.embeddings import AsyncEmbeddingsResourceWithStreamingResponse
@@ -1491,6 +1447,12 @@ def files(self) -> files.AsyncFilesResourceWithStreamingResponse:
return AsyncFilesResourceWithStreamingResponse(self._client.files)
+ @cached_property
+ def alpha(self) -> alpha.AsyncAlphaResourceWithStreamingResponse:
+ from .resources.alpha import AsyncAlphaResourceWithStreamingResponse
+
+ return AsyncAlphaResourceWithStreamingResponse(self._client.alpha)
+
Client = LlamaStackClient
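
Every client flavor above gains the same lazily imported `alpha` property, including the raw- and streamed-response wrapper classes, so the established access patterns carry over unchanged to the new namespace. A short sketch (the URL is illustrative only):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # illustrative URL

# `.with_raw_response` mirrors the wrapper classes wired up above.
response = client.alpha.agents.with_raw_response.list()
print(response.headers.get("content-type"))
agents = response.parse()  # deferred parsing into AgentListResponse
```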
diff --git a/src/llama_stack_client/resources/__init__.py b/src/llama_stack_client/resources/__init__.py
index 01e17f57..3ca1dedb 100644
--- a/src/llama_stack_client/resources/__init__.py
+++ b/src/llama_stack_client/resources/__init__.py
@@ -16,6 +16,14 @@
EvalResourceWithStreamingResponse,
AsyncEvalResourceWithStreamingResponse,
)
+from .alpha import (
+ AlphaResource,
+ AsyncAlphaResource,
+ AlphaResourceWithRawResponse,
+ AsyncAlphaResourceWithRawResponse,
+ AlphaResourceWithStreamingResponse,
+ AsyncAlphaResourceWithStreamingResponse,
+)
from .files import (
FilesResource,
AsyncFilesResource,
@@ -32,14 +40,6 @@
ToolsResourceWithStreamingResponse,
AsyncToolsResourceWithStreamingResponse,
)
-from .agents import (
- AgentsResource,
- AsyncAgentsResource,
- AgentsResourceWithRawResponse,
- AsyncAgentsResourceWithRawResponse,
- AgentsResourceWithStreamingResponse,
- AsyncAgentsResourceWithStreamingResponse,
-)
from .models import (
ModelsResource,
AsyncModelsResource,
@@ -96,14 +96,6 @@
DatasetsResourceWithStreamingResponse,
AsyncDatasetsResourceWithStreamingResponse,
)
-from .inference import (
- InferenceResource,
- AsyncInferenceResource,
- InferenceResourceWithRawResponse,
- AsyncInferenceResourceWithRawResponse,
- InferenceResourceWithStreamingResponse,
- AsyncInferenceResourceWithStreamingResponse,
-)
from .providers import (
ProvidersResource,
AsyncProvidersResource,
@@ -250,12 +242,6 @@
"AsyncResponsesResourceWithRawResponse",
"ResponsesResourceWithStreamingResponse",
"AsyncResponsesResourceWithStreamingResponse",
- "AgentsResource",
- "AsyncAgentsResource",
- "AgentsResourceWithRawResponse",
- "AsyncAgentsResourceWithRawResponse",
- "AgentsResourceWithStreamingResponse",
- "AsyncAgentsResourceWithStreamingResponse",
"DatasetsResource",
"AsyncDatasetsResource",
"DatasetsResourceWithRawResponse",
@@ -274,12 +260,6 @@
"AsyncInspectResourceWithRawResponse",
"InspectResourceWithStreamingResponse",
"AsyncInspectResourceWithStreamingResponse",
- "InferenceResource",
- "AsyncInferenceResource",
- "InferenceResourceWithRawResponse",
- "AsyncInferenceResourceWithRawResponse",
- "InferenceResourceWithStreamingResponse",
- "AsyncInferenceResourceWithStreamingResponse",
"EmbeddingsResource",
"AsyncEmbeddingsResource",
"EmbeddingsResourceWithRawResponse",
@@ -394,4 +374,10 @@
"AsyncFilesResourceWithRawResponse",
"FilesResourceWithStreamingResponse",
"AsyncFilesResourceWithStreamingResponse",
+ "AlphaResource",
+ "AsyncAlphaResource",
+ "AlphaResourceWithRawResponse",
+ "AsyncAlphaResourceWithRawResponse",
+ "AlphaResourceWithStreamingResponse",
+ "AsyncAlphaResourceWithStreamingResponse",
]
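
With the re-exports above, `AgentsResource` and `InferenceResource` are no longer importable from the top-level resources package; they now come from the `alpha` subpackage:

```python
# Old imports (removed above):
#   from llama_stack_client.resources import AgentsResource, InferenceResource
# New locations after this patch:
from llama_stack_client.resources import AlphaResource, AsyncAlphaResource
from llama_stack_client.resources.alpha import AgentsResource, InferenceResource
```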
diff --git a/src/llama_stack_client/resources/alpha/__init__.py b/src/llama_stack_client/resources/alpha/__init__.py
new file mode 100644
index 00000000..875a710b
--- /dev/null
+++ b/src/llama_stack_client/resources/alpha/__init__.py
@@ -0,0 +1,47 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .alpha import (
+ AlphaResource,
+ AsyncAlphaResource,
+ AlphaResourceWithRawResponse,
+ AsyncAlphaResourceWithRawResponse,
+ AlphaResourceWithStreamingResponse,
+ AsyncAlphaResourceWithStreamingResponse,
+)
+from .agents import (
+ AgentsResource,
+ AsyncAgentsResource,
+ AgentsResourceWithRawResponse,
+ AsyncAgentsResourceWithRawResponse,
+ AgentsResourceWithStreamingResponse,
+ AsyncAgentsResourceWithStreamingResponse,
+)
+from .inference import (
+ InferenceResource,
+ AsyncInferenceResource,
+ InferenceResourceWithRawResponse,
+ AsyncInferenceResourceWithRawResponse,
+ InferenceResourceWithStreamingResponse,
+ AsyncInferenceResourceWithStreamingResponse,
+)
+
+__all__ = [
+ "InferenceResource",
+ "AsyncInferenceResource",
+ "InferenceResourceWithRawResponse",
+ "AsyncInferenceResourceWithRawResponse",
+ "InferenceResourceWithStreamingResponse",
+ "AsyncInferenceResourceWithStreamingResponse",
+ "AgentsResource",
+ "AsyncAgentsResource",
+ "AgentsResourceWithRawResponse",
+ "AsyncAgentsResourceWithRawResponse",
+ "AgentsResourceWithStreamingResponse",
+ "AsyncAgentsResourceWithStreamingResponse",
+ "AlphaResource",
+ "AsyncAlphaResource",
+ "AlphaResourceWithRawResponse",
+ "AsyncAlphaResourceWithRawResponse",
+ "AlphaResourceWithStreamingResponse",
+ "AsyncAlphaResourceWithStreamingResponse",
+]
diff --git a/src/llama_stack_client/resources/agents/__init__.py b/src/llama_stack_client/resources/alpha/agents/__init__.py
similarity index 100%
rename from src/llama_stack_client/resources/agents/__init__.py
rename to src/llama_stack_client/resources/alpha/agents/__init__.py
diff --git a/src/llama_stack_client/resources/agents/agents.py b/src/llama_stack_client/resources/alpha/agents/agents.py
similarity index 96%
rename from src/llama_stack_client/resources/agents/agents.py
rename to src/llama_stack_client/resources/alpha/agents/agents.py
index 50d65a60..b168179c 100644
--- a/src/llama_stack_client/resources/agents/agents.py
+++ b/src/llama_stack_client/resources/alpha/agents/agents.py
@@ -20,7 +20,6 @@
StepsResourceWithStreamingResponse,
AsyncStepsResourceWithStreamingResponse,
)
-from ...types import agent_list_params, agent_create_params
from .session import (
SessionResource,
AsyncSessionResource,
@@ -29,21 +28,22 @@
SessionResourceWithStreamingResponse,
AsyncSessionResourceWithStreamingResponse,
)
-from ..._types import Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given
-from ..._utils import maybe_transform, async_maybe_transform
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import (
+from ...._types import Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given
+from ...._utils import maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
to_raw_response_wrapper,
to_streamed_response_wrapper,
async_to_raw_response_wrapper,
async_to_streamed_response_wrapper,
)
-from ..._base_client import make_request_options
-from ...types.agent_list_response import AgentListResponse
-from ...types.agent_create_response import AgentCreateResponse
-from ...types.agent_retrieve_response import AgentRetrieveResponse
-from ...types.shared_params.agent_config import AgentConfig
+from ....types.alpha import agent_list_params, agent_create_params
+from ...._base_client import make_request_options
+from ....types.alpha.agent_list_response import AgentListResponse
+from ....types.shared_params.agent_config import AgentConfig
+from ....types.alpha.agent_create_response import AgentCreateResponse
+from ....types.alpha.agent_retrieve_response import AgentRetrieveResponse
__all__ = ["AgentsResource", "AsyncAgentsResource"]
diff --git a/src/llama_stack_client/resources/agents/session.py b/src/llama_stack_client/resources/alpha/agents/session.py
similarity index 96%
rename from src/llama_stack_client/resources/agents/session.py
rename to src/llama_stack_client/resources/alpha/agents/session.py
index de5b35de..37b51cea 100644
--- a/src/llama_stack_client/resources/agents/session.py
+++ b/src/llama_stack_client/resources/alpha/agents/session.py
@@ -4,21 +4,21 @@
import httpx
-from ..._types import Body, Omit, Query, Headers, NoneType, NotGiven, SequenceNotStr, omit, not_given
-from ..._utils import maybe_transform, async_maybe_transform
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import (
+from ...._types import Body, Omit, Query, Headers, NoneType, NotGiven, SequenceNotStr, omit, not_given
+from ...._utils import maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
to_raw_response_wrapper,
to_streamed_response_wrapper,
async_to_raw_response_wrapper,
async_to_streamed_response_wrapper,
)
-from ..._base_client import make_request_options
-from ...types.agents import session_list_params, session_create_params, session_retrieve_params
-from ...types.agents.session import Session
-from ...types.agents.session_list_response import SessionListResponse
-from ...types.agents.session_create_response import SessionCreateResponse
+from ...._base_client import make_request_options
+from ....types.alpha.agents import session_list_params, session_create_params, session_retrieve_params
+from ....types.alpha.agents.session import Session
+from ....types.alpha.agents.session_list_response import SessionListResponse
+from ....types.alpha.agents.session_create_response import SessionCreateResponse
__all__ = ["SessionResource", "AsyncSessionResource"]
diff --git a/src/llama_stack_client/resources/agents/steps.py b/src/llama_stack_client/resources/alpha/agents/steps.py
similarity index 95%
rename from src/llama_stack_client/resources/agents/steps.py
rename to src/llama_stack_client/resources/alpha/agents/steps.py
index 94138edc..42aec9ab 100644
--- a/src/llama_stack_client/resources/agents/steps.py
+++ b/src/llama_stack_client/resources/alpha/agents/steps.py
@@ -4,17 +4,17 @@
import httpx
-from ..._types import Body, Query, Headers, NotGiven, not_given
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import (
+from ...._types import Body, Query, Headers, NotGiven, not_given
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
to_raw_response_wrapper,
to_streamed_response_wrapper,
async_to_raw_response_wrapper,
async_to_streamed_response_wrapper,
)
-from ..._base_client import make_request_options
-from ...types.agents.step_retrieve_response import StepRetrieveResponse
+from ...._base_client import make_request_options
+from ....types.alpha.agents.step_retrieve_response import StepRetrieveResponse
__all__ = ["StepsResource", "AsyncStepsResource"]
diff --git a/src/llama_stack_client/resources/agents/turn.py b/src/llama_stack_client/resources/alpha/agents/turn.py
similarity index 95%
rename from src/llama_stack_client/resources/agents/turn.py
rename to src/llama_stack_client/resources/alpha/agents/turn.py
index a10da847..e8b7e97a 100644
--- a/src/llama_stack_client/resources/agents/turn.py
+++ b/src/llama_stack_client/resources/alpha/agents/turn.py
@@ -7,22 +7,21 @@
import httpx
-from ..._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
-from ..._utils import required_args, maybe_transform, async_maybe_transform
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import (
+from ...._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
+from ...._utils import required_args, maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
to_raw_response_wrapper,
to_streamed_response_wrapper,
async_to_raw_response_wrapper,
async_to_streamed_response_wrapper,
)
-from ..._streaming import Stream, AsyncStream
-from ..._base_client import make_request_options
-from ...types.agents import turn_create_params, turn_resume_params
-from ...types.agents.turn import Turn
-from ...types.tool_response_param import ToolResponseParam
-from ...types.agents.agent_turn_response_stream_chunk import AgentTurnResponseStreamChunk
+from ...._streaming import Stream, AsyncStream
+from ...._base_client import make_request_options
+from ....types.alpha.agents import turn_create_params, turn_resume_params
+from ....types.alpha.agents.turn import Turn
+from ....types.alpha.tool_response_param import ToolResponseParam
__all__ = ["TurnResource", "AsyncTurnResource"]
@@ -109,7 +108,7 @@ def create(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Stream[AgentTurnResponseStreamChunk]:
+ ) -> Stream[object]:
"""
Create a new turn for an agent.
@@ -154,7 +153,7 @@ def create(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | Stream[AgentTurnResponseStreamChunk]:
+ ) -> Turn | Stream[object]:
"""
Create a new turn for an agent.
@@ -199,7 +198,7 @@ def create(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | Stream[AgentTurnResponseStreamChunk]:
+ ) -> Turn | Stream[object]:
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
if not session_id:
@@ -223,7 +222,7 @@ def create(
),
cast_to=Turn,
stream=stream or False,
- stream_cls=Stream[AgentTurnResponseStreamChunk],
+ stream_cls=Stream[object],
)
def retrieve(
@@ -318,7 +317,7 @@ def resume(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Stream[AgentTurnResponseStreamChunk]:
+ ) -> Stream[object]:
"""Resume an agent turn with executed tool call responses.
When a Turn has the
@@ -356,7 +355,7 @@ def resume(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | Stream[AgentTurnResponseStreamChunk]:
+ ) -> Turn | Stream[object]:
"""Resume an agent turn with executed tool call responses.
When a Turn has the
@@ -394,7 +393,7 @@ def resume(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | Stream[AgentTurnResponseStreamChunk]:
+ ) -> Turn | Stream[object]:
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
if not session_id:
@@ -417,7 +416,7 @@ def resume(
),
cast_to=Turn,
stream=stream or False,
- stream_cls=Stream[AgentTurnResponseStreamChunk],
+ stream_cls=Stream[object],
)
@@ -503,7 +502,7 @@ async def create(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AsyncStream[AgentTurnResponseStreamChunk]:
+ ) -> AsyncStream[object]:
"""
Create a new turn for an agent.
@@ -548,7 +547,7 @@ async def create(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]:
+ ) -> Turn | AsyncStream[object]:
"""
Create a new turn for an agent.
@@ -593,7 +592,7 @@ async def create(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]:
+ ) -> Turn | AsyncStream[object]:
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
if not session_id:
@@ -617,7 +616,7 @@ async def create(
),
cast_to=Turn,
stream=stream or False,
- stream_cls=AsyncStream[AgentTurnResponseStreamChunk],
+ stream_cls=AsyncStream[object],
)
async def retrieve(
@@ -712,7 +711,7 @@ async def resume(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AsyncStream[AgentTurnResponseStreamChunk]:
+ ) -> AsyncStream[object]:
"""Resume an agent turn with executed tool call responses.
When a Turn has the
@@ -750,7 +749,7 @@ async def resume(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]:
+ ) -> Turn | AsyncStream[object]:
"""Resume an agent turn with executed tool call responses.
When a Turn has the
@@ -788,7 +787,7 @@ async def resume(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]:
+ ) -> Turn | AsyncStream[object]:
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
if not session_id:
@@ -811,7 +810,7 @@ async def resume(
),
cast_to=Turn,
stream=stream or False,
- stream_cls=AsyncStream[AgentTurnResponseStreamChunk],
+ stream_cls=AsyncStream[object],
)
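
Because `AgentTurnResponseStreamChunk` is deleted later in this patch, the streaming overloads above are widened from `Stream[AgentTurnResponseStreamChunk]` to `Stream[object]` (and likewise for `AsyncStream`). Consumers therefore inspect chunks dynamically rather than through a typed pydantic model. A sketch, assuming the `client`, `agent_id`, and `session_id` from the earlier sketch and that `messages` accepts plain user-message dicts as elsewhere in the SDK:

```python
# session_id / agent_id are assumed to come from the create calls shown earlier.
stream = client.alpha.agents.turn.create(
    session_id,
    agent_id=agent_id,
    messages=[{"role": "user", "content": "Hello!"}],
    stream=True,
)
for chunk in stream:  # chunks are now typed as plain `object`
    print(chunk)
```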
diff --git a/src/llama_stack_client/resources/alpha/alpha.py b/src/llama_stack_client/resources/alpha/alpha.py
new file mode 100644
index 00000000..9d5cfbe9
--- /dev/null
+++ b/src/llama_stack_client/resources/alpha/alpha.py
@@ -0,0 +1,134 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from ..._compat import cached_property
+from .inference import (
+ InferenceResource,
+ AsyncInferenceResource,
+ InferenceResourceWithRawResponse,
+ AsyncInferenceResourceWithRawResponse,
+ InferenceResourceWithStreamingResponse,
+ AsyncInferenceResourceWithStreamingResponse,
+)
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from .agents.agents import (
+ AgentsResource,
+ AsyncAgentsResource,
+ AgentsResourceWithRawResponse,
+ AsyncAgentsResourceWithRawResponse,
+ AgentsResourceWithStreamingResponse,
+ AsyncAgentsResourceWithStreamingResponse,
+)
+
+__all__ = ["AlphaResource", "AsyncAlphaResource"]
+
+
+class AlphaResource(SyncAPIResource):
+ @cached_property
+ def inference(self) -> InferenceResource:
+ return InferenceResource(self._client)
+
+ @cached_property
+ def agents(self) -> AgentsResource:
+ return AgentsResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AlphaResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return AlphaResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AlphaResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return AlphaResourceWithStreamingResponse(self)
+
+
+class AsyncAlphaResource(AsyncAPIResource):
+ @cached_property
+ def inference(self) -> AsyncInferenceResource:
+ return AsyncInferenceResource(self._client)
+
+ @cached_property
+ def agents(self) -> AsyncAgentsResource:
+ return AsyncAgentsResource(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncAlphaResourceWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncAlphaResourceWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncAlphaResourceWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
+ """
+ return AsyncAlphaResourceWithStreamingResponse(self)
+
+
+class AlphaResourceWithRawResponse:
+ def __init__(self, alpha: AlphaResource) -> None:
+ self._alpha = alpha
+
+ @cached_property
+ def inference(self) -> InferenceResourceWithRawResponse:
+ return InferenceResourceWithRawResponse(self._alpha.inference)
+
+ @cached_property
+ def agents(self) -> AgentsResourceWithRawResponse:
+ return AgentsResourceWithRawResponse(self._alpha.agents)
+
+
+class AsyncAlphaResourceWithRawResponse:
+ def __init__(self, alpha: AsyncAlphaResource) -> None:
+ self._alpha = alpha
+
+ @cached_property
+ def inference(self) -> AsyncInferenceResourceWithRawResponse:
+ return AsyncInferenceResourceWithRawResponse(self._alpha.inference)
+
+ @cached_property
+ def agents(self) -> AsyncAgentsResourceWithRawResponse:
+ return AsyncAgentsResourceWithRawResponse(self._alpha.agents)
+
+
+class AlphaResourceWithStreamingResponse:
+ def __init__(self, alpha: AlphaResource) -> None:
+ self._alpha = alpha
+
+ @cached_property
+ def inference(self) -> InferenceResourceWithStreamingResponse:
+ return InferenceResourceWithStreamingResponse(self._alpha.inference)
+
+ @cached_property
+ def agents(self) -> AgentsResourceWithStreamingResponse:
+ return AgentsResourceWithStreamingResponse(self._alpha.agents)
+
+
+class AsyncAlphaResourceWithStreamingResponse:
+ def __init__(self, alpha: AsyncAlphaResource) -> None:
+ self._alpha = alpha
+
+ @cached_property
+ def inference(self) -> AsyncInferenceResourceWithStreamingResponse:
+ return AsyncInferenceResourceWithStreamingResponse(self._alpha.inference)
+
+ @cached_property
+ def agents(self) -> AsyncAgentsResourceWithStreamingResponse:
+ return AsyncAgentsResourceWithStreamingResponse(self._alpha.agents)
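+
+`AlphaResource` carries no endpoints of its own: it is a pure namespace that memoizes its sub-resources via `cached_property` and fans the raw/streaming wrappers out the same way. The streamed-response wrapper is used exactly as elsewhere in the SDK; a sketch, where the agent ID is illustrative and `client` is assumed from the earlier sketch:
+
+```python
+# Defers reading the response body until you iterate over it.
+with client.alpha.agents.with_streaming_response.retrieve("agent-123") as response:
+    print(response.headers.get("content-type"))
+    for line in response.iter_lines():
+        print(line)
+```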
diff --git a/src/llama_stack_client/resources/inference.py b/src/llama_stack_client/resources/alpha/inference.py
similarity index 94%
rename from src/llama_stack_client/resources/inference.py
rename to src/llama_stack_client/resources/alpha/inference.py
index e5cf7b6b..ca259357 100644
--- a/src/llama_stack_client/resources/inference.py
+++ b/src/llama_stack_client/resources/alpha/inference.py
@@ -6,20 +6,20 @@
import httpx
-from ..types import inference_rerank_params
-from .._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
-from .._utils import maybe_transform, async_maybe_transform
-from .._compat import cached_property
-from .._resource import SyncAPIResource, AsyncAPIResource
-from .._response import (
+from ..._types import Body, Omit, Query, Headers, NotGiven, SequenceNotStr, omit, not_given
+from ..._utils import maybe_transform, async_maybe_transform
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
to_raw_response_wrapper,
to_streamed_response_wrapper,
async_to_raw_response_wrapper,
async_to_streamed_response_wrapper,
)
-from .._wrappers import DataWrapper
-from .._base_client import make_request_options
-from ..types.inference_rerank_response import InferenceRerankResponse
+from ..._wrappers import DataWrapper
+from ...types.alpha import inference_rerank_params
+from ..._base_client import make_request_options
+from ...types.alpha.inference_rerank_response import InferenceRerankResponse
__all__ = ["InferenceResource", "AsyncInferenceResource"]
diff --git a/src/llama_stack_client/types/__init__.py b/src/llama_stack_client/types/__init__.py
index f81ada61..2f955f5e 100644
--- a/src/llama_stack_client/types/__init__.py
+++ b/src/llama_stack_client/types/__init__.py
@@ -8,7 +8,6 @@
from .model import Model as Model
from .trace import Trace as Trace
from .shared import (
- Metric as Metric,
Message as Message,
Document as Document,
ToolCall as ToolCall,
@@ -24,9 +23,7 @@
SafetyViolation as SafetyViolation,
CompletionMessage as CompletionMessage,
InterleavedContent as InterleavedContent,
- ToolParamDefinition as ToolParamDefinition,
ToolResponseMessage as ToolResponseMessage,
- ChatCompletionResponse as ChatCompletionResponse,
InterleavedContentItem as InterleavedContentItem,
)
from .shield import Shield as Shield
@@ -40,30 +37,22 @@
from .vector_store import VectorStore as VectorStore
from .version_info import VersionInfo as VersionInfo
from .provider_info import ProviderInfo as ProviderInfo
-from .tool_response import ToolResponse as ToolResponse
-from .inference_step import InferenceStep as InferenceStep
from .tool_def_param import ToolDefParam as ToolDefParam
from .create_response import CreateResponse as CreateResponse
from .response_object import ResponseObject as ResponseObject
from .file_list_params import FileListParams as FileListParams
-from .shield_call_step import ShieldCallStep as ShieldCallStep
from .span_with_status import SpanWithStatus as SpanWithStatus
from .tool_list_params import ToolListParams as ToolListParams
-from .agent_list_params import AgentListParams as AgentListParams
from .evaluate_response import EvaluateResponse as EvaluateResponse
from .post_training_job import PostTrainingJob as PostTrainingJob
from .scoring_fn_params import ScoringFnParams as ScoringFnParams
from .file_create_params import FileCreateParams as FileCreateParams
from .tool_list_response import ToolListResponse as ToolListResponse
-from .agent_create_params import AgentCreateParams as AgentCreateParams
-from .agent_list_response import AgentListResponse as AgentListResponse
from .list_files_response import ListFilesResponse as ListFilesResponse
from .list_tools_response import ListToolsResponse as ListToolsResponse
from .model_list_response import ModelListResponse as ModelListResponse
from .route_list_response import RouteListResponse as RouteListResponse
from .run_shield_response import RunShieldResponse as RunShieldResponse
-from .tool_execution_step import ToolExecutionStep as ToolExecutionStep
-from .tool_response_param import ToolResponseParam as ToolResponseParam
from .delete_file_response import DeleteFileResponse as DeleteFileResponse
from .eval_run_eval_params import EvalRunEvalParams as EvalRunEvalParams
from .list_models_response import ListModelsResponse as ListModelsResponse
@@ -72,11 +61,9 @@
from .response_list_params import ResponseListParams as ResponseListParams
from .scoring_score_params import ScoringScoreParams as ScoringScoreParams
from .shield_list_response import ShieldListResponse as ShieldListResponse
-from .agent_create_response import AgentCreateResponse as AgentCreateResponse
from .chat_completion_chunk import ChatCompletionChunk as ChatCompletionChunk
from .dataset_list_response import DatasetListResponse as DatasetListResponse
from .list_shields_response import ListShieldsResponse as ListShieldsResponse
-from .memory_retrieval_step import MemoryRetrievalStep as MemoryRetrievalStep
from .model_register_params import ModelRegisterParams as ModelRegisterParams
from .query_chunks_response import QueryChunksResponse as QueryChunksResponse
from .query_condition_param import QueryConditionParam as QueryConditionParam
@@ -91,12 +78,10 @@
from .shield_register_params import ShieldRegisterParams as ShieldRegisterParams
from .tool_invocation_result import ToolInvocationResult as ToolInvocationResult
from .vector_io_query_params import VectorIoQueryParams as VectorIoQueryParams
-from .agent_retrieve_response import AgentRetrieveResponse as AgentRetrieveResponse
from .benchmark_list_response import BenchmarkListResponse as BenchmarkListResponse
from .dataset_iterrows_params import DatasetIterrowsParams as DatasetIterrowsParams
from .dataset_register_params import DatasetRegisterParams as DatasetRegisterParams
from .embedding_create_params import EmbeddingCreateParams as EmbeddingCreateParams
-from .inference_rerank_params import InferenceRerankParams as InferenceRerankParams
from .list_providers_response import ListProvidersResponse as ListProvidersResponse
from .scoring_fn_params_param import ScoringFnParamsParam as ScoringFnParamsParam
from .toolgroup_list_response import ToolgroupListResponse as ToolgroupListResponse
@@ -115,7 +100,6 @@
from .dataset_register_response import DatasetRegisterResponse as DatasetRegisterResponse
from .dataset_retrieve_response import DatasetRetrieveResponse as DatasetRetrieveResponse
from .eval_evaluate_rows_params import EvalEvaluateRowsParams as EvalEvaluateRowsParams
-from .inference_rerank_response import InferenceRerankResponse as InferenceRerankResponse
from .list_tool_groups_response import ListToolGroupsResponse as ListToolGroupsResponse
from .toolgroup_register_params import ToolgroupRegisterParams as ToolgroupRegisterParams
from .vector_db_register_params import VectorDBRegisterParams as VectorDBRegisterParams
diff --git a/src/llama_stack_client/types/agents/agent_turn_response_stream_chunk.py b/src/llama_stack_client/types/agents/agent_turn_response_stream_chunk.py
deleted file mode 100644
index 1ce1b8a7..00000000
--- a/src/llama_stack_client/types/agents/agent_turn_response_stream_chunk.py
+++ /dev/null
@@ -1,11 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from ..._models import BaseModel
-from .turn_response_event import TurnResponseEvent
-
-__all__ = ["AgentTurnResponseStreamChunk"]
-
-
-class AgentTurnResponseStreamChunk(BaseModel):
- event: TurnResponseEvent
- """Individual event in the agent turn response stream"""
diff --git a/src/llama_stack_client/types/agents/turn_response_event.py b/src/llama_stack_client/types/agents/turn_response_event.py
deleted file mode 100644
index c52121ab..00000000
--- a/src/llama_stack_client/types/agents/turn_response_event.py
+++ /dev/null
@@ -1,160 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union, Optional
-from typing_extensions import Literal, Annotated, TypeAlias
-
-from .turn import Turn
-from ..._utils import PropertyInfo
-from ..._models import BaseModel
-from ..inference_step import InferenceStep
-from ..shared.tool_call import ToolCall
-from ..shield_call_step import ShieldCallStep
-from ..tool_execution_step import ToolExecutionStep
-from ..memory_retrieval_step import MemoryRetrievalStep
-
-__all__ = [
- "TurnResponseEvent",
- "Payload",
- "PayloadAgentTurnResponseStepStartPayload",
- "PayloadAgentTurnResponseStepProgressPayload",
- "PayloadAgentTurnResponseStepProgressPayloadDelta",
- "PayloadAgentTurnResponseStepProgressPayloadDeltaTextDelta",
- "PayloadAgentTurnResponseStepProgressPayloadDeltaImageDelta",
- "PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDelta",
- "PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDeltaToolCall",
- "PayloadAgentTurnResponseStepCompletePayload",
- "PayloadAgentTurnResponseStepCompletePayloadStepDetails",
- "PayloadAgentTurnResponseTurnStartPayload",
- "PayloadAgentTurnResponseTurnCompletePayload",
- "PayloadAgentTurnResponseTurnAwaitingInputPayload",
-]
-
-
-class PayloadAgentTurnResponseStepStartPayload(BaseModel):
- event_type: Literal["step_start"]
- """Type of event being reported"""
-
- step_id: str
- """Unique identifier for the step within a turn"""
-
- step_type: Literal["inference", "tool_execution", "shield_call", "memory_retrieval"]
- """Type of step being executed"""
-
- metadata: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
- """(Optional) Additional metadata for the step"""
-
-
-class PayloadAgentTurnResponseStepProgressPayloadDeltaTextDelta(BaseModel):
- text: str
- """The incremental text content"""
-
- type: Literal["text"]
- """Discriminator type of the delta. Always "text" """
-
-
-class PayloadAgentTurnResponseStepProgressPayloadDeltaImageDelta(BaseModel):
- image: str
- """The incremental image data as bytes"""
-
- type: Literal["image"]
- """Discriminator type of the delta. Always "image" """
-
-
-PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDeltaToolCall: TypeAlias = Union[str, ToolCall]
-
-
-class PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDelta(BaseModel):
- parse_status: Literal["started", "in_progress", "failed", "succeeded"]
- """Current parsing status of the tool call"""
-
- tool_call: PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDeltaToolCall
- """Either an in-progress tool call string or the final parsed tool call"""
-
- type: Literal["tool_call"]
- """Discriminator type of the delta. Always "tool_call" """
-
-
-PayloadAgentTurnResponseStepProgressPayloadDelta: TypeAlias = Annotated[
- Union[
- PayloadAgentTurnResponseStepProgressPayloadDeltaTextDelta,
- PayloadAgentTurnResponseStepProgressPayloadDeltaImageDelta,
- PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDelta,
- ],
- PropertyInfo(discriminator="type"),
-]
-
-
-class PayloadAgentTurnResponseStepProgressPayload(BaseModel):
- delta: PayloadAgentTurnResponseStepProgressPayloadDelta
- """Incremental content changes during step execution"""
-
- event_type: Literal["step_progress"]
- """Type of event being reported"""
-
- step_id: str
- """Unique identifier for the step within a turn"""
-
- step_type: Literal["inference", "tool_execution", "shield_call", "memory_retrieval"]
- """Type of step being executed"""
-
-
-PayloadAgentTurnResponseStepCompletePayloadStepDetails: TypeAlias = Annotated[
- Union[InferenceStep, ToolExecutionStep, ShieldCallStep, MemoryRetrievalStep],
- PropertyInfo(discriminator="step_type"),
-]
-
-
-class PayloadAgentTurnResponseStepCompletePayload(BaseModel):
- event_type: Literal["step_complete"]
- """Type of event being reported"""
-
- step_details: PayloadAgentTurnResponseStepCompletePayloadStepDetails
- """Complete details of the executed step"""
-
- step_id: str
- """Unique identifier for the step within a turn"""
-
- step_type: Literal["inference", "tool_execution", "shield_call", "memory_retrieval"]
- """Type of step being executed"""
-
-
-class PayloadAgentTurnResponseTurnStartPayload(BaseModel):
- event_type: Literal["turn_start"]
- """Type of event being reported"""
-
- turn_id: str
- """Unique identifier for the turn within a session"""
-
-
-class PayloadAgentTurnResponseTurnCompletePayload(BaseModel):
- event_type: Literal["turn_complete"]
- """Type of event being reported"""
-
- turn: Turn
- """Complete turn data including all steps and results"""
-
-
-class PayloadAgentTurnResponseTurnAwaitingInputPayload(BaseModel):
- event_type: Literal["turn_awaiting_input"]
- """Type of event being reported"""
-
- turn: Turn
- """Turn data when waiting for external tool responses"""
-
-
-Payload: TypeAlias = Annotated[
- Union[
- PayloadAgentTurnResponseStepStartPayload,
- PayloadAgentTurnResponseStepProgressPayload,
- PayloadAgentTurnResponseStepCompletePayload,
- PayloadAgentTurnResponseTurnStartPayload,
- PayloadAgentTurnResponseTurnCompletePayload,
- PayloadAgentTurnResponseTurnAwaitingInputPayload,
- ],
- PropertyInfo(discriminator="event_type"),
-]
-
-
-class TurnResponseEvent(BaseModel):
- payload: Payload
- """Event-specific payload containing event data"""
diff --git a/src/llama_stack_client/types/alpha/__init__.py b/src/llama_stack_client/types/alpha/__init__.py
new file mode 100644
index 00000000..0740f223
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/__init__.py
@@ -0,0 +1,17 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .tool_response import ToolResponse as ToolResponse
+from .inference_step import InferenceStep as InferenceStep
+from .shield_call_step import ShieldCallStep as ShieldCallStep
+from .agent_list_params import AgentListParams as AgentListParams
+from .agent_create_params import AgentCreateParams as AgentCreateParams
+from .agent_list_response import AgentListResponse as AgentListResponse
+from .tool_execution_step import ToolExecutionStep as ToolExecutionStep
+from .tool_response_param import ToolResponseParam as ToolResponseParam
+from .agent_create_response import AgentCreateResponse as AgentCreateResponse
+from .memory_retrieval_step import MemoryRetrievalStep as MemoryRetrievalStep
+from .agent_retrieve_response import AgentRetrieveResponse as AgentRetrieveResponse
+from .inference_rerank_params import InferenceRerankParams as InferenceRerankParams
+from .inference_rerank_response import InferenceRerankResponse as InferenceRerankResponse
diff --git a/src/llama_stack_client/types/agent_create_params.py b/src/llama_stack_client/types/alpha/agent_create_params.py
similarity index 86%
rename from src/llama_stack_client/types/agent_create_params.py
rename to src/llama_stack_client/types/alpha/agent_create_params.py
index 525cf1e2..368704b2 100644
--- a/src/llama_stack_client/types/agent_create_params.py
+++ b/src/llama_stack_client/types/alpha/agent_create_params.py
@@ -4,7 +4,7 @@
from typing_extensions import Required, TypedDict
-from .shared_params.agent_config import AgentConfig
+from ..shared_params.agent_config import AgentConfig
__all__ = ["AgentCreateParams"]
diff --git a/src/llama_stack_client/types/agent_create_response.py b/src/llama_stack_client/types/alpha/agent_create_response.py
similarity index 87%
rename from src/llama_stack_client/types/agent_create_response.py
rename to src/llama_stack_client/types/alpha/agent_create_response.py
index 24fe864e..9b155198 100644
--- a/src/llama_stack_client/types/agent_create_response.py
+++ b/src/llama_stack_client/types/alpha/agent_create_response.py
@@ -1,6 +1,6 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from .._models import BaseModel
+from ..._models import BaseModel
__all__ = ["AgentCreateResponse"]
diff --git a/src/llama_stack_client/types/agent_list_params.py b/src/llama_stack_client/types/alpha/agent_list_params.py
similarity index 100%
rename from src/llama_stack_client/types/agent_list_params.py
rename to src/llama_stack_client/types/alpha/agent_list_params.py
diff --git a/src/llama_stack_client/types/agent_list_response.py b/src/llama_stack_client/types/alpha/agent_list_response.py
similarity index 93%
rename from src/llama_stack_client/types/agent_list_response.py
rename to src/llama_stack_client/types/alpha/agent_list_response.py
index d0640e21..69de5001 100644
--- a/src/llama_stack_client/types/agent_list_response.py
+++ b/src/llama_stack_client/types/alpha/agent_list_response.py
@@ -2,7 +2,7 @@
from typing import Dict, List, Union, Optional
-from .._models import BaseModel
+from ..._models import BaseModel
__all__ = ["AgentListResponse"]
diff --git a/src/llama_stack_client/types/agent_retrieve_response.py b/src/llama_stack_client/types/alpha/agent_retrieve_response.py
similarity index 83%
rename from src/llama_stack_client/types/agent_retrieve_response.py
rename to src/llama_stack_client/types/alpha/agent_retrieve_response.py
index 1671a9fc..87d79b7b 100644
--- a/src/llama_stack_client/types/agent_retrieve_response.py
+++ b/src/llama_stack_client/types/alpha/agent_retrieve_response.py
@@ -2,8 +2,8 @@
from datetime import datetime
-from .._models import BaseModel
-from .shared.agent_config import AgentConfig
+from ..._models import BaseModel
+from ..shared.agent_config import AgentConfig
__all__ = ["AgentRetrieveResponse"]
diff --git a/src/llama_stack_client/types/agents/__init__.py b/src/llama_stack_client/types/alpha/agents/__init__.py
similarity index 81%
rename from src/llama_stack_client/types/agents/__init__.py
rename to src/llama_stack_client/types/alpha/agents/__init__.py
index 3a144840..2a4f602d 100644
--- a/src/llama_stack_client/types/agents/__init__.py
+++ b/src/llama_stack_client/types/alpha/agents/__init__.py
@@ -7,10 +7,8 @@
from .turn_create_params import TurnCreateParams as TurnCreateParams
from .turn_resume_params import TurnResumeParams as TurnResumeParams
from .session_list_params import SessionListParams as SessionListParams
-from .turn_response_event import TurnResponseEvent as TurnResponseEvent
from .session_create_params import SessionCreateParams as SessionCreateParams
from .session_list_response import SessionListResponse as SessionListResponse
from .step_retrieve_response import StepRetrieveResponse as StepRetrieveResponse
from .session_create_response import SessionCreateResponse as SessionCreateResponse
from .session_retrieve_params import SessionRetrieveParams as SessionRetrieveParams
-from .agent_turn_response_stream_chunk import AgentTurnResponseStreamChunk as AgentTurnResponseStreamChunk
diff --git a/src/llama_stack_client/types/agents/session.py b/src/llama_stack_client/types/alpha/agents/session.py
similarity index 93%
rename from src/llama_stack_client/types/agents/session.py
rename to src/llama_stack_client/types/alpha/agents/session.py
index 1d3d697e..9b60853a 100644
--- a/src/llama_stack_client/types/agents/session.py
+++ b/src/llama_stack_client/types/alpha/agents/session.py
@@ -4,7 +4,7 @@
from datetime import datetime
from .turn import Turn
-from ..._models import BaseModel
+from ...._models import BaseModel
__all__ = ["Session"]
diff --git a/src/llama_stack_client/types/agents/session_create_params.py b/src/llama_stack_client/types/alpha/agents/session_create_params.py
similarity index 100%
rename from src/llama_stack_client/types/agents/session_create_params.py
rename to src/llama_stack_client/types/alpha/agents/session_create_params.py
diff --git a/src/llama_stack_client/types/agents/session_create_response.py b/src/llama_stack_client/types/alpha/agents/session_create_response.py
similarity index 87%
rename from src/llama_stack_client/types/agents/session_create_response.py
rename to src/llama_stack_client/types/alpha/agents/session_create_response.py
index e7fe2a06..7d30c61a 100644
--- a/src/llama_stack_client/types/agents/session_create_response.py
+++ b/src/llama_stack_client/types/alpha/agents/session_create_response.py
@@ -1,6 +1,6 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from ..._models import BaseModel
+from ...._models import BaseModel
__all__ = ["SessionCreateResponse"]
diff --git a/src/llama_stack_client/types/agents/session_list_params.py b/src/llama_stack_client/types/alpha/agents/session_list_params.py
similarity index 100%
rename from src/llama_stack_client/types/agents/session_list_params.py
rename to src/llama_stack_client/types/alpha/agents/session_list_params.py
diff --git a/src/llama_stack_client/types/agents/session_list_response.py b/src/llama_stack_client/types/alpha/agents/session_list_response.py
similarity index 93%
rename from src/llama_stack_client/types/agents/session_list_response.py
rename to src/llama_stack_client/types/alpha/agents/session_list_response.py
index e70ecc46..23a51baf 100644
--- a/src/llama_stack_client/types/agents/session_list_response.py
+++ b/src/llama_stack_client/types/alpha/agents/session_list_response.py
@@ -2,7 +2,7 @@
from typing import Dict, List, Union, Optional
-from ..._models import BaseModel
+from ...._models import BaseModel
__all__ = ["SessionListResponse"]
diff --git a/src/llama_stack_client/types/agents/session_retrieve_params.py b/src/llama_stack_client/types/alpha/agents/session_retrieve_params.py
similarity index 91%
rename from src/llama_stack_client/types/agents/session_retrieve_params.py
rename to src/llama_stack_client/types/alpha/agents/session_retrieve_params.py
index aeff0ff7..116190cc 100644
--- a/src/llama_stack_client/types/agents/session_retrieve_params.py
+++ b/src/llama_stack_client/types/alpha/agents/session_retrieve_params.py
@@ -4,7 +4,7 @@
from typing_extensions import Required, TypedDict
-from ..._types import SequenceNotStr
+from ...._types import SequenceNotStr
__all__ = ["SessionRetrieveParams"]
diff --git a/src/llama_stack_client/types/agents/step_retrieve_response.py b/src/llama_stack_client/types/alpha/agents/step_retrieve_response.py
similarity index 90%
rename from src/llama_stack_client/types/agents/step_retrieve_response.py
rename to src/llama_stack_client/types/alpha/agents/step_retrieve_response.py
index 10fc13d2..55b64355 100644
--- a/src/llama_stack_client/types/agents/step_retrieve_response.py
+++ b/src/llama_stack_client/types/alpha/agents/step_retrieve_response.py
@@ -3,8 +3,8 @@
from typing import Union
from typing_extensions import Annotated, TypeAlias
-from ..._utils import PropertyInfo
-from ..._models import BaseModel
+from ...._utils import PropertyInfo
+from ...._models import BaseModel
from ..inference_step import InferenceStep
from ..shield_call_step import ShieldCallStep
from ..tool_execution_step import ToolExecutionStep
diff --git a/src/llama_stack_client/types/agents/turn.py b/src/llama_stack_client/types/alpha/agents/turn.py
similarity index 91%
rename from src/llama_stack_client/types/agents/turn.py
rename to src/llama_stack_client/types/alpha/agents/turn.py
index 386d7f78..74ef22aa 100644
--- a/src/llama_stack_client/types/agents/turn.py
+++ b/src/llama_stack_client/types/alpha/agents/turn.py
@@ -4,16 +4,16 @@
from datetime import datetime
from typing_extensions import Literal, Annotated, TypeAlias
-from ..._utils import PropertyInfo
-from ..._models import BaseModel
+from ...._utils import PropertyInfo
+from ...._models import BaseModel
from ..inference_step import InferenceStep
from ..shield_call_step import ShieldCallStep
-from ..shared.user_message import UserMessage
from ..tool_execution_step import ToolExecutionStep
+from ...shared.user_message import UserMessage
from ..memory_retrieval_step import MemoryRetrievalStep
-from ..shared.completion_message import CompletionMessage
-from ..shared.tool_response_message import ToolResponseMessage
-from ..shared.interleaved_content_item import InterleavedContentItem
+from ...shared.completion_message import CompletionMessage
+from ...shared.tool_response_message import ToolResponseMessage
+from ...shared.interleaved_content_item import InterleavedContentItem
__all__ = [
"Turn",
diff --git a/src/llama_stack_client/types/agents/turn_create_params.py b/src/llama_stack_client/types/alpha/agents/turn_create_params.py
similarity index 95%
rename from src/llama_stack_client/types/agents/turn_create_params.py
rename to src/llama_stack_client/types/alpha/agents/turn_create_params.py
index 8c8e4999..7225959a 100644
--- a/src/llama_stack_client/types/agents/turn_create_params.py
+++ b/src/llama_stack_client/types/alpha/agents/turn_create_params.py
@@ -5,10 +5,10 @@
from typing import Dict, Union, Iterable
from typing_extensions import Literal, Required, TypeAlias, TypedDict
-from ..._types import SequenceNotStr
-from ..shared_params.user_message import UserMessage
-from ..shared_params.tool_response_message import ToolResponseMessage
-from ..shared_params.interleaved_content_item import InterleavedContentItem
+from ...._types import SequenceNotStr
+from ...shared_params.user_message import UserMessage
+from ...shared_params.tool_response_message import ToolResponseMessage
+from ...shared_params.interleaved_content_item import InterleavedContentItem
__all__ = [
"TurnCreateParamsBase",
diff --git a/src/llama_stack_client/types/agents/turn_resume_params.py b/src/llama_stack_client/types/alpha/agents/turn_resume_params.py
similarity index 100%
rename from src/llama_stack_client/types/agents/turn_resume_params.py
rename to src/llama_stack_client/types/alpha/agents/turn_resume_params.py
diff --git a/src/llama_stack_client/types/inference_rerank_params.py b/src/llama_stack_client/types/alpha/inference_rerank_params.py
similarity index 98%
rename from src/llama_stack_client/types/inference_rerank_params.py
rename to src/llama_stack_client/types/alpha/inference_rerank_params.py
index 8f8c4d64..4c506240 100644
--- a/src/llama_stack_client/types/inference_rerank_params.py
+++ b/src/llama_stack_client/types/alpha/inference_rerank_params.py
@@ -5,7 +5,7 @@
from typing import Union
from typing_extensions import Literal, Required, TypeAlias, TypedDict
-from .._types import SequenceNotStr
+from ..._types import SequenceNotStr
__all__ = [
"InferenceRerankParams",
diff --git a/src/llama_stack_client/types/inference_rerank_response.py b/src/llama_stack_client/types/alpha/inference_rerank_response.py
similarity index 94%
rename from src/llama_stack_client/types/inference_rerank_response.py
rename to src/llama_stack_client/types/alpha/inference_rerank_response.py
index e74fc7e6..391f8a3b 100644
--- a/src/llama_stack_client/types/inference_rerank_response.py
+++ b/src/llama_stack_client/types/alpha/inference_rerank_response.py
@@ -3,7 +3,7 @@
from typing import List
from typing_extensions import TypeAlias
-from .._models import BaseModel
+from ..._models import BaseModel
__all__ = ["InferenceRerankResponse", "InferenceRerankResponseItem"]
diff --git a/src/llama_stack_client/types/inference_step.py b/src/llama_stack_client/types/alpha/inference_step.py
similarity index 89%
rename from src/llama_stack_client/types/inference_step.py
rename to src/llama_stack_client/types/alpha/inference_step.py
index 2aecb193..a7e446d1 100644
--- a/src/llama_stack_client/types/inference_step.py
+++ b/src/llama_stack_client/types/alpha/inference_step.py
@@ -6,8 +6,8 @@
from pydantic import Field as FieldInfo
-from .._models import BaseModel
-from .shared.completion_message import CompletionMessage
+from ..._models import BaseModel
+from ..shared.completion_message import CompletionMessage
__all__ = ["InferenceStep"]
diff --git a/src/llama_stack_client/types/memory_retrieval_step.py b/src/llama_stack_client/types/alpha/memory_retrieval_step.py
similarity index 89%
rename from src/llama_stack_client/types/memory_retrieval_step.py
rename to src/llama_stack_client/types/alpha/memory_retrieval_step.py
index 887e9986..3d44dee0 100644
--- a/src/llama_stack_client/types/memory_retrieval_step.py
+++ b/src/llama_stack_client/types/alpha/memory_retrieval_step.py
@@ -4,8 +4,8 @@
from datetime import datetime
from typing_extensions import Literal
-from .._models import BaseModel
-from .shared.interleaved_content import InterleavedContent
+from ..._models import BaseModel
+from ..shared.interleaved_content import InterleavedContent
__all__ = ["MemoryRetrievalStep"]
diff --git a/src/llama_stack_client/types/shield_call_step.py b/src/llama_stack_client/types/alpha/shield_call_step.py
similarity index 88%
rename from src/llama_stack_client/types/shield_call_step.py
rename to src/llama_stack_client/types/alpha/shield_call_step.py
index e19734c6..80176555 100644
--- a/src/llama_stack_client/types/shield_call_step.py
+++ b/src/llama_stack_client/types/alpha/shield_call_step.py
@@ -4,8 +4,8 @@
from datetime import datetime
from typing_extensions import Literal
-from .._models import BaseModel
-from .shared.safety_violation import SafetyViolation
+from ..._models import BaseModel
+from ..shared.safety_violation import SafetyViolation
__all__ = ["ShieldCallStep"]
diff --git a/src/llama_stack_client/types/tool_execution_step.py b/src/llama_stack_client/types/alpha/tool_execution_step.py
similarity index 91%
rename from src/llama_stack_client/types/tool_execution_step.py
rename to src/llama_stack_client/types/alpha/tool_execution_step.py
index f68115fc..1761e889 100644
--- a/src/llama_stack_client/types/tool_execution_step.py
+++ b/src/llama_stack_client/types/alpha/tool_execution_step.py
@@ -4,9 +4,9 @@
from datetime import datetime
from typing_extensions import Literal
-from .._models import BaseModel
+from ..._models import BaseModel
from .tool_response import ToolResponse
-from .shared.tool_call import ToolCall
+from ..shared.tool_call import ToolCall
__all__ = ["ToolExecutionStep"]
diff --git a/src/llama_stack_client/types/tool_response.py b/src/llama_stack_client/types/alpha/tool_response.py
similarity index 88%
rename from src/llama_stack_client/types/tool_response.py
rename to src/llama_stack_client/types/alpha/tool_response.py
index 7750494e..fb749f75 100644
--- a/src/llama_stack_client/types/tool_response.py
+++ b/src/llama_stack_client/types/alpha/tool_response.py
@@ -3,8 +3,8 @@
from typing import Dict, List, Union, Optional
from typing_extensions import Literal
-from .._models import BaseModel
-from .shared.interleaved_content import InterleavedContent
+from ..._models import BaseModel
+from ..shared.interleaved_content import InterleavedContent
__all__ = ["ToolResponse"]
diff --git a/src/llama_stack_client/types/tool_response_param.py b/src/llama_stack_client/types/alpha/tool_response_param.py
similarity index 92%
rename from src/llama_stack_client/types/tool_response_param.py
rename to src/llama_stack_client/types/alpha/tool_response_param.py
index 386658f9..e833211f 100644
--- a/src/llama_stack_client/types/tool_response_param.py
+++ b/src/llama_stack_client/types/alpha/tool_response_param.py
@@ -5,7 +5,7 @@
from typing import Dict, Union, Iterable
from typing_extensions import Literal, Required, TypedDict
-from .shared_params.interleaved_content import InterleavedContent
+from ..shared_params.interleaved_content import InterleavedContent
__all__ = ["ToolResponseParam"]
diff --git a/src/llama_stack_client/types/shared/__init__.py b/src/llama_stack_client/types/shared/__init__.py
index f346cda7..2d353f89 100644
--- a/src/llama_stack_client/types/shared/__init__.py
+++ b/src/llama_stack_client/types/shared/__init__.py
@@ -1,6 +1,5 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from .metric import Metric as Metric
from .message import Message as Message
from .document import Document as Document
from .tool_call import ToolCall as ToolCall
@@ -16,7 +15,5 @@
from .safety_violation import SafetyViolation as SafetyViolation
from .completion_message import CompletionMessage as CompletionMessage
from .interleaved_content import InterleavedContent as InterleavedContent
-from .tool_param_definition import ToolParamDefinition as ToolParamDefinition
from .tool_response_message import ToolResponseMessage as ToolResponseMessage
-from .chat_completion_response import ChatCompletionResponse as ChatCompletionResponse
from .interleaved_content_item import InterleavedContentItem as InterleavedContentItem
diff --git a/src/llama_stack_client/types/shared/chat_completion_response.py b/src/llama_stack_client/types/shared/chat_completion_response.py
deleted file mode 100644
index eb78a109..00000000
--- a/src/llama_stack_client/types/shared/chat_completion_response.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Optional
-
-from .metric import Metric
-from ..._models import BaseModel
-from .completion_message import CompletionMessage
-
-__all__ = ["ChatCompletionResponse", "Logprob"]
-
-
-class Logprob(BaseModel):
- logprobs_by_token: Dict[str, float]
- """Dictionary mapping tokens to their log probabilities"""
-
-
-class ChatCompletionResponse(BaseModel):
- completion_message: CompletionMessage
- """The complete response message"""
-
- logprobs: Optional[List[Logprob]] = None
- """Optional log probabilities for generated tokens"""
-
- metrics: Optional[List[Metric]] = None
- """(Optional) List of metrics associated with the API response"""
diff --git a/src/llama_stack_client/types/shared/metric.py b/src/llama_stack_client/types/shared/metric.py
deleted file mode 100644
index 66ecdaf8..00000000
--- a/src/llama_stack_client/types/shared/metric.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-
-from ..._models import BaseModel
-
-__all__ = ["Metric"]
-
-
-class Metric(BaseModel):
- metric: str
- """The name of the metric"""
-
- value: float
- """The numeric value of the metric"""
-
- unit: Optional[str] = None
- """(Optional) The unit of measurement for the metric value"""
diff --git a/src/llama_stack_client/types/shared/tool_param_definition.py b/src/llama_stack_client/types/shared/tool_param_definition.py
deleted file mode 100644
index 316f1e01..00000000
--- a/src/llama_stack_client/types/shared/tool_param_definition.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Union, Optional
-
-from ..._models import BaseModel
-
-__all__ = ["ToolParamDefinition"]
-
-
-class ToolParamDefinition(BaseModel):
- param_type: str
-
- default: Union[bool, float, str, List[object], object, None] = None
-
- description: Optional[str] = None
-
- items: Union[bool, float, str, List[object], object, None] = None
-
- required: Optional[bool] = None
-
- title: Optional[str] = None
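With the three deletions above, `Metric`, `ToolParamDefinition`, and `ChatCompletionResponse` disappear from `llama_stack_client.types.shared`, so any downstream import of them now fails at import time. A quick sanity check, assuming the package is installed at this commit:

import importlib

shared = importlib.import_module("llama_stack_client.types.shared")
for name in ("Metric", "ToolParamDefinition", "ChatCompletionResponse"):
    # Each of these was re-exported from shared/__init__.py before this patch.
    assert not hasattr(shared, name), f"{name} should no longer be exported"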
diff --git a/tests/api_resources/agents/__init__.py b/tests/api_resources/alpha/__init__.py
similarity index 100%
rename from tests/api_resources/agents/__init__.py
rename to tests/api_resources/alpha/__init__.py
diff --git a/tests/api_resources/alpha/agents/__init__.py b/tests/api_resources/alpha/agents/__init__.py
new file mode 100644
index 00000000..fd8019a9
--- /dev/null
+++ b/tests/api_resources/alpha/agents/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/agents/test_session.py b/tests/api_resources/alpha/agents/test_session.py
similarity index 82%
rename from tests/api_resources/agents/test_session.py
rename to tests/api_resources/alpha/agents/test_session.py
index b49ab492..9c49e6bc 100644
--- a/tests/api_resources/agents/test_session.py
+++ b/tests/api_resources/alpha/agents/test_session.py
@@ -9,7 +9,7 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types.agents import (
+from llama_stack_client.types.alpha.agents import (
Session,
SessionListResponse,
SessionCreateResponse,
@@ -23,7 +23,7 @@ class TestSession:
@parametrize
def test_method_create(self, client: LlamaStackClient) -> None:
- session = client.agents.session.create(
+ session = client.alpha.agents.session.create(
agent_id="agent_id",
session_name="session_name",
)
@@ -31,7 +31,7 @@ def test_method_create(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_create(self, client: LlamaStackClient) -> None:
- response = client.agents.session.with_raw_response.create(
+ response = client.alpha.agents.session.with_raw_response.create(
agent_id="agent_id",
session_name="session_name",
)
@@ -43,7 +43,7 @@ def test_raw_response_create(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_create(self, client: LlamaStackClient) -> None:
- with client.agents.session.with_streaming_response.create(
+ with client.alpha.agents.session.with_streaming_response.create(
agent_id="agent_id",
session_name="session_name",
) as response:
@@ -58,14 +58,14 @@ def test_streaming_response_create(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_create(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.session.with_raw_response.create(
+ client.alpha.agents.session.with_raw_response.create(
agent_id="",
session_name="session_name",
)
@parametrize
def test_method_retrieve(self, client: LlamaStackClient) -> None:
- session = client.agents.session.retrieve(
+ session = client.alpha.agents.session.retrieve(
session_id="session_id",
agent_id="agent_id",
)
@@ -73,7 +73,7 @@ def test_method_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_retrieve_with_all_params(self, client: LlamaStackClient) -> None:
- session = client.agents.session.retrieve(
+ session = client.alpha.agents.session.retrieve(
session_id="session_id",
agent_id="agent_id",
turn_ids=["string"],
@@ -82,7 +82,7 @@ def test_method_retrieve_with_all_params(self, client: LlamaStackClient) -> None
@parametrize
def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
- response = client.agents.session.with_raw_response.retrieve(
+ response = client.alpha.agents.session.with_raw_response.retrieve(
session_id="session_id",
agent_id="agent_id",
)
@@ -94,7 +94,7 @@ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
- with client.agents.session.with_streaming_response.retrieve(
+ with client.alpha.agents.session.with_streaming_response.retrieve(
session_id="session_id",
agent_id="agent_id",
) as response:
@@ -109,27 +109,27 @@ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.session.with_raw_response.retrieve(
+ client.alpha.agents.session.with_raw_response.retrieve(
session_id="session_id",
agent_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.agents.session.with_raw_response.retrieve(
+ client.alpha.agents.session.with_raw_response.retrieve(
session_id="",
agent_id="agent_id",
)
@parametrize
def test_method_list(self, client: LlamaStackClient) -> None:
- session = client.agents.session.list(
+ session = client.alpha.agents.session.list(
agent_id="agent_id",
)
assert_matches_type(SessionListResponse, session, path=["response"])
@parametrize
def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
- session = client.agents.session.list(
+ session = client.alpha.agents.session.list(
agent_id="agent_id",
limit=0,
start_index=0,
@@ -138,7 +138,7 @@ def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_list(self, client: LlamaStackClient) -> None:
- response = client.agents.session.with_raw_response.list(
+ response = client.alpha.agents.session.with_raw_response.list(
agent_id="agent_id",
)
@@ -149,7 +149,7 @@ def test_raw_response_list(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_list(self, client: LlamaStackClient) -> None:
- with client.agents.session.with_streaming_response.list(
+ with client.alpha.agents.session.with_streaming_response.list(
agent_id="agent_id",
) as response:
assert not response.is_closed
@@ -163,13 +163,13 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_list(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.session.with_raw_response.list(
+ client.alpha.agents.session.with_raw_response.list(
agent_id="",
)
@parametrize
def test_method_delete(self, client: LlamaStackClient) -> None:
- session = client.agents.session.delete(
+ session = client.alpha.agents.session.delete(
session_id="session_id",
agent_id="agent_id",
)
@@ -177,7 +177,7 @@ def test_method_delete(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_delete(self, client: LlamaStackClient) -> None:
- response = client.agents.session.with_raw_response.delete(
+ response = client.alpha.agents.session.with_raw_response.delete(
session_id="session_id",
agent_id="agent_id",
)
@@ -189,7 +189,7 @@ def test_raw_response_delete(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_delete(self, client: LlamaStackClient) -> None:
- with client.agents.session.with_streaming_response.delete(
+ with client.alpha.agents.session.with_streaming_response.delete(
session_id="session_id",
agent_id="agent_id",
) as response:
@@ -204,13 +204,13 @@ def test_streaming_response_delete(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_delete(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.session.with_raw_response.delete(
+ client.alpha.agents.session.with_raw_response.delete(
session_id="session_id",
agent_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.agents.session.with_raw_response.delete(
+ client.alpha.agents.session.with_raw_response.delete(
session_id="",
agent_id="agent_id",
)
@@ -223,7 +223,7 @@ class TestAsyncSession:
@parametrize
async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None:
- session = await async_client.agents.session.create(
+ session = await async_client.alpha.agents.session.create(
agent_id="agent_id",
session_name="session_name",
)
@@ -231,7 +231,7 @@ async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None:
@parametrize
async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.session.with_raw_response.create(
+ response = await async_client.alpha.agents.session.with_raw_response.create(
agent_id="agent_id",
session_name="session_name",
)
@@ -243,7 +243,7 @@ async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) ->
@parametrize
async def test_streaming_response_create(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.session.with_streaming_response.create(
+ async with async_client.alpha.agents.session.with_streaming_response.create(
agent_id="agent_id",
session_name="session_name",
) as response:
@@ -258,14 +258,14 @@ async def test_streaming_response_create(self, async_client: AsyncLlamaStackClie
@parametrize
async def test_path_params_create(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.session.with_raw_response.create(
+ await async_client.alpha.agents.session.with_raw_response.create(
agent_id="",
session_name="session_name",
)
@parametrize
async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- session = await async_client.agents.session.retrieve(
+ session = await async_client.alpha.agents.session.retrieve(
session_id="session_id",
agent_id="agent_id",
)
@@ -273,7 +273,7 @@ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> Non
@parametrize
async def test_method_retrieve_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- session = await async_client.agents.session.retrieve(
+ session = await async_client.alpha.agents.session.retrieve(
session_id="session_id",
agent_id="agent_id",
turn_ids=["string"],
@@ -282,7 +282,7 @@ async def test_method_retrieve_with_all_params(self, async_client: AsyncLlamaSta
@parametrize
async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.session.with_raw_response.retrieve(
+ response = await async_client.alpha.agents.session.with_raw_response.retrieve(
session_id="session_id",
agent_id="agent_id",
)
@@ -294,7 +294,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient)
@parametrize
async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.session.with_streaming_response.retrieve(
+ async with async_client.alpha.agents.session.with_streaming_response.retrieve(
session_id="session_id",
agent_id="agent_id",
) as response:
@@ -309,27 +309,27 @@ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.session.with_raw_response.retrieve(
+ await async_client.alpha.agents.session.with_raw_response.retrieve(
session_id="session_id",
agent_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.agents.session.with_raw_response.retrieve(
+ await async_client.alpha.agents.session.with_raw_response.retrieve(
session_id="",
agent_id="agent_id",
)
@parametrize
async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
- session = await async_client.agents.session.list(
+ session = await async_client.alpha.agents.session.list(
agent_id="agent_id",
)
assert_matches_type(SessionListResponse, session, path=["response"])
@parametrize
async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- session = await async_client.agents.session.list(
+ session = await async_client.alpha.agents.session.list(
agent_id="agent_id",
limit=0,
start_index=0,
@@ -338,7 +338,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.session.with_raw_response.list(
+ response = await async_client.alpha.agents.session.with_raw_response.list(
agent_id="agent_id",
)
@@ -349,7 +349,7 @@ async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> N
@parametrize
async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.session.with_streaming_response.list(
+ async with async_client.alpha.agents.session.with_streaming_response.list(
agent_id="agent_id",
) as response:
assert not response.is_closed
@@ -363,13 +363,13 @@ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient
@parametrize
async def test_path_params_list(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.session.with_raw_response.list(
+ await async_client.alpha.agents.session.with_raw_response.list(
agent_id="",
)
@parametrize
async def test_method_delete(self, async_client: AsyncLlamaStackClient) -> None:
- session = await async_client.agents.session.delete(
+ session = await async_client.alpha.agents.session.delete(
session_id="session_id",
agent_id="agent_id",
)
@@ -377,7 +377,7 @@ async def test_method_delete(self, async_client: AsyncLlamaStackClient) -> None:
@parametrize
async def test_raw_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.session.with_raw_response.delete(
+ response = await async_client.alpha.agents.session.with_raw_response.delete(
session_id="session_id",
agent_id="agent_id",
)
@@ -389,7 +389,7 @@ async def test_raw_response_delete(self, async_client: AsyncLlamaStackClient) ->
@parametrize
async def test_streaming_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.session.with_streaming_response.delete(
+ async with async_client.alpha.agents.session.with_streaming_response.delete(
session_id="session_id",
agent_id="agent_id",
) as response:
@@ -404,13 +404,13 @@ async def test_streaming_response_delete(self, async_client: AsyncLlamaStackClie
@parametrize
async def test_path_params_delete(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.session.with_raw_response.delete(
+ await async_client.alpha.agents.session.with_raw_response.delete(
session_id="session_id",
agent_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.agents.session.with_raw_response.delete(
+ await async_client.alpha.agents.session.with_raw_response.delete(
session_id="",
agent_id="agent_id",
)
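The session test changes above are purely a path migration: every call site moves from `client.agents.session.*` to `client.alpha.agents.session.*` with identical arguments and identical non-empty path-parameter validation. A usage sketch mirroring the tests (the base URL shown is the mock-server default from the test suite, not a production endpoint):

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://127.0.0.1:4010")

# Create, retrieve, list, and delete now all go through the alpha namespace:
created = client.alpha.agents.session.create(agent_id="agent_id", session_name="session_name")
session = client.alpha.agents.session.retrieve(session_id="session_id", agent_id="agent_id")
sessions = client.alpha.agents.session.list(agent_id="agent_id", limit=0, start_index=0)
client.alpha.agents.session.delete(session_id="session_id", agent_id="agent_id")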
diff --git a/tests/api_resources/agents/test_steps.py b/tests/api_resources/alpha/agents/test_steps.py
similarity index 84%
rename from tests/api_resources/agents/test_steps.py
rename to tests/api_resources/alpha/agents/test_steps.py
index 5555a9a4..5bf35fc3 100644
--- a/tests/api_resources/agents/test_steps.py
+++ b/tests/api_resources/alpha/agents/test_steps.py
@@ -9,7 +9,7 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types.agents import StepRetrieveResponse
+from llama_stack_client.types.alpha.agents import StepRetrieveResponse
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -19,7 +19,7 @@ class TestSteps:
@parametrize
def test_method_retrieve(self, client: LlamaStackClient) -> None:
- step = client.agents.steps.retrieve(
+ step = client.alpha.agents.steps.retrieve(
step_id="step_id",
agent_id="agent_id",
session_id="session_id",
@@ -29,7 +29,7 @@ def test_method_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
- response = client.agents.steps.with_raw_response.retrieve(
+ response = client.alpha.agents.steps.with_raw_response.retrieve(
step_id="step_id",
agent_id="agent_id",
session_id="session_id",
@@ -43,7 +43,7 @@ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
- with client.agents.steps.with_streaming_response.retrieve(
+ with client.alpha.agents.steps.with_streaming_response.retrieve(
step_id="step_id",
agent_id="agent_id",
session_id="session_id",
@@ -60,7 +60,7 @@ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.steps.with_raw_response.retrieve(
+ client.alpha.agents.steps.with_raw_response.retrieve(
step_id="step_id",
agent_id="",
session_id="session_id",
@@ -68,7 +68,7 @@ def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.agents.steps.with_raw_response.retrieve(
+ client.alpha.agents.steps.with_raw_response.retrieve(
step_id="step_id",
agent_id="agent_id",
session_id="",
@@ -76,7 +76,7 @@ def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- client.agents.steps.with_raw_response.retrieve(
+ client.alpha.agents.steps.with_raw_response.retrieve(
step_id="step_id",
agent_id="agent_id",
session_id="session_id",
@@ -84,7 +84,7 @@ def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `step_id` but received ''"):
- client.agents.steps.with_raw_response.retrieve(
+ client.alpha.agents.steps.with_raw_response.retrieve(
step_id="",
agent_id="agent_id",
session_id="session_id",
@@ -99,7 +99,7 @@ class TestAsyncSteps:
@parametrize
async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- step = await async_client.agents.steps.retrieve(
+ step = await async_client.alpha.agents.steps.retrieve(
step_id="step_id",
agent_id="agent_id",
session_id="session_id",
@@ -109,7 +109,7 @@ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> Non
@parametrize
async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.steps.with_raw_response.retrieve(
+ response = await async_client.alpha.agents.steps.with_raw_response.retrieve(
step_id="step_id",
agent_id="agent_id",
session_id="session_id",
@@ -123,7 +123,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient)
@parametrize
async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.steps.with_streaming_response.retrieve(
+ async with async_client.alpha.agents.steps.with_streaming_response.retrieve(
step_id="step_id",
agent_id="agent_id",
session_id="session_id",
@@ -140,7 +140,7 @@ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.steps.with_raw_response.retrieve(
+ await async_client.alpha.agents.steps.with_raw_response.retrieve(
step_id="step_id",
agent_id="",
session_id="session_id",
@@ -148,7 +148,7 @@ async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.agents.steps.with_raw_response.retrieve(
+ await async_client.alpha.agents.steps.with_raw_response.retrieve(
step_id="step_id",
agent_id="agent_id",
session_id="",
@@ -156,7 +156,7 @@ async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- await async_client.agents.steps.with_raw_response.retrieve(
+ await async_client.alpha.agents.steps.with_raw_response.retrieve(
step_id="step_id",
agent_id="agent_id",
session_id="session_id",
@@ -164,7 +164,7 @@ async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `step_id` but received ''"):
- await async_client.agents.steps.with_raw_response.retrieve(
+ await async_client.alpha.agents.steps.with_raw_response.retrieve(
step_id="",
agent_id="agent_id",
session_id="session_id",
diff --git a/tests/api_resources/agents/test_turn.py b/tests/api_resources/alpha/agents/test_turn.py
similarity index 88%
rename from tests/api_resources/agents/test_turn.py
rename to tests/api_resources/alpha/agents/test_turn.py
index 31eb53f9..9a2a500f 100644
--- a/tests/api_resources/agents/test_turn.py
+++ b/tests/api_resources/alpha/agents/test_turn.py
@@ -9,7 +9,7 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types.agents import Turn
+from llama_stack_client.types.alpha.agents import Turn
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -19,7 +19,7 @@ class TestTurn:
@parametrize
def test_method_create_overload_1(self, client: LlamaStackClient) -> None:
- turn = client.agents.turn.create(
+ turn = client.alpha.agents.turn.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -33,7 +33,7 @@ def test_method_create_overload_1(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient) -> None:
- turn = client.agents.turn.create(
+ turn = client.alpha.agents.turn.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -61,7 +61,7 @@ def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient
@parametrize
def test_raw_response_create_overload_1(self, client: LlamaStackClient) -> None:
- response = client.agents.turn.with_raw_response.create(
+ response = client.alpha.agents.turn.with_raw_response.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -79,7 +79,7 @@ def test_raw_response_create_overload_1(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_create_overload_1(self, client: LlamaStackClient) -> None:
- with client.agents.turn.with_streaming_response.create(
+ with client.alpha.agents.turn.with_streaming_response.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -100,7 +100,7 @@ def test_streaming_response_create_overload_1(self, client: LlamaStackClient) ->
@parametrize
def test_path_params_create_overload_1(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.turn.with_raw_response.create(
+ client.alpha.agents.turn.with_raw_response.create(
session_id="session_id",
agent_id="",
messages=[
@@ -112,7 +112,7 @@ def test_path_params_create_overload_1(self, client: LlamaStackClient) -> None:
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.agents.turn.with_raw_response.create(
+ client.alpha.agents.turn.with_raw_response.create(
session_id="",
agent_id="agent_id",
messages=[
@@ -125,7 +125,7 @@ def test_path_params_create_overload_1(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_create_overload_2(self, client: LlamaStackClient) -> None:
- turn_stream = client.agents.turn.create(
+ turn_stream = client.alpha.agents.turn.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -140,7 +140,7 @@ def test_method_create_overload_2(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient) -> None:
- turn_stream = client.agents.turn.create(
+ turn_stream = client.alpha.agents.turn.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -168,7 +168,7 @@ def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient
@parametrize
def test_raw_response_create_overload_2(self, client: LlamaStackClient) -> None:
- response = client.agents.turn.with_raw_response.create(
+ response = client.alpha.agents.turn.with_raw_response.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -186,7 +186,7 @@ def test_raw_response_create_overload_2(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_create_overload_2(self, client: LlamaStackClient) -> None:
- with client.agents.turn.with_streaming_response.create(
+ with client.alpha.agents.turn.with_streaming_response.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -208,7 +208,7 @@ def test_streaming_response_create_overload_2(self, client: LlamaStackClient) ->
@parametrize
def test_path_params_create_overload_2(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.turn.with_raw_response.create(
+ client.alpha.agents.turn.with_raw_response.create(
session_id="session_id",
agent_id="",
messages=[
@@ -221,7 +221,7 @@ def test_path_params_create_overload_2(self, client: LlamaStackClient) -> None:
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.agents.turn.with_raw_response.create(
+ client.alpha.agents.turn.with_raw_response.create(
session_id="",
agent_id="agent_id",
messages=[
@@ -235,7 +235,7 @@ def test_path_params_create_overload_2(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_retrieve(self, client: LlamaStackClient) -> None:
- turn = client.agents.turn.retrieve(
+ turn = client.alpha.agents.turn.retrieve(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -244,7 +244,7 @@ def test_method_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
- response = client.agents.turn.with_raw_response.retrieve(
+ response = client.alpha.agents.turn.with_raw_response.retrieve(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -257,7 +257,7 @@ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
- with client.agents.turn.with_streaming_response.retrieve(
+ with client.alpha.agents.turn.with_streaming_response.retrieve(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -273,21 +273,21 @@ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.turn.with_raw_response.retrieve(
+ client.alpha.agents.turn.with_raw_response.retrieve(
turn_id="turn_id",
agent_id="",
session_id="session_id",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.agents.turn.with_raw_response.retrieve(
+ client.alpha.agents.turn.with_raw_response.retrieve(
turn_id="turn_id",
agent_id="agent_id",
session_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- client.agents.turn.with_raw_response.retrieve(
+ client.alpha.agents.turn.with_raw_response.retrieve(
turn_id="",
agent_id="agent_id",
session_id="session_id",
@@ -295,7 +295,7 @@ def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_resume_overload_1(self, client: LlamaStackClient) -> None:
- turn = client.agents.turn.resume(
+ turn = client.alpha.agents.turn.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -311,7 +311,7 @@ def test_method_resume_overload_1(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_resume_with_all_params_overload_1(self, client: LlamaStackClient) -> None:
- turn = client.agents.turn.resume(
+ turn = client.alpha.agents.turn.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -329,7 +329,7 @@ def test_method_resume_with_all_params_overload_1(self, client: LlamaStackClient
@parametrize
def test_raw_response_resume_overload_1(self, client: LlamaStackClient) -> None:
- response = client.agents.turn.with_raw_response.resume(
+ response = client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -349,7 +349,7 @@ def test_raw_response_resume_overload_1(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_resume_overload_1(self, client: LlamaStackClient) -> None:
- with client.agents.turn.with_streaming_response.resume(
+ with client.alpha.agents.turn.with_streaming_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -372,7 +372,7 @@ def test_streaming_response_resume_overload_1(self, client: LlamaStackClient) ->
@parametrize
def test_path_params_resume_overload_1(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.turn.with_raw_response.resume(
+ client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="",
session_id="session_id",
@@ -386,7 +386,7 @@ def test_path_params_resume_overload_1(self, client: LlamaStackClient) -> None:
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.agents.turn.with_raw_response.resume(
+ client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="",
@@ -400,7 +400,7 @@ def test_path_params_resume_overload_1(self, client: LlamaStackClient) -> None:
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- client.agents.turn.with_raw_response.resume(
+ client.alpha.agents.turn.with_raw_response.resume(
turn_id="",
agent_id="agent_id",
session_id="session_id",
@@ -415,7 +415,7 @@ def test_path_params_resume_overload_1(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_resume_overload_2(self, client: LlamaStackClient) -> None:
- turn_stream = client.agents.turn.resume(
+ turn_stream = client.alpha.agents.turn.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -432,7 +432,7 @@ def test_method_resume_overload_2(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_resume_overload_2(self, client: LlamaStackClient) -> None:
- response = client.agents.turn.with_raw_response.resume(
+ response = client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -452,7 +452,7 @@ def test_raw_response_resume_overload_2(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_resume_overload_2(self, client: LlamaStackClient) -> None:
- with client.agents.turn.with_streaming_response.resume(
+ with client.alpha.agents.turn.with_streaming_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -476,7 +476,7 @@ def test_streaming_response_resume_overload_2(self, client: LlamaStackClient) ->
@parametrize
def test_path_params_resume_overload_2(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.turn.with_raw_response.resume(
+ client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="",
session_id="session_id",
@@ -491,7 +491,7 @@ def test_path_params_resume_overload_2(self, client: LlamaStackClient) -> None:
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- client.agents.turn.with_raw_response.resume(
+ client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="",
@@ -506,7 +506,7 @@ def test_path_params_resume_overload_2(self, client: LlamaStackClient) -> None:
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- client.agents.turn.with_raw_response.resume(
+ client.alpha.agents.turn.with_raw_response.resume(
turn_id="",
agent_id="agent_id",
session_id="session_id",
@@ -528,7 +528,7 @@ class TestAsyncTurn:
@parametrize
async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- turn = await async_client.agents.turn.create(
+ turn = await async_client.alpha.agents.turn.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -542,7 +542,7 @@ async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClien
@parametrize
async def test_method_create_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- turn = await async_client.agents.turn.create(
+ turn = await async_client.alpha.agents.turn.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -570,7 +570,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
@parametrize
async def test_raw_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.turn.with_raw_response.create(
+ response = await async_client.alpha.agents.turn.with_raw_response.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -588,7 +588,7 @@ async def test_raw_response_create_overload_1(self, async_client: AsyncLlamaStac
@parametrize
async def test_streaming_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.turn.with_streaming_response.create(
+ async with async_client.alpha.agents.turn.with_streaming_response.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -609,7 +609,7 @@ async def test_streaming_response_create_overload_1(self, async_client: AsyncLla
@parametrize
async def test_path_params_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.turn.with_raw_response.create(
+ await async_client.alpha.agents.turn.with_raw_response.create(
session_id="session_id",
agent_id="",
messages=[
@@ -621,7 +621,7 @@ async def test_path_params_create_overload_1(self, async_client: AsyncLlamaStack
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.agents.turn.with_raw_response.create(
+ await async_client.alpha.agents.turn.with_raw_response.create(
session_id="",
agent_id="agent_id",
messages=[
@@ -634,7 +634,7 @@ async def test_path_params_create_overload_1(self, async_client: AsyncLlamaStack
@parametrize
async def test_method_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
- turn_stream = await async_client.agents.turn.create(
+ turn_stream = await async_client.alpha.agents.turn.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -649,7 +649,7 @@ async def test_method_create_overload_2(self, async_client: AsyncLlamaStackClien
@parametrize
async def test_method_create_with_all_params_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
- turn_stream = await async_client.agents.turn.create(
+ turn_stream = await async_client.alpha.agents.turn.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -677,7 +677,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
@parametrize
async def test_raw_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.turn.with_raw_response.create(
+ response = await async_client.alpha.agents.turn.with_raw_response.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -695,7 +695,7 @@ async def test_raw_response_create_overload_2(self, async_client: AsyncLlamaStac
@parametrize
async def test_streaming_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.turn.with_streaming_response.create(
+ async with async_client.alpha.agents.turn.with_streaming_response.create(
session_id="session_id",
agent_id="agent_id",
messages=[
@@ -717,7 +717,7 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncLla
@parametrize
async def test_path_params_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.turn.with_raw_response.create(
+ await async_client.alpha.agents.turn.with_raw_response.create(
session_id="session_id",
agent_id="",
messages=[
@@ -730,7 +730,7 @@ async def test_path_params_create_overload_2(self, async_client: AsyncLlamaStack
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.agents.turn.with_raw_response.create(
+ await async_client.alpha.agents.turn.with_raw_response.create(
session_id="",
agent_id="agent_id",
messages=[
@@ -744,7 +744,7 @@ async def test_path_params_create_overload_2(self, async_client: AsyncLlamaStack
@parametrize
async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- turn = await async_client.agents.turn.retrieve(
+ turn = await async_client.alpha.agents.turn.retrieve(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -753,7 +753,7 @@ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> Non
@parametrize
async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.turn.with_raw_response.retrieve(
+ response = await async_client.alpha.agents.turn.with_raw_response.retrieve(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -766,7 +766,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient)
@parametrize
async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.turn.with_streaming_response.retrieve(
+ async with async_client.alpha.agents.turn.with_streaming_response.retrieve(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -782,21 +782,21 @@ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.turn.with_raw_response.retrieve(
+ await async_client.alpha.agents.turn.with_raw_response.retrieve(
turn_id="turn_id",
agent_id="",
session_id="session_id",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.agents.turn.with_raw_response.retrieve(
+ await async_client.alpha.agents.turn.with_raw_response.retrieve(
turn_id="turn_id",
agent_id="agent_id",
session_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- await async_client.agents.turn.with_raw_response.retrieve(
+ await async_client.alpha.agents.turn.with_raw_response.retrieve(
turn_id="",
agent_id="agent_id",
session_id="session_id",
@@ -804,7 +804,7 @@ async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -
@parametrize
async def test_method_resume_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- turn = await async_client.agents.turn.resume(
+ turn = await async_client.alpha.agents.turn.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -820,7 +820,7 @@ async def test_method_resume_overload_1(self, async_client: AsyncLlamaStackClien
@parametrize
async def test_method_resume_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- turn = await async_client.agents.turn.resume(
+ turn = await async_client.alpha.agents.turn.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -838,7 +838,7 @@ async def test_method_resume_with_all_params_overload_1(self, async_client: Asyn
@parametrize
async def test_raw_response_resume_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.turn.with_raw_response.resume(
+ response = await async_client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -858,7 +858,7 @@ async def test_raw_response_resume_overload_1(self, async_client: AsyncLlamaStac
@parametrize
async def test_streaming_response_resume_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.turn.with_streaming_response.resume(
+ async with async_client.alpha.agents.turn.with_streaming_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -881,7 +881,7 @@ async def test_streaming_response_resume_overload_1(self, async_client: AsyncLla
@parametrize
async def test_path_params_resume_overload_1(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.turn.with_raw_response.resume(
+ await async_client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="",
session_id="session_id",
@@ -895,7 +895,7 @@ async def test_path_params_resume_overload_1(self, async_client: AsyncLlamaStack
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.agents.turn.with_raw_response.resume(
+ await async_client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="",
@@ -909,7 +909,7 @@ async def test_path_params_resume_overload_1(self, async_client: AsyncLlamaStack
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- await async_client.agents.turn.with_raw_response.resume(
+ await async_client.alpha.agents.turn.with_raw_response.resume(
turn_id="",
agent_id="agent_id",
session_id="session_id",
@@ -924,7 +924,7 @@ async def test_path_params_resume_overload_1(self, async_client: AsyncLlamaStack
@parametrize
async def test_method_resume_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
- turn_stream = await async_client.agents.turn.resume(
+ turn_stream = await async_client.alpha.agents.turn.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -941,7 +941,7 @@ async def test_method_resume_overload_2(self, async_client: AsyncLlamaStackClien
@parametrize
async def test_raw_response_resume_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.turn.with_raw_response.resume(
+ response = await async_client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -961,7 +961,7 @@ async def test_raw_response_resume_overload_2(self, async_client: AsyncLlamaStac
@parametrize
async def test_streaming_response_resume_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.turn.with_streaming_response.resume(
+ async with async_client.alpha.agents.turn.with_streaming_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="session_id",
@@ -985,7 +985,7 @@ async def test_streaming_response_resume_overload_2(self, async_client: AsyncLla
@parametrize
async def test_path_params_resume_overload_2(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.turn.with_raw_response.resume(
+ await async_client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="",
session_id="session_id",
@@ -1000,7 +1000,7 @@ async def test_path_params_resume_overload_2(self, async_client: AsyncLlamaStack
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"):
- await async_client.agents.turn.with_raw_response.resume(
+ await async_client.alpha.agents.turn.with_raw_response.resume(
turn_id="turn_id",
agent_id="agent_id",
session_id="",
@@ -1015,7 +1015,7 @@ async def test_path_params_resume_overload_2(self, async_client: AsyncLlamaStack
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"):
- await async_client.agents.turn.with_raw_response.resume(
+ await async_client.alpha.agents.turn.with_raw_response.resume(
turn_id="",
agent_id="agent_id",
session_id="session_id",
diff --git a/tests/api_resources/test_agents.py b/tests/api_resources/alpha/test_agents.py
similarity index 87%
rename from tests/api_resources/test_agents.py
rename to tests/api_resources/alpha/test_agents.py
index c19bc9bf..d67e8457 100644
--- a/tests/api_resources/test_agents.py
+++ b/tests/api_resources/alpha/test_agents.py
@@ -9,7 +9,7 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types import (
+from llama_stack_client.types.alpha import (
AgentListResponse,
AgentCreateResponse,
AgentRetrieveResponse,
@@ -23,7 +23,7 @@ class TestAgents:
@parametrize
def test_method_create(self, client: LlamaStackClient) -> None:
- agent = client.agents.create(
+ agent = client.alpha.agents.create(
agent_config={
"instructions": "instructions",
"model": "model",
@@ -33,7 +33,7 @@ def test_method_create(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_create_with_all_params(self, client: LlamaStackClient) -> None:
- agent = client.agents.create(
+ agent = client.alpha.agents.create(
agent_config={
"instructions": "instructions",
"model": "model",
@@ -84,7 +84,7 @@ def test_method_create_with_all_params(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_create(self, client: LlamaStackClient) -> None:
- response = client.agents.with_raw_response.create(
+ response = client.alpha.agents.with_raw_response.create(
agent_config={
"instructions": "instructions",
"model": "model",
@@ -98,7 +98,7 @@ def test_raw_response_create(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_create(self, client: LlamaStackClient) -> None:
- with client.agents.with_streaming_response.create(
+ with client.alpha.agents.with_streaming_response.create(
agent_config={
"instructions": "instructions",
"model": "model",
@@ -114,14 +114,14 @@ def test_streaming_response_create(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_retrieve(self, client: LlamaStackClient) -> None:
- agent = client.agents.retrieve(
+ agent = client.alpha.agents.retrieve(
"agent_id",
)
assert_matches_type(AgentRetrieveResponse, agent, path=["response"])
@parametrize
def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
- response = client.agents.with_raw_response.retrieve(
+ response = client.alpha.agents.with_raw_response.retrieve(
"agent_id",
)
@@ -132,7 +132,7 @@ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
- with client.agents.with_streaming_response.retrieve(
+ with client.alpha.agents.with_streaming_response.retrieve(
"agent_id",
) as response:
assert not response.is_closed
@@ -146,18 +146,18 @@ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.with_raw_response.retrieve(
+ client.alpha.agents.with_raw_response.retrieve(
"",
)
@parametrize
def test_method_list(self, client: LlamaStackClient) -> None:
- agent = client.agents.list()
+ agent = client.alpha.agents.list()
assert_matches_type(AgentListResponse, agent, path=["response"])
@parametrize
def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
- agent = client.agents.list(
+ agent = client.alpha.agents.list(
limit=0,
start_index=0,
)
@@ -165,7 +165,7 @@ def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_list(self, client: LlamaStackClient) -> None:
- response = client.agents.with_raw_response.list()
+ response = client.alpha.agents.with_raw_response.list()
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -174,7 +174,7 @@ def test_raw_response_list(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_list(self, client: LlamaStackClient) -> None:
- with client.agents.with_streaming_response.list() as response:
+ with client.alpha.agents.with_streaming_response.list() as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -185,14 +185,14 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_delete(self, client: LlamaStackClient) -> None:
- agent = client.agents.delete(
+ agent = client.alpha.agents.delete(
"agent_id",
)
assert agent is None
@parametrize
def test_raw_response_delete(self, client: LlamaStackClient) -> None:
- response = client.agents.with_raw_response.delete(
+ response = client.alpha.agents.with_raw_response.delete(
"agent_id",
)
@@ -203,7 +203,7 @@ def test_raw_response_delete(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_delete(self, client: LlamaStackClient) -> None:
- with client.agents.with_streaming_response.delete(
+ with client.alpha.agents.with_streaming_response.delete(
"agent_id",
) as response:
assert not response.is_closed
@@ -217,7 +217,7 @@ def test_streaming_response_delete(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_delete(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- client.agents.with_raw_response.delete(
+ client.alpha.agents.with_raw_response.delete(
"",
)
@@ -229,7 +229,7 @@ class TestAsyncAgents:
@parametrize
async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None:
- agent = await async_client.agents.create(
+ agent = await async_client.alpha.agents.create(
agent_config={
"instructions": "instructions",
"model": "model",
@@ -239,7 +239,7 @@ async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None:
@parametrize
async def test_method_create_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- agent = await async_client.agents.create(
+ agent = await async_client.alpha.agents.create(
agent_config={
"instructions": "instructions",
"model": "model",
@@ -290,7 +290,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncLlamaStack
@parametrize
async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.with_raw_response.create(
+ response = await async_client.alpha.agents.with_raw_response.create(
agent_config={
"instructions": "instructions",
"model": "model",
@@ -304,7 +304,7 @@ async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) ->
@parametrize
async def test_streaming_response_create(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.with_streaming_response.create(
+ async with async_client.alpha.agents.with_streaming_response.create(
agent_config={
"instructions": "instructions",
"model": "model",
@@ -320,14 +320,14 @@ async def test_streaming_response_create(self, async_client: AsyncLlamaStackClie
@parametrize
async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- agent = await async_client.agents.retrieve(
+ agent = await async_client.alpha.agents.retrieve(
"agent_id",
)
assert_matches_type(AgentRetrieveResponse, agent, path=["response"])
@parametrize
async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.with_raw_response.retrieve(
+ response = await async_client.alpha.agents.with_raw_response.retrieve(
"agent_id",
)
@@ -338,7 +338,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient)
@parametrize
async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.with_streaming_response.retrieve(
+ async with async_client.alpha.agents.with_streaming_response.retrieve(
"agent_id",
) as response:
assert not response.is_closed
@@ -352,18 +352,18 @@ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.with_raw_response.retrieve(
+ await async_client.alpha.agents.with_raw_response.retrieve(
"",
)
@parametrize
async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
- agent = await async_client.agents.list()
+ agent = await async_client.alpha.agents.list()
assert_matches_type(AgentListResponse, agent, path=["response"])
@parametrize
async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- agent = await async_client.agents.list(
+ agent = await async_client.alpha.agents.list(
limit=0,
start_index=0,
)
@@ -371,7 +371,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.with_raw_response.list()
+ response = await async_client.alpha.agents.with_raw_response.list()
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -380,7 +380,7 @@ async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> N
@parametrize
async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.with_streaming_response.list() as response:
+ async with async_client.alpha.agents.with_streaming_response.list() as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -391,14 +391,14 @@ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient
@parametrize
async def test_method_delete(self, async_client: AsyncLlamaStackClient) -> None:
- agent = await async_client.agents.delete(
+ agent = await async_client.alpha.agents.delete(
"agent_id",
)
assert agent is None
@parametrize
async def test_raw_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.agents.with_raw_response.delete(
+ response = await async_client.alpha.agents.with_raw_response.delete(
"agent_id",
)
@@ -409,7 +409,7 @@ async def test_raw_response_delete(self, async_client: AsyncLlamaStackClient) ->
@parametrize
async def test_streaming_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.agents.with_streaming_response.delete(
+ async with async_client.alpha.agents.with_streaming_response.delete(
"agent_id",
) as response:
assert not response.is_closed
@@ -423,6 +423,6 @@ async def test_streaming_response_delete(self, async_client: AsyncLlamaStackClie
@parametrize
async def test_path_params_delete(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
- await async_client.agents.with_raw_response.delete(
+ await async_client.alpha.agents.with_raw_response.delete(
"",
)
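Taken together, the renamed tests reduce to this CRUD surface. A minimal sketch using the tests' placeholder values; the `agent_id` attribute on the create response is taken from the library code later in this series:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://127.0.0.1:4010")

created = client.alpha.agents.create(
    agent_config={"instructions": "instructions", "model": "model"},
)
agent = client.alpha.agents.retrieve(created.agent_id)
agents = client.alpha.agents.list(limit=0, start_index=0)
client.alpha.agents.delete(created.agent_id)  # returns None on success
```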
diff --git a/tests/api_resources/test_inference.py b/tests/api_resources/alpha/test_inference.py
similarity index 86%
rename from tests/api_resources/test_inference.py
rename to tests/api_resources/alpha/test_inference.py
index f26802c2..551e2213 100644
--- a/tests/api_resources/test_inference.py
+++ b/tests/api_resources/alpha/test_inference.py
@@ -9,7 +9,7 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types import InferenceRerankResponse
+from llama_stack_client.types.alpha import InferenceRerankResponse
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -19,7 +19,7 @@ class TestInference:
@parametrize
def test_method_rerank(self, client: LlamaStackClient) -> None:
- inference = client.inference.rerank(
+ inference = client.alpha.inference.rerank(
items=["string"],
model="model",
query="string",
@@ -28,7 +28,7 @@ def test_method_rerank(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_rerank_with_all_params(self, client: LlamaStackClient) -> None:
- inference = client.inference.rerank(
+ inference = client.alpha.inference.rerank(
items=["string"],
model="model",
query="string",
@@ -38,7 +38,7 @@ def test_method_rerank_with_all_params(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_rerank(self, client: LlamaStackClient) -> None:
- response = client.inference.with_raw_response.rerank(
+ response = client.alpha.inference.with_raw_response.rerank(
items=["string"],
model="model",
query="string",
@@ -51,7 +51,7 @@ def test_raw_response_rerank(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_rerank(self, client: LlamaStackClient) -> None:
- with client.inference.with_streaming_response.rerank(
+ with client.alpha.inference.with_streaming_response.rerank(
items=["string"],
model="model",
query="string",
@@ -72,7 +72,7 @@ class TestAsyncInference:
@parametrize
async def test_method_rerank(self, async_client: AsyncLlamaStackClient) -> None:
- inference = await async_client.inference.rerank(
+ inference = await async_client.alpha.inference.rerank(
items=["string"],
model="model",
query="string",
@@ -81,7 +81,7 @@ async def test_method_rerank(self, async_client: AsyncLlamaStackClient) -> None:
@parametrize
async def test_method_rerank_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- inference = await async_client.inference.rerank(
+ inference = await async_client.alpha.inference.rerank(
items=["string"],
model="model",
query="string",
@@ -91,7 +91,7 @@ async def test_method_rerank_with_all_params(self, async_client: AsyncLlamaStack
@parametrize
async def test_raw_response_rerank(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.inference.with_raw_response.rerank(
+ response = await async_client.alpha.inference.with_raw_response.rerank(
items=["string"],
model="model",
query="string",
@@ -104,7 +104,7 @@ async def test_raw_response_rerank(self, async_client: AsyncLlamaStackClient) ->
@parametrize
async def test_streaming_response_rerank(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.inference.with_streaming_response.rerank(
+ async with async_client.alpha.inference.with_streaming_response.rerank(
items=["string"],
model="model",
query="string",
From 0208f2aa0153d65ed4d46123d4509c66d2730e3c Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Tue, 30 Sep 2025 19:37:29 +0000
Subject: [PATCH 4/7] fix: fix stream event model reference
---
.stats.yml | 2 +-
.../resources/alpha/agents/turn.py | 33 ++--
.../types/alpha/agents/__init__.py | 2 +
.../agent_turn_response_stream_chunk.py | 11 ++
.../types/alpha/agents/turn_response_event.py | 160 ++++++++++++++++++
5 files changed, 191 insertions(+), 17 deletions(-)
create mode 100644 src/llama_stack_client/types/alpha/agents/agent_turn_response_stream_chunk.py
create mode 100644 src/llama_stack_client/types/alpha/agents/turn_response_event.py
diff --git a/.stats.yml b/.stats.yml
index 35d70772..f7df1a90 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 109
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-4337a6181c2db17737133e944b4b660a5e00ea10dce6be3252918e39451e9b5f.yml
openapi_spec_hash: a0bc8f4b5f45bc5741fed8eaa61171c3
-config_hash: 03aab396899c7d9aa3fba867ce54824b
+config_hash: 47ef2eb62d188340f22eb6dea3693f15
diff --git a/src/llama_stack_client/resources/alpha/agents/turn.py b/src/llama_stack_client/resources/alpha/agents/turn.py
index e8b7e97a..367a1b5a 100644
--- a/src/llama_stack_client/resources/alpha/agents/turn.py
+++ b/src/llama_stack_client/resources/alpha/agents/turn.py
@@ -22,6 +22,7 @@
from ....types.alpha.agents import turn_create_params, turn_resume_params
from ....types.alpha.agents.turn import Turn
from ....types.alpha.tool_response_param import ToolResponseParam
+from ....types.alpha.agents.agent_turn_response_stream_chunk import AgentTurnResponseStreamChunk
__all__ = ["TurnResource", "AsyncTurnResource"]
@@ -108,7 +109,7 @@ def create(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Stream[object]:
+ ) -> Stream[AgentTurnResponseStreamChunk]:
"""
Create a new turn for an agent.
@@ -153,7 +154,7 @@ def create(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | Stream[object]:
+ ) -> Turn | Stream[AgentTurnResponseStreamChunk]:
"""
Create a new turn for an agent.
@@ -198,7 +199,7 @@ def create(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | Stream[object]:
+ ) -> Turn | Stream[AgentTurnResponseStreamChunk]:
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
if not session_id:
@@ -222,7 +223,7 @@ def create(
),
cast_to=Turn,
stream=stream or False,
- stream_cls=Stream[object],
+ stream_cls=Stream[AgentTurnResponseStreamChunk],
)
def retrieve(
@@ -317,7 +318,7 @@ def resume(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Stream[object]:
+ ) -> Stream[AgentTurnResponseStreamChunk]:
"""Resume an agent turn with executed tool call responses.
When a Turn has the
@@ -355,7 +356,7 @@ def resume(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | Stream[object]:
+ ) -> Turn | Stream[AgentTurnResponseStreamChunk]:
"""Resume an agent turn with executed tool call responses.
When a Turn has the
@@ -393,7 +394,7 @@ def resume(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | Stream[object]:
+ ) -> Turn | Stream[AgentTurnResponseStreamChunk]:
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
if not session_id:
@@ -416,7 +417,7 @@ def resume(
),
cast_to=Turn,
stream=stream or False,
- stream_cls=Stream[object],
+ stream_cls=Stream[AgentTurnResponseStreamChunk],
)
@@ -502,7 +503,7 @@ async def create(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AsyncStream[object]:
+ ) -> AsyncStream[AgentTurnResponseStreamChunk]:
"""
Create a new turn for an agent.
@@ -547,7 +548,7 @@ async def create(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | AsyncStream[object]:
+ ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]:
"""
Create a new turn for an agent.
@@ -592,7 +593,7 @@ async def create(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | AsyncStream[object]:
+ ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]:
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
if not session_id:
@@ -616,7 +617,7 @@ async def create(
),
cast_to=Turn,
stream=stream or False,
- stream_cls=AsyncStream[object],
+ stream_cls=AsyncStream[AgentTurnResponseStreamChunk],
)
async def retrieve(
@@ -711,7 +712,7 @@ async def resume(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> AsyncStream[object]:
+ ) -> AsyncStream[AgentTurnResponseStreamChunk]:
"""Resume an agent turn with executed tool call responses.
When a Turn has the
@@ -749,7 +750,7 @@ async def resume(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | AsyncStream[object]:
+ ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]:
"""Resume an agent turn with executed tool call responses.
When a Turn has the
@@ -787,7 +788,7 @@ async def resume(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Turn | AsyncStream[object]:
+ ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]:
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
if not session_id:
@@ -810,7 +811,7 @@ async def resume(
),
cast_to=Turn,
stream=stream or False,
- stream_cls=AsyncStream[object],
+ stream_cls=AsyncStream[AgentTurnResponseStreamChunk],
)
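With the model reference fixed, `stream=True` yields typed chunks rather than bare `object`s. A minimal consumption sketch; the IDs are placeholders, and the `messages` shape is an assumption following `turn_create_params`:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient()

stream = client.alpha.agents.turn.create(
    session_id="session_id",
    agent_id="agent_id",
    messages=[{"role": "user", "content": "hello"}],  # assumed shape, see turn_create_params
    stream=True,
)
for chunk in stream:  # chunk: AgentTurnResponseStreamChunk
    print(chunk.event.payload.event_type)  # every payload variant carries event_type
```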
diff --git a/src/llama_stack_client/types/alpha/agents/__init__.py b/src/llama_stack_client/types/alpha/agents/__init__.py
index 2a4f602d..3a144840 100644
--- a/src/llama_stack_client/types/alpha/agents/__init__.py
+++ b/src/llama_stack_client/types/alpha/agents/__init__.py
@@ -7,8 +7,10 @@
from .turn_create_params import TurnCreateParams as TurnCreateParams
from .turn_resume_params import TurnResumeParams as TurnResumeParams
from .session_list_params import SessionListParams as SessionListParams
+from .turn_response_event import TurnResponseEvent as TurnResponseEvent
from .session_create_params import SessionCreateParams as SessionCreateParams
from .session_list_response import SessionListResponse as SessionListResponse
from .step_retrieve_response import StepRetrieveResponse as StepRetrieveResponse
from .session_create_response import SessionCreateResponse as SessionCreateResponse
from .session_retrieve_params import SessionRetrieveParams as SessionRetrieveParams
+from .agent_turn_response_stream_chunk import AgentTurnResponseStreamChunk as AgentTurnResponseStreamChunk
diff --git a/src/llama_stack_client/types/alpha/agents/agent_turn_response_stream_chunk.py b/src/llama_stack_client/types/alpha/agents/agent_turn_response_stream_chunk.py
new file mode 100644
index 00000000..c45bf756
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/agents/agent_turn_response_stream_chunk.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from ...._models import BaseModel
+from .turn_response_event import TurnResponseEvent
+
+__all__ = ["AgentTurnResponseStreamChunk"]
+
+
+class AgentTurnResponseStreamChunk(BaseModel):
+ event: TurnResponseEvent
+ """Individual event in the agent turn response stream"""
diff --git a/src/llama_stack_client/types/alpha/agents/turn_response_event.py b/src/llama_stack_client/types/alpha/agents/turn_response_event.py
new file mode 100644
index 00000000..c162135d
--- /dev/null
+++ b/src/llama_stack_client/types/alpha/agents/turn_response_event.py
@@ -0,0 +1,160 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from .turn import Turn
+from ...._utils import PropertyInfo
+from ...._models import BaseModel
+from ..inference_step import InferenceStep
+from ..shield_call_step import ShieldCallStep
+from ...shared.tool_call import ToolCall
+from ..tool_execution_step import ToolExecutionStep
+from ..memory_retrieval_step import MemoryRetrievalStep
+
+__all__ = [
+ "TurnResponseEvent",
+ "Payload",
+ "PayloadAgentTurnResponseStepStartPayload",
+ "PayloadAgentTurnResponseStepProgressPayload",
+ "PayloadAgentTurnResponseStepProgressPayloadDelta",
+ "PayloadAgentTurnResponseStepProgressPayloadDeltaTextDelta",
+ "PayloadAgentTurnResponseStepProgressPayloadDeltaImageDelta",
+ "PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDelta",
+ "PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDeltaToolCall",
+ "PayloadAgentTurnResponseStepCompletePayload",
+ "PayloadAgentTurnResponseStepCompletePayloadStepDetails",
+ "PayloadAgentTurnResponseTurnStartPayload",
+ "PayloadAgentTurnResponseTurnCompletePayload",
+ "PayloadAgentTurnResponseTurnAwaitingInputPayload",
+]
+
+
+class PayloadAgentTurnResponseStepStartPayload(BaseModel):
+ event_type: Literal["step_start"]
+ """Type of event being reported"""
+
+ step_id: str
+ """Unique identifier for the step within a turn"""
+
+ step_type: Literal["inference", "tool_execution", "shield_call", "memory_retrieval"]
+ """Type of step being executed"""
+
+ metadata: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
+ """(Optional) Additional metadata for the step"""
+
+
+class PayloadAgentTurnResponseStepProgressPayloadDeltaTextDelta(BaseModel):
+ text: str
+ """The incremental text content"""
+
+ type: Literal["text"]
+ """Discriminator type of the delta. Always "text" """
+
+
+class PayloadAgentTurnResponseStepProgressPayloadDeltaImageDelta(BaseModel):
+ image: str
+ """The incremental image data as bytes"""
+
+ type: Literal["image"]
+ """Discriminator type of the delta. Always "image" """
+
+
+PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDeltaToolCall: TypeAlias = Union[str, ToolCall]
+
+
+class PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDelta(BaseModel):
+ parse_status: Literal["started", "in_progress", "failed", "succeeded"]
+ """Current parsing status of the tool call"""
+
+ tool_call: PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDeltaToolCall
+ """Either an in-progress tool call string or the final parsed tool call"""
+
+ type: Literal["tool_call"]
+ """Discriminator type of the delta. Always "tool_call" """
+
+
+PayloadAgentTurnResponseStepProgressPayloadDelta: TypeAlias = Annotated[
+ Union[
+ PayloadAgentTurnResponseStepProgressPayloadDeltaTextDelta,
+ PayloadAgentTurnResponseStepProgressPayloadDeltaImageDelta,
+ PayloadAgentTurnResponseStepProgressPayloadDeltaToolCallDelta,
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class PayloadAgentTurnResponseStepProgressPayload(BaseModel):
+ delta: PayloadAgentTurnResponseStepProgressPayloadDelta
+ """Incremental content changes during step execution"""
+
+ event_type: Literal["step_progress"]
+ """Type of event being reported"""
+
+ step_id: str
+ """Unique identifier for the step within a turn"""
+
+ step_type: Literal["inference", "tool_execution", "shield_call", "memory_retrieval"]
+ """Type of step being executed"""
+
+
+PayloadAgentTurnResponseStepCompletePayloadStepDetails: TypeAlias = Annotated[
+ Union[InferenceStep, ToolExecutionStep, ShieldCallStep, MemoryRetrievalStep],
+ PropertyInfo(discriminator="step_type"),
+]
+
+
+class PayloadAgentTurnResponseStepCompletePayload(BaseModel):
+ event_type: Literal["step_complete"]
+ """Type of event being reported"""
+
+ step_details: PayloadAgentTurnResponseStepCompletePayloadStepDetails
+ """Complete details of the executed step"""
+
+ step_id: str
+ """Unique identifier for the step within a turn"""
+
+ step_type: Literal["inference", "tool_execution", "shield_call", "memory_retrieval"]
+ """Type of step being executed"""
+
+
+class PayloadAgentTurnResponseTurnStartPayload(BaseModel):
+ event_type: Literal["turn_start"]
+ """Type of event being reported"""
+
+ turn_id: str
+ """Unique identifier for the turn within a session"""
+
+
+class PayloadAgentTurnResponseTurnCompletePayload(BaseModel):
+ event_type: Literal["turn_complete"]
+ """Type of event being reported"""
+
+ turn: Turn
+ """Complete turn data including all steps and results"""
+
+
+class PayloadAgentTurnResponseTurnAwaitingInputPayload(BaseModel):
+ event_type: Literal["turn_awaiting_input"]
+ """Type of event being reported"""
+
+ turn: Turn
+ """Turn data when waiting for external tool responses"""
+
+
+Payload: TypeAlias = Annotated[
+ Union[
+ PayloadAgentTurnResponseStepStartPayload,
+ PayloadAgentTurnResponseStepProgressPayload,
+ PayloadAgentTurnResponseStepCompletePayload,
+ PayloadAgentTurnResponseTurnStartPayload,
+ PayloadAgentTurnResponseTurnCompletePayload,
+ PayloadAgentTurnResponseTurnAwaitingInputPayload,
+ ],
+ PropertyInfo(discriminator="event_type"),
+]
+
+
+class TurnResponseEvent(BaseModel):
+ payload: Payload
+ """Event-specific payload containing event data"""
From 08670f1023a73edea559bd896a59cbbd127c5a6b Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Tue, 30 Sep 2025 12:53:23 -0700
Subject: [PATCH 5/7] fix(manual): update lib/ references to use the alpha
namespace
---
src/llama_stack_client/__init__.py | 2 +-
src/llama_stack_client/lib/agents/agent.py | 27 ++++++++++---------
.../lib/agents/client_tool.py | 3 ++-
.../lib/agents/tool_parser.py | 2 +-
4 files changed, 18 insertions(+), 16 deletions(-)
diff --git a/src/llama_stack_client/__init__.py b/src/llama_stack_client/__init__.py
index dea90c02..cc2fcb9b 100644
--- a/src/llama_stack_client/__init__.py
+++ b/src/llama_stack_client/__init__.py
@@ -41,8 +41,8 @@
from .lib.agents.agent import Agent
from .lib.agents.event_logger import EventLogger as AgentEventLogger
from .lib.inference.event_logger import EventLogger as InferenceEventLogger
+from .types.alpha.agents.turn_create_params import Document
from .types.shared_params.document import Document as RAGDocument
-from .types.agents.turn_create_params import Document
__all__ = [
"types",
diff --git a/src/llama_stack_client/lib/agents/agent.py b/src/llama_stack_client/lib/agents/agent.py
index 5dc2f236..779c44c2 100644
--- a/src/llama_stack_client/lib/agents/agent.py
+++ b/src/llama_stack_client/lib/agents/agent.py
@@ -7,13 +7,14 @@
from typing import Any, AsyncIterator, Callable, Iterator, List, Optional, Tuple, Union
from llama_stack_client import LlamaStackClient
-from llama_stack_client.types import ToolResponseMessage, ToolResponseParam, UserMessage
-from llama_stack_client.types.agent_create_params import AgentConfig
-from llama_stack_client.types.agents.agent_turn_response_stream_chunk import (
+from llama_stack_client.types import ToolResponseMessage, UserMessage
+from llama_stack_client.types.alpha import ToolResponseParam
+from llama_stack_client.types.alpha.agent_create_params import AgentConfig
+from llama_stack_client.types.alpha.agents.agent_turn_response_stream_chunk import (
AgentTurnResponseStreamChunk,
)
-from llama_stack_client.types.agents.turn import CompletionMessage, Turn
-from llama_stack_client.types.agents.turn_create_params import Document, Toolgroup
+from llama_stack_client.types.alpha.agents.turn import CompletionMessage, Turn
+from llama_stack_client.types.alpha.agents.turn_create_params import Document, Toolgroup
from llama_stack_client.types.shared.tool_call import ToolCall
from llama_stack_client.types.shared_params.agent_config import ToolConfig
from llama_stack_client.types.shared_params.response_format import ResponseFormat
@@ -203,7 +204,7 @@ def __init__(
self.initialize()
def initialize(self) -> None:
- agentic_system_create_response = self.client.agents.create(
+ agentic_system_create_response = self.client.alpha.agents.create(
agent_config=self.agent_config,
extra_headers=self.extra_headers,
)
@@ -214,7 +215,7 @@ def initialize(self) -> None:
self.builtin_tools[tool.identifier] = tg.get("args", {}) if isinstance(tg, dict) else {}
def create_session(self, session_name: str) -> str:
- agentic_system_create_session_response = self.client.agents.session.create(
+ agentic_system_create_session_response = self.client.alpha.agents.session.create(
agent_id=self.agent_id,
session_name=session_name,
extra_headers=self.extra_headers,
@@ -322,7 +323,7 @@ def _create_turn_streaming(
n_iter = 0
# 1. create an agent turn
- turn_response = self.client.agents.turn.create(
+ turn_response = self.client.alpha.agents.turn.create(
agent_id=self.agent_id,
# use specified session_id or last session created
session_id=session_id or self.session_id[-1],
@@ -361,7 +362,7 @@ def _create_turn_streaming(
tool_responses = self._run_tool_calls(tool_calls)
# pass it to next iteration
- turn_response = self.client.agents.turn.resume(
+ turn_response = self.client.alpha.agents.turn.resume(
agent_id=self.agent_id,
session_id=session_id or self.session_id[-1],
turn_id=turn_id,
@@ -468,7 +469,7 @@ async def initialize(self) -> None:
if self._agent_id:
return
- agentic_system_create_response = await self.client.agents.create(
+ agentic_system_create_response = await self.client.alpha.agents.create(
agent_config=self.agent_config,
)
self._agent_id = agentic_system_create_response.agent_id
@@ -478,7 +479,7 @@ async def initialize(self) -> None:
async def create_session(self, session_name: str) -> str:
await self.initialize()
- agentic_system_create_session_response = await self.client.agents.session.create(
+ agentic_system_create_session_response = await self.client.alpha.agents.session.create(
agent_id=self.agent_id,
session_name=session_name,
extra_headers=self.extra_headers,
@@ -558,7 +559,7 @@ async def _create_turn_streaming(
n_iter = 0
# 1. create an agent turn
- turn_response = await self.client.agents.turn.create(
+ turn_response = await self.client.alpha.agents.turn.create(
agent_id=self.agent_id,
# use specified session_id or last session created
session_id=session_id or self.session_id[-1],
@@ -596,7 +597,7 @@ async def _create_turn_streaming(
tool_responses = await self._run_tool_calls(tool_calls)
# pass it to next iteration
- turn_response = await self.client.agents.turn.resume(
+ turn_response = await self.client.alpha.agents.turn.resume(
agent_id=self.agent_id,
session_id=session_id or self.session_id[-1],
turn_id=turn_id,
diff --git a/src/llama_stack_client/lib/agents/client_tool.py b/src/llama_stack_client/lib/agents/client_tool.py
index c199b211..f017d651 100644
--- a/src/llama_stack_client/lib/agents/client_tool.py
+++ b/src/llama_stack_client/lib/agents/client_tool.py
@@ -19,7 +19,8 @@
Union,
)
-from llama_stack_client.types import CompletionMessage, Message, ToolResponse
+from llama_stack_client.types import CompletionMessage, Message
+from llama_stack_client.types.alpha import ToolResponse
from llama_stack_client.types.tool_def_param import Parameter, ToolDefParam
diff --git a/src/llama_stack_client/lib/agents/tool_parser.py b/src/llama_stack_client/lib/agents/tool_parser.py
index dc0c5ba4..ca8d28ea 100644
--- a/src/llama_stack_client/lib/agents/tool_parser.py
+++ b/src/llama_stack_client/lib/agents/tool_parser.py
@@ -7,7 +7,7 @@
from abc import abstractmethod
from typing import List
-from llama_stack_client.types.agents.turn import CompletionMessage
+from llama_stack_client.types.alpha.agents.turn import CompletionMessage
from llama_stack_client.types.shared.tool_call import ToolCall
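The import moves in this patch follow a single pattern: agent-related symbols gain an `alpha` segment in their module path while shared types stay put. A before/after sketch for downstream code, using only imports that appear in the diffs above:

```python
# before (pre-alpha namespace)
# from llama_stack_client.types.agent_create_params import AgentConfig
# from llama_stack_client.types.agents.turn import CompletionMessage, Turn
# from llama_stack_client.types import ToolResponse, ToolResponseParam

# after
from llama_stack_client.types.alpha.agent_create_params import AgentConfig
from llama_stack_client.types.alpha.agents.turn import CompletionMessage, Turn
from llama_stack_client.types.alpha import ToolResponse, ToolResponseParam
```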
From 51a54d458f950da36bccab067a46f255a49933a1 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Tue, 30 Sep 2025 19:56:51 +0000
Subject: [PATCH 6/7] feat(api): move post_training and eval under alpha
namespace
---
.stats.yml | 2 +-
api.md | 138 +++++++++---------
src/llama_stack_client/_client.py | 76 ----------
src/llama_stack_client/resources/__init__.py | 28 ----
.../resources/alpha/__init__.py | 28 ++++
.../resources/alpha/agents/agents.py | 16 +-
.../resources/alpha/agents/session.py | 16 +-
.../resources/alpha/agents/steps.py | 4 +-
.../resources/alpha/agents/turn.py | 12 +-
.../resources/alpha/alpha.py | 64 ++++++++
.../resources/{ => alpha}/eval/__init__.py | 0
.../resources/{ => alpha}/eval/eval.py | 46 +++---
.../resources/{ => alpha}/eval/jobs.py | 26 ++--
.../{ => alpha}/post_training/__init__.py | 0
.../{ => alpha}/post_training/job.py | 38 ++---
.../post_training/post_training.py | 32 ++--
src/llama_stack_client/types/__init__.py | 16 --
.../types/alpha/__init__.py | 16 ++
.../{ => alpha}/algorithm_config_param.py | 2 +-
.../{ => alpha}/benchmark_config_param.py | 8 +-
.../types/{ => alpha}/eval/__init__.py | 0
.../eval_evaluate_rows_alpha_params.py | 2 +-
.../{ => alpha}/eval_evaluate_rows_params.py | 2 +-
.../{ => alpha}/eval_run_eval_alpha_params.py | 0
.../types/{ => alpha}/eval_run_eval_params.py | 0
.../types/{ => alpha}/evaluate_response.py | 4 +-
.../types/{ => alpha}/job.py | 2 +-
.../list_post_training_jobs_response.py | 2 +-
.../{ => alpha}/post_training/__init__.py | 0
.../post_training/job_artifacts_params.py | 0
.../post_training/job_artifacts_response.py | 2 +-
.../post_training/job_cancel_params.py | 0
.../post_training/job_list_response.py | 2 +-
.../post_training/job_status_params.py | 0
.../post_training/job_status_response.py | 2 +-
.../types/{ => alpha}/post_training_job.py | 2 +-
...ost_training_preference_optimize_params.py | 0
...st_training_supervised_fine_tune_params.py | 0
.../{ => alpha}/eval/__init__.py | 0
.../{ => alpha}/eval/test_jobs.py | 62 ++++----
.../{ => alpha}/post_training/__init__.py | 0
.../{ => alpha}/post_training/test_job.py | 52 +++----
tests/api_resources/{ => alpha}/test_eval.py | 82 +++++------
.../{ => alpha}/test_post_training.py | 34 ++---
44 files changed, 405 insertions(+), 413 deletions(-)
rename src/llama_stack_client/resources/{ => alpha}/eval/__init__.py (100%)
rename src/llama_stack_client/resources/{ => alpha}/eval/eval.py (94%)
rename src/llama_stack_client/resources/{ => alpha}/eval/jobs.py (93%)
rename src/llama_stack_client/resources/{ => alpha}/post_training/__init__.py (100%)
rename src/llama_stack_client/resources/{ => alpha}/post_training/job.py (92%)
rename src/llama_stack_client/resources/{ => alpha}/post_training/post_training.py (95%)
rename src/llama_stack_client/types/{ => alpha}/algorithm_config_param.py (97%)
rename src/llama_stack_client/types/{ => alpha}/benchmark_config_param.py (86%)
rename src/llama_stack_client/types/{ => alpha}/eval/__init__.py (100%)
rename src/llama_stack_client/types/{ => alpha}/eval_evaluate_rows_alpha_params.py (95%)
rename src/llama_stack_client/types/{ => alpha}/eval_evaluate_rows_params.py (95%)
rename src/llama_stack_client/types/{ => alpha}/eval_run_eval_alpha_params.py (100%)
rename src/llama_stack_client/types/{ => alpha}/eval_run_eval_params.py (100%)
rename src/llama_stack_client/types/{ => alpha}/evaluate_response.py (83%)
rename src/llama_stack_client/types/{ => alpha}/job.py (91%)
rename src/llama_stack_client/types/{ => alpha}/list_post_training_jobs_response.py (89%)
rename src/llama_stack_client/types/{ => alpha}/post_training/__init__.py (100%)
rename src/llama_stack_client/types/{ => alpha}/post_training/job_artifacts_params.py (100%)
rename src/llama_stack_client/types/{ => alpha}/post_training/job_artifacts_response.py (97%)
rename src/llama_stack_client/types/{ => alpha}/post_training/job_cancel_params.py (100%)
rename src/llama_stack_client/types/{ => alpha}/post_training/job_list_response.py (90%)
rename src/llama_stack_client/types/{ => alpha}/post_training/job_status_params.py (100%)
rename src/llama_stack_client/types/{ => alpha}/post_training/job_status_response.py (98%)
rename src/llama_stack_client/types/{ => alpha}/post_training_job.py (83%)
rename src/llama_stack_client/types/{ => alpha}/post_training_preference_optimize_params.py (100%)
rename src/llama_stack_client/types/{ => alpha}/post_training_supervised_fine_tune_params.py (100%)
rename tests/api_resources/{ => alpha}/eval/__init__.py (100%)
rename tests/api_resources/{ => alpha}/eval/test_jobs.py (82%)
rename tests/api_resources/{ => alpha}/post_training/__init__.py (100%)
rename tests/api_resources/{ => alpha}/post_training/test_job.py (81%)
rename tests/api_resources/{ => alpha}/test_eval.py (93%)
rename tests/api_resources/{ => alpha}/test_post_training.py (92%)
diff --git a/.stats.yml b/.stats.yml
index f7df1a90..448f9057 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
configured_endpoints: 109
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-4337a6181c2db17737133e944b4b660a5e00ea10dce6be3252918e39451e9b5f.yml
openapi_spec_hash: a0bc8f4b5f45bc5741fed8eaa61171c3
-config_hash: 47ef2eb62d188340f22eb6dea3693f15
+config_hash: d8706905bf16d9e4141e88d5a778263b
diff --git a/api.md b/api.md
index 00db1cb8..15e91db6 100644
--- a/api.md
+++ b/api.md
@@ -125,29 +125,6 @@ Methods:
- client.datasets.register(\*\*params) -> DatasetRegisterResponse
- client.datasets.unregister(dataset_id) -> None
-# Eval
-
-Types:
-
-```python
-from llama_stack_client.types import BenchmarkConfig, EvaluateResponse, Job
-```
-
-Methods:
-
-- client.eval.evaluate_rows(benchmark_id, \*\*params) -> EvaluateResponse
-- client.eval.evaluate_rows_alpha(benchmark_id, \*\*params) -> EvaluateResponse
-- client.eval.run_eval(benchmark_id, \*\*params) -> Job
-- client.eval.run_eval_alpha(benchmark_id, \*\*params) -> Job
-
-## Jobs
-
-Methods:
-
-- client.eval.jobs.retrieve(job_id, \*, benchmark_id) -> EvaluateResponse
-- client.eval.jobs.cancel(job_id, \*, benchmark_id) -> None
-- client.eval.jobs.status(job_id, \*, benchmark_id) -> Job
-
# Inspect
Types:
@@ -332,38 +309,6 @@ Methods:
- client.models.openai.list() -> ModelListResponse
-# PostTraining
-
-Types:
-
-```python
-from llama_stack_client.types import AlgorithmConfig, ListPostTrainingJobsResponse, PostTrainingJob
-```
-
-Methods:
-
-- client.post_training.preference_optimize(\*\*params) -> PostTrainingJob
-- client.post_training.supervised_fine_tune(\*\*params) -> PostTrainingJob
-
-## Job
-
-Types:
-
-```python
-from llama_stack_client.types.post_training import (
- JobListResponse,
- JobArtifactsResponse,
- JobStatusResponse,
-)
-```
-
-Methods:
-
-- client.post_training.job.list() -> List[Data]
-- client.post_training.job.artifacts(\*\*params) -> JobArtifactsResponse
-- client.post_training.job.cancel(\*\*params) -> None
-- client.post_training.job.status(\*\*params) -> JobStatusResponse
-
# Providers
Types:
@@ -546,6 +491,65 @@ Methods:
- client.alpha.inference.rerank(\*\*params) -> InferenceRerankResponse
+## PostTraining
+
+Types:
+
+```python
+from llama_stack_client.types.alpha import (
+ AlgorithmConfig,
+ ListPostTrainingJobsResponse,
+ PostTrainingJob,
+)
+```
+
+Methods:
+
+- client.alpha.post_training.preference_optimize(\*\*params) -> PostTrainingJob
+- client.alpha.post_training.supervised_fine_tune(\*\*params) -> PostTrainingJob
+
+### Job
+
+Types:
+
+```python
+from llama_stack_client.types.alpha.post_training import (
+ JobListResponse,
+ JobArtifactsResponse,
+ JobStatusResponse,
+)
+```
+
+Methods:
+
+- client.alpha.post_training.job.list() -> List[Data]
+- client.alpha.post_training.job.artifacts(\*\*params) -> JobArtifactsResponse
+- client.alpha.post_training.job.cancel(\*\*params) -> None
+- client.alpha.post_training.job.status(\*\*params) -> JobStatusResponse
+
+## Eval
+
+Types:
+
+```python
+from llama_stack_client.types.alpha import BenchmarkConfig, EvaluateResponse, Job
+```
+
+Methods:
+
+- client.alpha.eval.evaluate_rows(benchmark_id, \*\*params) -> EvaluateResponse
+- client.alpha.eval.evaluate_rows_alpha(benchmark_id, \*\*params) -> EvaluateResponse
+- client.alpha.eval.run_eval(benchmark_id, \*\*params) -> Job
+- client.alpha.eval.run_eval_alpha(benchmark_id, \*\*params) -> Job
+
+### Jobs
+
+Methods:
+
+- client.alpha.eval.jobs.retrieve(job_id, \*, benchmark_id) -> EvaluateResponse
+- client.alpha.eval.jobs.cancel(job_id, \*, benchmark_id) -> None
+- client.alpha.eval.jobs.status(job_id, \*, benchmark_id) -> Job
+
## Agents
Types:
@@ -565,10 +569,10 @@ from llama_stack_client.types.alpha import (
Methods:
-- client.alpha.agents.create(\*\*params) -> AgentCreateResponse
-- client.alpha.agents.retrieve(agent_id) -> AgentRetrieveResponse
-- client.alpha.agents.list(\*\*params) -> AgentListResponse
-- client.alpha.agents.delete(agent_id) -> None
+- client.alpha.agents.create(\*\*params) -> AgentCreateResponse
+- client.alpha.agents.retrieve(agent_id) -> AgentRetrieveResponse
+- client.alpha.agents.list(\*\*params) -> AgentListResponse
+- client.alpha.agents.delete(agent_id) -> None
### Session
@@ -584,10 +588,10 @@ from llama_stack_client.types.alpha.agents import (
Methods:
-- client.alpha.agents.session.create(agent_id, \*\*params) -> SessionCreateResponse
-- client.alpha.agents.session.retrieve(session_id, \*, agent_id, \*\*params) -> Session
-- client.alpha.agents.session.list(agent_id, \*\*params) -> SessionListResponse
-- client.alpha.agents.session.delete(session_id, \*, agent_id) -> None
+- client.alpha.agents.session.create(agent_id, \*\*params) -> SessionCreateResponse
+- client.alpha.agents.session.retrieve(session_id, \*, agent_id, \*\*params) -> Session
+- client.alpha.agents.session.list(agent_id, \*\*params) -> SessionListResponse
+- client.alpha.agents.session.delete(session_id, \*, agent_id) -> None
### Steps
@@ -599,7 +603,7 @@ from llama_stack_client.types.alpha.agents import StepRetrieveResponse
Methods:
-- client.alpha.agents.steps.retrieve(step_id, \*, agent_id, session_id, turn_id) -> StepRetrieveResponse
+- client.alpha.agents.steps.retrieve(step_id, \*, agent_id, session_id, turn_id) -> StepRetrieveResponse
### Turn
@@ -615,6 +619,6 @@ from llama_stack_client.types.alpha.agents import (
Methods:
-- client.alpha.agents.turn.create(session_id, \*, agent_id, \*\*params) -> Turn
-- client.alpha.agents.turn.retrieve(turn_id, \*, agent_id, session_id) -> Turn
-- client.alpha.agents.turn.resume(turn_id, \*, agent_id, session_id, \*\*params) -> Turn
+- client.alpha.agents.turn.create(session_id, \*, agent_id, \*\*params) -> Turn
+- client.alpha.agents.turn.retrieve(turn_id, \*, agent_id, session_id) -> Turn
+- client.alpha.agents.turn.resume(turn_id, \*, agent_id, session_id, \*\*params) -> Turn
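Eval and post-training call sites pick up the same `client.alpha.` prefix; the signatures listed above are otherwise unchanged. A minimal sketch with placeholder identifiers:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient()

result = client.alpha.eval.jobs.retrieve("job_id", benchmark_id="benchmark_id")  # EvaluateResponse
job = client.alpha.eval.jobs.status("job_id", benchmark_id="benchmark_id")  # Job
client.alpha.eval.jobs.cancel("job_id", benchmark_id="benchmark_id")  # None

# post-training job helpers move the same way
jobs = client.alpha.post_training.job.list()
```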
diff --git a/src/llama_stack_client/_client.py b/src/llama_stack_client/_client.py
index 5c106b37..6b8f11b2 100644
--- a/src/llama_stack_client/_client.py
+++ b/src/llama_stack_client/_client.py
@@ -34,7 +34,6 @@
if TYPE_CHECKING:
from .resources import (
chat,
- eval,
alpha,
files,
tools,
@@ -56,7 +55,6 @@
completions,
moderations,
tool_runtime,
- post_training,
vector_stores,
scoring_functions,
synthetic_data_generation,
@@ -70,7 +68,6 @@
from .resources.shields import ShieldsResource, AsyncShieldsResource
from .resources.datasets import DatasetsResource, AsyncDatasetsResource
from .resources.chat.chat import ChatResource, AsyncChatResource
- from .resources.eval.eval import EvalResource, AsyncEvalResource
from .resources.providers import ProvidersResource, AsyncProvidersResource
from .resources.telemetry import TelemetryResource, AsyncTelemetryResource
from .resources.vector_io import VectorIoResource, AsyncVectorIoResource
@@ -89,7 +86,6 @@
AsyncSyntheticDataGenerationResource,
)
from .resources.tool_runtime.tool_runtime import ToolRuntimeResource, AsyncToolRuntimeResource
- from .resources.post_training.post_training import PostTrainingResource, AsyncPostTrainingResource
from .resources.vector_stores.vector_stores import VectorStoresResource, AsyncVectorStoresResource
__all__ = [
@@ -191,12 +187,6 @@ def datasets(self) -> DatasetsResource:
return DatasetsResource(self)
- @cached_property
- def eval(self) -> EvalResource:
- from .resources.eval import EvalResource
-
- return EvalResource(self)
-
@cached_property
def inspect(self) -> InspectResource:
from .resources.inspect import InspectResource
@@ -245,12 +235,6 @@ def models(self) -> ModelsResource:
return ModelsResource(self)
- @cached_property
- def post_training(self) -> PostTrainingResource:
- from .resources.post_training import PostTrainingResource
-
- return PostTrainingResource(self)
-
@cached_property
def providers(self) -> ProvidersResource:
from .resources.providers import ProvidersResource
@@ -525,12 +509,6 @@ def datasets(self) -> AsyncDatasetsResource:
return AsyncDatasetsResource(self)
- @cached_property
- def eval(self) -> AsyncEvalResource:
- from .resources.eval import AsyncEvalResource
-
- return AsyncEvalResource(self)
-
@cached_property
def inspect(self) -> AsyncInspectResource:
from .resources.inspect import AsyncInspectResource
@@ -579,12 +557,6 @@ def models(self) -> AsyncModelsResource:
return AsyncModelsResource(self)
- @cached_property
- def post_training(self) -> AsyncPostTrainingResource:
- from .resources.post_training import AsyncPostTrainingResource
-
- return AsyncPostTrainingResource(self)
-
@cached_property
def providers(self) -> AsyncProvidersResource:
from .resources.providers import AsyncProvidersResource
@@ -808,12 +780,6 @@ def datasets(self) -> datasets.DatasetsResourceWithRawResponse:
return DatasetsResourceWithRawResponse(self._client.datasets)
- @cached_property
- def eval(self) -> eval.EvalResourceWithRawResponse:
- from .resources.eval import EvalResourceWithRawResponse
-
- return EvalResourceWithRawResponse(self._client.eval)
-
@cached_property
def inspect(self) -> inspect.InspectResourceWithRawResponse:
from .resources.inspect import InspectResourceWithRawResponse
@@ -862,12 +828,6 @@ def models(self) -> models.ModelsResourceWithRawResponse:
return ModelsResourceWithRawResponse(self._client.models)
- @cached_property
- def post_training(self) -> post_training.PostTrainingResourceWithRawResponse:
- from .resources.post_training import PostTrainingResourceWithRawResponse
-
- return PostTrainingResourceWithRawResponse(self._client.post_training)
-
@cached_property
def providers(self) -> providers.ProvidersResourceWithRawResponse:
from .resources.providers import ProvidersResourceWithRawResponse
@@ -977,12 +937,6 @@ def datasets(self) -> datasets.AsyncDatasetsResourceWithRawResponse:
return AsyncDatasetsResourceWithRawResponse(self._client.datasets)
- @cached_property
- def eval(self) -> eval.AsyncEvalResourceWithRawResponse:
- from .resources.eval import AsyncEvalResourceWithRawResponse
-
- return AsyncEvalResourceWithRawResponse(self._client.eval)
-
@cached_property
def inspect(self) -> inspect.AsyncInspectResourceWithRawResponse:
from .resources.inspect import AsyncInspectResourceWithRawResponse
@@ -1031,12 +985,6 @@ def models(self) -> models.AsyncModelsResourceWithRawResponse:
return AsyncModelsResourceWithRawResponse(self._client.models)
- @cached_property
- def post_training(self) -> post_training.AsyncPostTrainingResourceWithRawResponse:
- from .resources.post_training import AsyncPostTrainingResourceWithRawResponse
-
- return AsyncPostTrainingResourceWithRawResponse(self._client.post_training)
-
@cached_property
def providers(self) -> providers.AsyncProvidersResourceWithRawResponse:
from .resources.providers import AsyncProvidersResourceWithRawResponse
@@ -1148,12 +1096,6 @@ def datasets(self) -> datasets.DatasetsResourceWithStreamingResponse:
return DatasetsResourceWithStreamingResponse(self._client.datasets)
- @cached_property
- def eval(self) -> eval.EvalResourceWithStreamingResponse:
- from .resources.eval import EvalResourceWithStreamingResponse
-
- return EvalResourceWithStreamingResponse(self._client.eval)
-
@cached_property
def inspect(self) -> inspect.InspectResourceWithStreamingResponse:
from .resources.inspect import InspectResourceWithStreamingResponse
@@ -1202,12 +1144,6 @@ def models(self) -> models.ModelsResourceWithStreamingResponse:
return ModelsResourceWithStreamingResponse(self._client.models)
- @cached_property
- def post_training(self) -> post_training.PostTrainingResourceWithStreamingResponse:
- from .resources.post_training import PostTrainingResourceWithStreamingResponse
-
- return PostTrainingResourceWithStreamingResponse(self._client.post_training)
-
@cached_property
def providers(self) -> providers.ProvidersResourceWithStreamingResponse:
from .resources.providers import ProvidersResourceWithStreamingResponse
@@ -1319,12 +1255,6 @@ def datasets(self) -> datasets.AsyncDatasetsResourceWithStreamingResponse:
return AsyncDatasetsResourceWithStreamingResponse(self._client.datasets)
- @cached_property
- def eval(self) -> eval.AsyncEvalResourceWithStreamingResponse:
- from .resources.eval import AsyncEvalResourceWithStreamingResponse
-
- return AsyncEvalResourceWithStreamingResponse(self._client.eval)
-
@cached_property
def inspect(self) -> inspect.AsyncInspectResourceWithStreamingResponse:
from .resources.inspect import AsyncInspectResourceWithStreamingResponse
@@ -1373,12 +1303,6 @@ def models(self) -> models.AsyncModelsResourceWithStreamingResponse:
return AsyncModelsResourceWithStreamingResponse(self._client.models)
- @cached_property
- def post_training(self) -> post_training.AsyncPostTrainingResourceWithStreamingResponse:
- from .resources.post_training import AsyncPostTrainingResourceWithStreamingResponse
-
- return AsyncPostTrainingResourceWithStreamingResponse(self._client.post_training)
-
@cached_property
def providers(self) -> providers.AsyncProvidersResourceWithStreamingResponse:
from .resources.providers import AsyncProvidersResourceWithStreamingResponse
diff --git a/src/llama_stack_client/resources/__init__.py b/src/llama_stack_client/resources/__init__.py
index 3ca1dedb..3089ae21 100644
--- a/src/llama_stack_client/resources/__init__.py
+++ b/src/llama_stack_client/resources/__init__.py
@@ -8,14 +8,6 @@
ChatResourceWithStreamingResponse,
AsyncChatResourceWithStreamingResponse,
)
-from .eval import (
- EvalResource,
- AsyncEvalResource,
- EvalResourceWithRawResponse,
- AsyncEvalResourceWithRawResponse,
- EvalResourceWithStreamingResponse,
- AsyncEvalResourceWithStreamingResponse,
-)
from .alpha import (
AlphaResource,
AsyncAlphaResource,
@@ -184,14 +176,6 @@
ToolRuntimeResourceWithStreamingResponse,
AsyncToolRuntimeResourceWithStreamingResponse,
)
-from .post_training import (
- PostTrainingResource,
- AsyncPostTrainingResource,
- PostTrainingResourceWithRawResponse,
- AsyncPostTrainingResourceWithRawResponse,
- PostTrainingResourceWithStreamingResponse,
- AsyncPostTrainingResourceWithStreamingResponse,
-)
from .vector_stores import (
VectorStoresResource,
AsyncVectorStoresResource,
@@ -248,12 +232,6 @@
"AsyncDatasetsResourceWithRawResponse",
"DatasetsResourceWithStreamingResponse",
"AsyncDatasetsResourceWithStreamingResponse",
- "EvalResource",
- "AsyncEvalResource",
- "EvalResourceWithRawResponse",
- "AsyncEvalResourceWithRawResponse",
- "EvalResourceWithStreamingResponse",
- "AsyncEvalResourceWithStreamingResponse",
"InspectResource",
"AsyncInspectResource",
"InspectResourceWithRawResponse",
@@ -302,12 +280,6 @@
"AsyncModelsResourceWithRawResponse",
"ModelsResourceWithStreamingResponse",
"AsyncModelsResourceWithStreamingResponse",
- "PostTrainingResource",
- "AsyncPostTrainingResource",
- "PostTrainingResourceWithRawResponse",
- "AsyncPostTrainingResourceWithRawResponse",
- "PostTrainingResourceWithStreamingResponse",
- "AsyncPostTrainingResourceWithStreamingResponse",
"ProvidersResource",
"AsyncProvidersResource",
"ProvidersResourceWithRawResponse",
diff --git a/src/llama_stack_client/resources/alpha/__init__.py b/src/llama_stack_client/resources/alpha/__init__.py
index 875a710b..c3c4b0d9 100644
--- a/src/llama_stack_client/resources/alpha/__init__.py
+++ b/src/llama_stack_client/resources/alpha/__init__.py
@@ -1,5 +1,13 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+from .eval import (
+ EvalResource,
+ AsyncEvalResource,
+ EvalResourceWithRawResponse,
+ AsyncEvalResourceWithRawResponse,
+ EvalResourceWithStreamingResponse,
+ AsyncEvalResourceWithStreamingResponse,
+)
from .alpha import (
AlphaResource,
AsyncAlphaResource,
@@ -24,6 +32,14 @@
InferenceResourceWithStreamingResponse,
AsyncInferenceResourceWithStreamingResponse,
)
+from .post_training import (
+ PostTrainingResource,
+ AsyncPostTrainingResource,
+ PostTrainingResourceWithRawResponse,
+ AsyncPostTrainingResourceWithRawResponse,
+ PostTrainingResourceWithStreamingResponse,
+ AsyncPostTrainingResourceWithStreamingResponse,
+)
__all__ = [
"InferenceResource",
@@ -32,6 +48,18 @@
"AsyncInferenceResourceWithRawResponse",
"InferenceResourceWithStreamingResponse",
"AsyncInferenceResourceWithStreamingResponse",
+ "PostTrainingResource",
+ "AsyncPostTrainingResource",
+ "PostTrainingResourceWithRawResponse",
+ "AsyncPostTrainingResourceWithRawResponse",
+ "PostTrainingResourceWithStreamingResponse",
+ "AsyncPostTrainingResourceWithStreamingResponse",
+ "EvalResource",
+ "AsyncEvalResource",
+ "EvalResourceWithRawResponse",
+ "AsyncEvalResourceWithRawResponse",
+ "EvalResourceWithStreamingResponse",
+ "AsyncEvalResourceWithStreamingResponse",
"AgentsResource",
"AsyncAgentsResource",
"AgentsResourceWithRawResponse",
diff --git a/src/llama_stack_client/resources/alpha/agents/agents.py b/src/llama_stack_client/resources/alpha/agents/agents.py
index b168179c..0e81cce7 100644
--- a/src/llama_stack_client/resources/alpha/agents/agents.py
+++ b/src/llama_stack_client/resources/alpha/agents/agents.py
@@ -106,7 +106,7 @@ def create(
timeout: Override the client-level default timeout for this request, in seconds
"""
return self._post(
- "/v1/agents",
+ "/v1alpha/agents",
body=maybe_transform({"agent_config": agent_config}, agent_create_params.AgentCreateParams),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -140,7 +140,7 @@ def retrieve(
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
return self._get(
- f"/v1/agents/{agent_id}",
+ f"/v1alpha/agents/{agent_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -176,7 +176,7 @@ def list(
timeout: Override the client-level default timeout for this request, in seconds
"""
return self._get(
- "/v1/agents",
+ "/v1alpha/agents",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -220,7 +220,7 @@ def delete(
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
return self._delete(
- f"/v1/agents/{agent_id}",
+ f"/v1alpha/agents/{agent_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -286,7 +286,7 @@ async def create(
timeout: Override the client-level default timeout for this request, in seconds
"""
return await self._post(
- "/v1/agents",
+ "/v1alpha/agents",
body=await async_maybe_transform({"agent_config": agent_config}, agent_create_params.AgentCreateParams),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -320,7 +320,7 @@ async def retrieve(
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
return await self._get(
- f"/v1/agents/{agent_id}",
+ f"/v1alpha/agents/{agent_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -356,7 +356,7 @@ async def list(
timeout: Override the client-level default timeout for this request, in seconds
"""
return await self._get(
- "/v1/agents",
+ "/v1alpha/agents",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -400,7 +400,7 @@ async def delete(
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
return await self._delete(
- f"/v1/agents/{agent_id}",
+ f"/v1alpha/agents/{agent_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
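The agents hunks above change only the URL prefix (`/v1` to `/v1alpha`); the client-side method signatures are untouched. A hedged sketch of the affected surface (the base URL and `agent_config` contents are illustrative assumptions, not part of this patch):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# create() now POSTs to /v1alpha/agents instead of /v1/agents
agent = client.alpha.agents.create(
    agent_config={"model": "my-model", "instructions": "You are a helpful agent."},  # illustrative
)

# retrieve/list/delete pick up the same /v1alpha prefix
client.alpha.agents.retrieve(agent_id=agent.agent_id)
client.alpha.agents.delete(agent_id=agent.agent_id)
```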
diff --git a/src/llama_stack_client/resources/alpha/agents/session.py b/src/llama_stack_client/resources/alpha/agents/session.py
index 37b51cea..2e980add 100644
--- a/src/llama_stack_client/resources/alpha/agents/session.py
+++ b/src/llama_stack_client/resources/alpha/agents/session.py
@@ -72,7 +72,7 @@ def create(
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
return self._post(
- f"/v1/agents/{agent_id}/session",
+ f"/v1alpha/agents/{agent_id}/session",
body=maybe_transform({"session_name": session_name}, session_create_params.SessionCreateParams),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -112,7 +112,7 @@ def retrieve(
if not session_id:
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
return self._get(
- f"/v1/agents/{agent_id}/session/{session_id}",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -155,7 +155,7 @@ def list(
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
return self._get(
- f"/v1/agents/{agent_id}/sessions",
+ f"/v1alpha/agents/{agent_id}/sessions",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -202,7 +202,7 @@ def delete(
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
return self._delete(
- f"/v1/agents/{agent_id}/session/{session_id}",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -259,7 +259,7 @@ async def create(
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
return await self._post(
- f"/v1/agents/{agent_id}/session",
+ f"/v1alpha/agents/{agent_id}/session",
body=await async_maybe_transform({"session_name": session_name}, session_create_params.SessionCreateParams),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -299,7 +299,7 @@ async def retrieve(
if not session_id:
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
return await self._get(
- f"/v1/agents/{agent_id}/session/{session_id}",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -344,7 +344,7 @@ async def list(
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
return await self._get(
- f"/v1/agents/{agent_id}/sessions",
+ f"/v1alpha/agents/{agent_id}/sessions",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -391,7 +391,7 @@ async def delete(
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
return await self._delete(
- f"/v1/agents/{agent_id}/session/{session_id}",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
diff --git a/src/llama_stack_client/resources/alpha/agents/steps.py b/src/llama_stack_client/resources/alpha/agents/steps.py
index 42aec9ab..838822d0 100644
--- a/src/llama_stack_client/resources/alpha/agents/steps.py
+++ b/src/llama_stack_client/resources/alpha/agents/steps.py
@@ -74,7 +74,7 @@ def retrieve(
if not step_id:
raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}")
return self._get(
- f"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -137,7 +137,7 @@ async def retrieve(
if not step_id:
raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}")
return await self._get(
- f"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
diff --git a/src/llama_stack_client/resources/alpha/agents/turn.py b/src/llama_stack_client/resources/alpha/agents/turn.py
index 367a1b5a..ffe766b6 100644
--- a/src/llama_stack_client/resources/alpha/agents/turn.py
+++ b/src/llama_stack_client/resources/alpha/agents/turn.py
@@ -205,7 +205,7 @@ def create(
if not session_id:
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
return self._post(
- f"/v1/agents/{agent_id}/session/{session_id}/turn",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn",
body=maybe_transform(
{
"messages": messages,
@@ -258,7 +258,7 @@ def retrieve(
if not turn_id:
raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
return self._get(
- f"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -402,7 +402,7 @@ def resume(
if not turn_id:
raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
return self._post(
- f"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
body=maybe_transform(
{
"tool_responses": tool_responses,
@@ -599,7 +599,7 @@ async def create(
if not session_id:
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
return await self._post(
- f"/v1/agents/{agent_id}/session/{session_id}/turn",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn",
body=await async_maybe_transform(
{
"messages": messages,
@@ -652,7 +652,7 @@ async def retrieve(
if not turn_id:
raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
return await self._get(
- f"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -796,7 +796,7 @@ async def resume(
if not turn_id:
raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
return await self._post(
- f"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
+ f"/v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
body=await async_maybe_transform(
{
"tool_responses": tool_responses,
diff --git a/src/llama_stack_client/resources/alpha/alpha.py b/src/llama_stack_client/resources/alpha/alpha.py
index 9d5cfbe9..77d4115b 100644
--- a/src/llama_stack_client/resources/alpha/alpha.py
+++ b/src/llama_stack_client/resources/alpha/alpha.py
@@ -3,6 +3,14 @@
from __future__ import annotations
from ..._compat import cached_property
+from .eval.eval import (
+ EvalResource,
+ AsyncEvalResource,
+ EvalResourceWithRawResponse,
+ AsyncEvalResourceWithRawResponse,
+ EvalResourceWithStreamingResponse,
+ AsyncEvalResourceWithStreamingResponse,
+)
from .inference import (
InferenceResource,
AsyncInferenceResource,
@@ -20,6 +28,14 @@
AgentsResourceWithStreamingResponse,
AsyncAgentsResourceWithStreamingResponse,
)
+from .post_training.post_training import (
+ PostTrainingResource,
+ AsyncPostTrainingResource,
+ PostTrainingResourceWithRawResponse,
+ AsyncPostTrainingResourceWithRawResponse,
+ PostTrainingResourceWithStreamingResponse,
+ AsyncPostTrainingResourceWithStreamingResponse,
+)
__all__ = ["AlphaResource", "AsyncAlphaResource"]
@@ -29,6 +45,14 @@ class AlphaResource(SyncAPIResource):
def inference(self) -> InferenceResource:
return InferenceResource(self._client)
+ @cached_property
+ def post_training(self) -> PostTrainingResource:
+ return PostTrainingResource(self._client)
+
+ @cached_property
+ def eval(self) -> EvalResource:
+ return EvalResource(self._client)
+
@cached_property
def agents(self) -> AgentsResource:
return AgentsResource(self._client)
@@ -58,6 +82,14 @@ class AsyncAlphaResource(AsyncAPIResource):
def inference(self) -> AsyncInferenceResource:
return AsyncInferenceResource(self._client)
+ @cached_property
+ def post_training(self) -> AsyncPostTrainingResource:
+ return AsyncPostTrainingResource(self._client)
+
+ @cached_property
+ def eval(self) -> AsyncEvalResource:
+ return AsyncEvalResource(self._client)
+
@cached_property
def agents(self) -> AsyncAgentsResource:
return AsyncAgentsResource(self._client)
@@ -90,6 +122,14 @@ def __init__(self, alpha: AlphaResource) -> None:
def inference(self) -> InferenceResourceWithRawResponse:
return InferenceResourceWithRawResponse(self._alpha.inference)
+ @cached_property
+ def post_training(self) -> PostTrainingResourceWithRawResponse:
+ return PostTrainingResourceWithRawResponse(self._alpha.post_training)
+
+ @cached_property
+ def eval(self) -> EvalResourceWithRawResponse:
+ return EvalResourceWithRawResponse(self._alpha.eval)
+
@cached_property
def agents(self) -> AgentsResourceWithRawResponse:
return AgentsResourceWithRawResponse(self._alpha.agents)
@@ -103,6 +143,14 @@ def __init__(self, alpha: AsyncAlphaResource) -> None:
def inference(self) -> AsyncInferenceResourceWithRawResponse:
return AsyncInferenceResourceWithRawResponse(self._alpha.inference)
+ @cached_property
+ def post_training(self) -> AsyncPostTrainingResourceWithRawResponse:
+ return AsyncPostTrainingResourceWithRawResponse(self._alpha.post_training)
+
+ @cached_property
+ def eval(self) -> AsyncEvalResourceWithRawResponse:
+ return AsyncEvalResourceWithRawResponse(self._alpha.eval)
+
@cached_property
def agents(self) -> AsyncAgentsResourceWithRawResponse:
return AsyncAgentsResourceWithRawResponse(self._alpha.agents)
@@ -116,6 +164,14 @@ def __init__(self, alpha: AlphaResource) -> None:
def inference(self) -> InferenceResourceWithStreamingResponse:
return InferenceResourceWithStreamingResponse(self._alpha.inference)
+ @cached_property
+ def post_training(self) -> PostTrainingResourceWithStreamingResponse:
+ return PostTrainingResourceWithStreamingResponse(self._alpha.post_training)
+
+ @cached_property
+ def eval(self) -> EvalResourceWithStreamingResponse:
+ return EvalResourceWithStreamingResponse(self._alpha.eval)
+
@cached_property
def agents(self) -> AgentsResourceWithStreamingResponse:
return AgentsResourceWithStreamingResponse(self._alpha.agents)
@@ -129,6 +185,14 @@ def __init__(self, alpha: AsyncAlphaResource) -> None:
def inference(self) -> AsyncInferenceResourceWithStreamingResponse:
return AsyncInferenceResourceWithStreamingResponse(self._alpha.inference)
+ @cached_property
+ def post_training(self) -> AsyncPostTrainingResourceWithStreamingResponse:
+ return AsyncPostTrainingResourceWithStreamingResponse(self._alpha.post_training)
+
+ @cached_property
+ def eval(self) -> AsyncEvalResourceWithStreamingResponse:
+ return AsyncEvalResourceWithStreamingResponse(self._alpha.eval)
+
@cached_property
def agents(self) -> AsyncAgentsResourceWithStreamingResponse:
return AsyncAgentsResourceWithStreamingResponse(self._alpha.agents)
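The wiring above exposes the moved resources as cached properties on the alpha namespace, with matching raw- and streaming-response variants:

```python
# The relocated resources now hang off client.alpha.
client.alpha.post_training  # PostTrainingResource
client.alpha.eval           # EvalResource

# The wrapper variants keep the same shape, e.g. (as the updated tests below use):
# client.alpha.eval.jobs.with_raw_response.retrieve(job_id=..., benchmark_id=...)
```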
diff --git a/src/llama_stack_client/resources/eval/__init__.py b/src/llama_stack_client/resources/alpha/eval/__init__.py
similarity index 100%
rename from src/llama_stack_client/resources/eval/__init__.py
rename to src/llama_stack_client/resources/alpha/eval/__init__.py
diff --git a/src/llama_stack_client/resources/eval/eval.py b/src/llama_stack_client/resources/alpha/eval/eval.py
similarity index 94%
rename from src/llama_stack_client/resources/eval/eval.py
rename to src/llama_stack_client/resources/alpha/eval/eval.py
index 87637875..b5347c0b 100644
--- a/src/llama_stack_client/resources/eval/eval.py
+++ b/src/llama_stack_client/resources/alpha/eval/eval.py
@@ -14,26 +14,26 @@
JobsResourceWithStreamingResponse,
AsyncJobsResourceWithStreamingResponse,
)
-from ...types import (
- eval_run_eval_params,
- eval_evaluate_rows_params,
- eval_run_eval_alpha_params,
- eval_evaluate_rows_alpha_params,
-)
-from ..._types import Body, Query, Headers, NotGiven, SequenceNotStr, not_given
-from ..._utils import maybe_transform, async_maybe_transform
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import (
+from ...._types import Body, Query, Headers, NotGiven, SequenceNotStr, not_given
+from ...._utils import maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
to_raw_response_wrapper,
to_streamed_response_wrapper,
async_to_raw_response_wrapper,
async_to_streamed_response_wrapper,
)
-from ...types.job import Job
-from ..._base_client import make_request_options
-from ...types.evaluate_response import EvaluateResponse
-from ...types.benchmark_config_param import BenchmarkConfigParam
+from ....types.alpha import (
+ eval_run_eval_params,
+ eval_evaluate_rows_params,
+ eval_run_eval_alpha_params,
+ eval_evaluate_rows_alpha_params,
+)
+from ...._base_client import make_request_options
+from ....types.alpha.job import Job
+from ....types.alpha.evaluate_response import EvaluateResponse
+from ....types.alpha.benchmark_config_param import BenchmarkConfigParam
__all__ = ["EvalResource", "AsyncEvalResource"]
@@ -97,7 +97,7 @@ def evaluate_rows(
if not benchmark_id:
raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
return self._post(
- f"/v1/eval/benchmarks/{benchmark_id}/evaluations",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/evaluations",
body=maybe_transform(
{
"benchmark_config": benchmark_config,
@@ -147,7 +147,7 @@ def evaluate_rows_alpha(
if not benchmark_id:
raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
return self._post(
- f"/v1/eval/benchmarks/{benchmark_id}/evaluations",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/evaluations",
body=maybe_transform(
{
"benchmark_config": benchmark_config,
@@ -191,7 +191,7 @@ def run_eval(
if not benchmark_id:
raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
return self._post(
- f"/v1/eval/benchmarks/{benchmark_id}/jobs",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs",
body=maybe_transform({"benchmark_config": benchmark_config}, eval_run_eval_params.EvalRunEvalParams),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -228,7 +228,7 @@ def run_eval_alpha(
if not benchmark_id:
raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
return self._post(
- f"/v1/eval/benchmarks/{benchmark_id}/jobs",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs",
body=maybe_transform(
{"benchmark_config": benchmark_config}, eval_run_eval_alpha_params.EvalRunEvalAlphaParams
),
@@ -298,7 +298,7 @@ async def evaluate_rows(
if not benchmark_id:
raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
return await self._post(
- f"/v1/eval/benchmarks/{benchmark_id}/evaluations",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/evaluations",
body=await async_maybe_transform(
{
"benchmark_config": benchmark_config,
@@ -348,7 +348,7 @@ async def evaluate_rows_alpha(
if not benchmark_id:
raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
return await self._post(
- f"/v1/eval/benchmarks/{benchmark_id}/evaluations",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/evaluations",
body=await async_maybe_transform(
{
"benchmark_config": benchmark_config,
@@ -392,7 +392,7 @@ async def run_eval(
if not benchmark_id:
raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
return await self._post(
- f"/v1/eval/benchmarks/{benchmark_id}/jobs",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs",
body=await async_maybe_transform(
{"benchmark_config": benchmark_config}, eval_run_eval_params.EvalRunEvalParams
),
@@ -431,7 +431,7 @@ async def run_eval_alpha(
if not benchmark_id:
raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
return await self._post(
- f"/v1/eval/benchmarks/{benchmark_id}/jobs",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs",
body=await async_maybe_transform(
{"benchmark_config": benchmark_config}, eval_run_eval_alpha_params.EvalRunEvalAlphaParams
),
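Besides the import re-shuffling into `types.alpha`, the eval endpoints simply gain the `/v1alpha` prefix. A hedged sketch of starting a run (the `benchmark_config` payload is abbreviated and illustrative; see `BenchmarkConfigParam` for the full schema):

```python
# POST /v1alpha/eval/benchmarks/{benchmark_id}/jobs
job = client.alpha.eval.run_eval(
    benchmark_id="my-benchmark",  # illustrative id
    benchmark_config={            # abbreviated; real configs need a full eval candidate
        "eval_candidate": {"type": "model", "model": "my-model", "sampling_params": {}},
        "scoring_params": {},
    },
)
```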
diff --git a/src/llama_stack_client/resources/eval/jobs.py b/src/llama_stack_client/resources/alpha/eval/jobs.py
similarity index 93%
rename from src/llama_stack_client/resources/eval/jobs.py
rename to src/llama_stack_client/resources/alpha/eval/jobs.py
index 21f6aea6..8f0fa026 100644
--- a/src/llama_stack_client/resources/eval/jobs.py
+++ b/src/llama_stack_client/resources/alpha/eval/jobs.py
@@ -4,18 +4,18 @@
import httpx
-from ..._types import Body, Query, Headers, NoneType, NotGiven, not_given
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import (
+from ...._types import Body, Query, Headers, NoneType, NotGiven, not_given
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
to_raw_response_wrapper,
to_streamed_response_wrapper,
async_to_raw_response_wrapper,
async_to_streamed_response_wrapper,
)
-from ...types.job import Job
-from ..._base_client import make_request_options
-from ...types.evaluate_response import EvaluateResponse
+from ...._base_client import make_request_options
+from ....types.alpha.job import Job
+from ....types.alpha.evaluate_response import EvaluateResponse
__all__ = ["JobsResource", "AsyncJobsResource"]
@@ -69,7 +69,7 @@ def retrieve(
if not job_id:
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
return self._get(
- f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -106,7 +106,7 @@ def cancel(
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
return self._delete(
- f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -142,7 +142,7 @@ def status(
if not job_id:
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
return self._get(
- f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -199,7 +199,7 @@ async def retrieve(
if not job_id:
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
return await self._get(
- f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -236,7 +236,7 @@ async def cancel(
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
return await self._delete(
- f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
@@ -272,7 +272,7 @@ async def status(
if not job_id:
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
return await self._get(
- f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
+ f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
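The eval job endpoints follow suit. Continuing the sketch, polling and fetching results uses the same keywords the relocated tests below exercise:

```python
# GET /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
status = client.alpha.eval.jobs.status(
    job_id=job.job_id,
    benchmark_id="my-benchmark",
)

# GET /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result
result = client.alpha.eval.jobs.retrieve(
    job_id=job.job_id,
    benchmark_id="my-benchmark",
)
```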
diff --git a/src/llama_stack_client/resources/post_training/__init__.py b/src/llama_stack_client/resources/alpha/post_training/__init__.py
similarity index 100%
rename from src/llama_stack_client/resources/post_training/__init__.py
rename to src/llama_stack_client/resources/alpha/post_training/__init__.py
diff --git a/src/llama_stack_client/resources/post_training/job.py b/src/llama_stack_client/resources/alpha/post_training/job.py
similarity index 92%
rename from src/llama_stack_client/resources/post_training/job.py
rename to src/llama_stack_client/resources/alpha/post_training/job.py
index ab00e054..083697a1 100644
--- a/src/llama_stack_client/resources/post_training/job.py
+++ b/src/llama_stack_client/resources/alpha/post_training/job.py
@@ -6,22 +6,22 @@
import httpx
-from ..._types import Body, Query, Headers, NoneType, NotGiven, not_given
-from ..._utils import maybe_transform, async_maybe_transform
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import (
+from ...._types import Body, Query, Headers, NoneType, NotGiven, not_given
+from ...._utils import maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
to_raw_response_wrapper,
to_streamed_response_wrapper,
async_to_raw_response_wrapper,
async_to_streamed_response_wrapper,
)
-from ..._wrappers import DataWrapper
-from ..._base_client import make_request_options
-from ...types.post_training import job_cancel_params, job_status_params, job_artifacts_params
-from ...types.list_post_training_jobs_response import Data
-from ...types.post_training.job_status_response import JobStatusResponse
-from ...types.post_training.job_artifacts_response import JobArtifactsResponse
+from ...._wrappers import DataWrapper
+from ...._base_client import make_request_options
+from ....types.alpha.post_training import job_cancel_params, job_status_params, job_artifacts_params
+from ....types.alpha.list_post_training_jobs_response import Data
+from ....types.alpha.post_training.job_status_response import JobStatusResponse
+from ....types.alpha.post_training.job_artifacts_response import JobArtifactsResponse
__all__ = ["JobResource", "AsyncJobResource"]
@@ -58,7 +58,7 @@ def list(
) -> List[Data]:
"""Get all training jobs."""
return self._get(
- "/v1/post-training/jobs",
+ "/v1alpha/post-training/jobs",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -95,7 +95,7 @@ def artifacts(
timeout: Override the client-level default timeout for this request, in seconds
"""
return self._get(
- "/v1/post-training/job/artifacts",
+ "/v1alpha/post-training/job/artifacts",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -133,7 +133,7 @@ def cancel(
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
return self._post(
- "/v1/post-training/job/cancel",
+ "/v1alpha/post-training/job/cancel",
body=maybe_transform({"job_uuid": job_uuid}, job_cancel_params.JobCancelParams),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -167,7 +167,7 @@ def status(
timeout: Override the client-level default timeout for this request, in seconds
"""
return self._get(
- "/v1/post-training/job/status",
+ "/v1alpha/post-training/job/status",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -211,7 +211,7 @@ async def list(
) -> List[Data]:
"""Get all training jobs."""
return await self._get(
- "/v1/post-training/jobs",
+ "/v1alpha/post-training/jobs",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -248,7 +248,7 @@ async def artifacts(
timeout: Override the client-level default timeout for this request, in seconds
"""
return await self._get(
- "/v1/post-training/job/artifacts",
+ "/v1alpha/post-training/job/artifacts",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -286,7 +286,7 @@ async def cancel(
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
return await self._post(
- "/v1/post-training/job/cancel",
+ "/v1alpha/post-training/job/cancel",
body=await async_maybe_transform({"job_uuid": job_uuid}, job_cancel_params.JobCancelParams),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -320,7 +320,7 @@ async def status(
timeout: Override the client-level default timeout for this request, in seconds
"""
return await self._get(
- "/v1/post-training/job/status",
+ "/v1alpha/post-training/job/status",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
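Post-training job management moves identically. A short sketch (the `job_uuid` value is illustrative; the keyword matches the `job_cancel_params` body shown above and is assumed to be shared by the status and artifacts params):

```python
# GET /v1alpha/post-training/jobs
jobs = client.alpha.post_training.job.list()

# GET /v1alpha/post-training/job/status
status = client.alpha.post_training.job.status(job_uuid="job-uuid")  # illustrative uuid
```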
diff --git a/src/llama_stack_client/resources/post_training/post_training.py b/src/llama_stack_client/resources/alpha/post_training/post_training.py
similarity index 95%
rename from src/llama_stack_client/resources/post_training/post_training.py
rename to src/llama_stack_client/resources/alpha/post_training/post_training.py
index 760d9cb2..a26c813a 100644
--- a/src/llama_stack_client/resources/post_training/post_training.py
+++ b/src/llama_stack_client/resources/alpha/post_training/post_training.py
@@ -14,23 +14,23 @@
JobResourceWithStreamingResponse,
AsyncJobResourceWithStreamingResponse,
)
-from ...types import (
- post_training_preference_optimize_params,
- post_training_supervised_fine_tune_params,
-)
-from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
-from ..._utils import maybe_transform, async_maybe_transform
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import (
+from ...._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
+from ...._utils import maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import (
to_raw_response_wrapper,
to_streamed_response_wrapper,
async_to_raw_response_wrapper,
async_to_streamed_response_wrapper,
)
-from ..._base_client import make_request_options
-from ...types.post_training_job import PostTrainingJob
-from ...types.algorithm_config_param import AlgorithmConfigParam
+from ....types.alpha import (
+ post_training_preference_optimize_params,
+ post_training_supervised_fine_tune_params,
+)
+from ...._base_client import make_request_options
+from ....types.alpha.post_training_job import PostTrainingJob
+from ....types.alpha.algorithm_config_param import AlgorithmConfigParam
__all__ = ["PostTrainingResource", "AsyncPostTrainingResource"]
@@ -100,7 +100,7 @@ def preference_optimize(
timeout: Override the client-level default timeout for this request, in seconds
"""
return self._post(
- "/v1/post-training/preference-optimize",
+ "/v1alpha/post-training/preference-optimize",
body=maybe_transform(
{
"algorithm_config": algorithm_config,
@@ -162,7 +162,7 @@ def supervised_fine_tune(
timeout: Override the client-level default timeout for this request, in seconds
"""
return self._post(
- "/v1/post-training/supervised-fine-tune",
+ "/v1alpha/post-training/supervised-fine-tune",
body=maybe_transform(
{
"hyperparam_search_config": hyperparam_search_config,
@@ -247,7 +247,7 @@ async def preference_optimize(
timeout: Override the client-level default timeout for this request, in seconds
"""
return await self._post(
- "/v1/post-training/preference-optimize",
+ "/v1alpha/post-training/preference-optimize",
body=await async_maybe_transform(
{
"algorithm_config": algorithm_config,
@@ -309,7 +309,7 @@ async def supervised_fine_tune(
timeout: Override the client-level default timeout for this request, in seconds
"""
return await self._post(
- "/v1/post-training/supervised-fine-tune",
+ "/v1alpha/post-training/supervised-fine-tune",
body=await async_maybe_transform(
{
"hyperparam_search_config": hyperparam_search_config,
diff --git a/src/llama_stack_client/types/__init__.py b/src/llama_stack_client/types/__init__.py
index 2f955f5e..0c3d0f34 100644
--- a/src/llama_stack_client/types/__init__.py
+++ b/src/llama_stack_client/types/__init__.py
@@ -2,7 +2,6 @@
from __future__ import annotations
-from .job import Job as Job
from .file import File as File
from .tool import Tool as Tool
from .model import Model as Model
@@ -43,8 +42,6 @@
from .file_list_params import FileListParams as FileListParams
from .span_with_status import SpanWithStatus as SpanWithStatus
from .tool_list_params import ToolListParams as ToolListParams
-from .evaluate_response import EvaluateResponse as EvaluateResponse
-from .post_training_job import PostTrainingJob as PostTrainingJob
from .scoring_fn_params import ScoringFnParams as ScoringFnParams
from .file_create_params import FileCreateParams as FileCreateParams
from .tool_list_response import ToolListResponse as ToolListResponse
@@ -54,7 +51,6 @@
from .route_list_response import RouteListResponse as RouteListResponse
from .run_shield_response import RunShieldResponse as RunShieldResponse
from .delete_file_response import DeleteFileResponse as DeleteFileResponse
-from .eval_run_eval_params import EvalRunEvalParams as EvalRunEvalParams
from .list_models_response import ListModelsResponse as ListModelsResponse
from .list_routes_response import ListRoutesResponse as ListRoutesResponse
from .query_spans_response import QuerySpansResponse as QuerySpansResponse
@@ -67,8 +63,6 @@
from .model_register_params import ModelRegisterParams as ModelRegisterParams
from .query_chunks_response import QueryChunksResponse as QueryChunksResponse
from .query_condition_param import QueryConditionParam as QueryConditionParam
-from .algorithm_config_param import AlgorithmConfigParam as AlgorithmConfigParam
-from .benchmark_config_param import BenchmarkConfigParam as BenchmarkConfigParam
from .list_datasets_response import ListDatasetsResponse as ListDatasetsResponse
from .provider_list_response import ProviderListResponse as ProviderListResponse
from .response_create_params import ResponseCreateParams as ResponseCreateParams
@@ -99,13 +93,11 @@
from .dataset_iterrows_response import DatasetIterrowsResponse as DatasetIterrowsResponse
from .dataset_register_response import DatasetRegisterResponse as DatasetRegisterResponse
from .dataset_retrieve_response import DatasetRetrieveResponse as DatasetRetrieveResponse
-from .eval_evaluate_rows_params import EvalEvaluateRowsParams as EvalEvaluateRowsParams
from .list_tool_groups_response import ListToolGroupsResponse as ListToolGroupsResponse
from .toolgroup_register_params import ToolgroupRegisterParams as ToolgroupRegisterParams
from .vector_db_register_params import VectorDBRegisterParams as VectorDBRegisterParams
from .completion_create_response import CompletionCreateResponse as CompletionCreateResponse
from .create_embeddings_response import CreateEmbeddingsResponse as CreateEmbeddingsResponse
-from .eval_run_eval_alpha_params import EvalRunEvalAlphaParams as EvalRunEvalAlphaParams
from .scoring_score_batch_params import ScoringScoreBatchParams as ScoringScoreBatchParams
from .telemetry_log_event_params import TelemetryLogEventParams as TelemetryLogEventParams
from .vector_store_create_params import VectorStoreCreateParams as VectorStoreCreateParams
@@ -125,11 +117,9 @@
from .telemetry_query_metrics_params import TelemetryQueryMetricsParams as TelemetryQueryMetricsParams
from .telemetry_query_spans_response import TelemetryQuerySpansResponse as TelemetryQuerySpansResponse
from .tool_runtime_list_tools_params import ToolRuntimeListToolsParams as ToolRuntimeListToolsParams
-from .eval_evaluate_rows_alpha_params import EvalEvaluateRowsAlphaParams as EvalEvaluateRowsAlphaParams
from .list_scoring_functions_response import ListScoringFunctionsResponse as ListScoringFunctionsResponse
from .telemetry_query_traces_response import TelemetryQueryTracesResponse as TelemetryQueryTracesResponse
from .tool_runtime_invoke_tool_params import ToolRuntimeInvokeToolParams as ToolRuntimeInvokeToolParams
-from .list_post_training_jobs_response import ListPostTrainingJobsResponse as ListPostTrainingJobsResponse
from .scoring_function_register_params import ScoringFunctionRegisterParams as ScoringFunctionRegisterParams
from .telemetry_get_span_tree_response import TelemetryGetSpanTreeResponse as TelemetryGetSpanTreeResponse
from .telemetry_query_metrics_response import TelemetryQueryMetricsResponse as TelemetryQueryMetricsResponse
@@ -138,12 +128,6 @@
from .telemetry_save_spans_to_dataset_params import (
TelemetrySaveSpansToDatasetParams as TelemetrySaveSpansToDatasetParams,
)
-from .post_training_preference_optimize_params import (
- PostTrainingPreferenceOptimizeParams as PostTrainingPreferenceOptimizeParams,
-)
-from .post_training_supervised_fine_tune_params import (
- PostTrainingSupervisedFineTuneParams as PostTrainingSupervisedFineTuneParams,
-)
from .synthetic_data_generation_generate_params import (
SyntheticDataGenerationGenerateParams as SyntheticDataGenerationGenerateParams,
)
diff --git a/src/llama_stack_client/types/alpha/__init__.py b/src/llama_stack_client/types/alpha/__init__.py
index 0740f223..9651e73a 100644
--- a/src/llama_stack_client/types/alpha/__init__.py
+++ b/src/llama_stack_client/types/alpha/__init__.py
@@ -2,16 +2,32 @@
from __future__ import annotations
+from .job import Job as Job
from .tool_response import ToolResponse as ToolResponse
from .inference_step import InferenceStep as InferenceStep
from .shield_call_step import ShieldCallStep as ShieldCallStep
from .agent_list_params import AgentListParams as AgentListParams
+from .evaluate_response import EvaluateResponse as EvaluateResponse
+from .post_training_job import PostTrainingJob as PostTrainingJob
from .agent_create_params import AgentCreateParams as AgentCreateParams
from .agent_list_response import AgentListResponse as AgentListResponse
from .tool_execution_step import ToolExecutionStep as ToolExecutionStep
from .tool_response_param import ToolResponseParam as ToolResponseParam
+from .eval_run_eval_params import EvalRunEvalParams as EvalRunEvalParams
from .agent_create_response import AgentCreateResponse as AgentCreateResponse
from .memory_retrieval_step import MemoryRetrievalStep as MemoryRetrievalStep
+from .algorithm_config_param import AlgorithmConfigParam as AlgorithmConfigParam
+from .benchmark_config_param import BenchmarkConfigParam as BenchmarkConfigParam
from .agent_retrieve_response import AgentRetrieveResponse as AgentRetrieveResponse
from .inference_rerank_params import InferenceRerankParams as InferenceRerankParams
+from .eval_evaluate_rows_params import EvalEvaluateRowsParams as EvalEvaluateRowsParams
from .inference_rerank_response import InferenceRerankResponse as InferenceRerankResponse
+from .eval_run_eval_alpha_params import EvalRunEvalAlphaParams as EvalRunEvalAlphaParams
+from .eval_evaluate_rows_alpha_params import EvalEvaluateRowsAlphaParams as EvalEvaluateRowsAlphaParams
+from .list_post_training_jobs_response import ListPostTrainingJobsResponse as ListPostTrainingJobsResponse
+from .post_training_preference_optimize_params import (
+ PostTrainingPreferenceOptimizeParams as PostTrainingPreferenceOptimizeParams,
+)
+from .post_training_supervised_fine_tune_params import (
+ PostTrainingSupervisedFineTuneParams as PostTrainingSupervisedFineTuneParams,
+)
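With the type modules relocated, imports move to `types.alpha`; the updated tests below use exactly this form:

```python
from llama_stack_client.types.alpha import Job, EvaluateResponse, PostTrainingJob
```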
diff --git a/src/llama_stack_client/types/algorithm_config_param.py b/src/llama_stack_client/types/alpha/algorithm_config_param.py
similarity index 97%
rename from src/llama_stack_client/types/algorithm_config_param.py
rename to src/llama_stack_client/types/alpha/algorithm_config_param.py
index 6940953e..d6da8130 100644
--- a/src/llama_stack_client/types/algorithm_config_param.py
+++ b/src/llama_stack_client/types/alpha/algorithm_config_param.py
@@ -5,7 +5,7 @@
from typing import Union
from typing_extensions import Literal, Required, TypeAlias, TypedDict
-from .._types import SequenceNotStr
+from ..._types import SequenceNotStr
__all__ = ["AlgorithmConfigParam", "LoraFinetuningConfig", "QatFinetuningConfig"]
diff --git a/src/llama_stack_client/types/benchmark_config_param.py b/src/llama_stack_client/types/alpha/benchmark_config_param.py
similarity index 86%
rename from src/llama_stack_client/types/benchmark_config_param.py
rename to src/llama_stack_client/types/alpha/benchmark_config_param.py
index dc968521..4a3ea512 100644
--- a/src/llama_stack_client/types/benchmark_config_param.py
+++ b/src/llama_stack_client/types/alpha/benchmark_config_param.py
@@ -5,10 +5,10 @@
from typing import Dict, Union
from typing_extensions import Literal, Required, TypeAlias, TypedDict
-from .scoring_fn_params_param import ScoringFnParamsParam
-from .shared_params.agent_config import AgentConfig
-from .shared_params.system_message import SystemMessage
-from .shared_params.sampling_params import SamplingParams
+from ..scoring_fn_params_param import ScoringFnParamsParam
+from ..shared_params.agent_config import AgentConfig
+from ..shared_params.system_message import SystemMessage
+from ..shared_params.sampling_params import SamplingParams
__all__ = ["BenchmarkConfigParam", "EvalCandidate", "EvalCandidateModelCandidate", "EvalCandidateAgentCandidate"]
diff --git a/src/llama_stack_client/types/eval/__init__.py b/src/llama_stack_client/types/alpha/eval/__init__.py
similarity index 100%
rename from src/llama_stack_client/types/eval/__init__.py
rename to src/llama_stack_client/types/alpha/eval/__init__.py
diff --git a/src/llama_stack_client/types/eval_evaluate_rows_alpha_params.py b/src/llama_stack_client/types/alpha/eval_evaluate_rows_alpha_params.py
similarity index 95%
rename from src/llama_stack_client/types/eval_evaluate_rows_alpha_params.py
rename to src/llama_stack_client/types/alpha/eval_evaluate_rows_alpha_params.py
index e4953252..0422e224 100644
--- a/src/llama_stack_client/types/eval_evaluate_rows_alpha_params.py
+++ b/src/llama_stack_client/types/alpha/eval_evaluate_rows_alpha_params.py
@@ -5,7 +5,7 @@
from typing import Dict, Union, Iterable
from typing_extensions import Required, TypedDict
-from .._types import SequenceNotStr
+from ..._types import SequenceNotStr
from .benchmark_config_param import BenchmarkConfigParam
__all__ = ["EvalEvaluateRowsAlphaParams"]
diff --git a/src/llama_stack_client/types/eval_evaluate_rows_params.py b/src/llama_stack_client/types/alpha/eval_evaluate_rows_params.py
similarity index 95%
rename from src/llama_stack_client/types/eval_evaluate_rows_params.py
rename to src/llama_stack_client/types/alpha/eval_evaluate_rows_params.py
index 128f363d..4ff9bd5b 100644
--- a/src/llama_stack_client/types/eval_evaluate_rows_params.py
+++ b/src/llama_stack_client/types/alpha/eval_evaluate_rows_params.py
@@ -5,7 +5,7 @@
from typing import Dict, Union, Iterable
from typing_extensions import Required, TypedDict
-from .._types import SequenceNotStr
+from ..._types import SequenceNotStr
from .benchmark_config_param import BenchmarkConfigParam
__all__ = ["EvalEvaluateRowsParams"]
diff --git a/src/llama_stack_client/types/eval_run_eval_alpha_params.py b/src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py
similarity index 100%
rename from src/llama_stack_client/types/eval_run_eval_alpha_params.py
rename to src/llama_stack_client/types/alpha/eval_run_eval_alpha_params.py
diff --git a/src/llama_stack_client/types/eval_run_eval_params.py b/src/llama_stack_client/types/alpha/eval_run_eval_params.py
similarity index 100%
rename from src/llama_stack_client/types/eval_run_eval_params.py
rename to src/llama_stack_client/types/alpha/eval_run_eval_params.py
diff --git a/src/llama_stack_client/types/evaluate_response.py b/src/llama_stack_client/types/alpha/evaluate_response.py
similarity index 83%
rename from src/llama_stack_client/types/evaluate_response.py
rename to src/llama_stack_client/types/alpha/evaluate_response.py
index 8e463352..4cd2e0f7 100644
--- a/src/llama_stack_client/types/evaluate_response.py
+++ b/src/llama_stack_client/types/alpha/evaluate_response.py
@@ -2,8 +2,8 @@
from typing import Dict, List, Union
-from .._models import BaseModel
-from .shared.scoring_result import ScoringResult
+from ..._models import BaseModel
+from ..shared.scoring_result import ScoringResult
__all__ = ["EvaluateResponse"]
diff --git a/src/llama_stack_client/types/job.py b/src/llama_stack_client/types/alpha/job.py
similarity index 91%
rename from src/llama_stack_client/types/job.py
rename to src/llama_stack_client/types/alpha/job.py
index 9635de38..23506692 100644
--- a/src/llama_stack_client/types/job.py
+++ b/src/llama_stack_client/types/alpha/job.py
@@ -2,7 +2,7 @@
from typing_extensions import Literal
-from .._models import BaseModel
+from ..._models import BaseModel
__all__ = ["Job"]
diff --git a/src/llama_stack_client/types/list_post_training_jobs_response.py b/src/llama_stack_client/types/alpha/list_post_training_jobs_response.py
similarity index 89%
rename from src/llama_stack_client/types/list_post_training_jobs_response.py
rename to src/llama_stack_client/types/alpha/list_post_training_jobs_response.py
index 09d16628..746afe99 100644
--- a/src/llama_stack_client/types/list_post_training_jobs_response.py
+++ b/src/llama_stack_client/types/alpha/list_post_training_jobs_response.py
@@ -2,7 +2,7 @@
from typing import List
-from .._models import BaseModel
+from ..._models import BaseModel
__all__ = ["ListPostTrainingJobsResponse", "Data"]
diff --git a/src/llama_stack_client/types/post_training/__init__.py b/src/llama_stack_client/types/alpha/post_training/__init__.py
similarity index 100%
rename from src/llama_stack_client/types/post_training/__init__.py
rename to src/llama_stack_client/types/alpha/post_training/__init__.py
diff --git a/src/llama_stack_client/types/post_training/job_artifacts_params.py b/src/llama_stack_client/types/alpha/post_training/job_artifacts_params.py
similarity index 100%
rename from src/llama_stack_client/types/post_training/job_artifacts_params.py
rename to src/llama_stack_client/types/alpha/post_training/job_artifacts_params.py
diff --git a/src/llama_stack_client/types/post_training/job_artifacts_response.py b/src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py
similarity index 97%
rename from src/llama_stack_client/types/post_training/job_artifacts_response.py
rename to src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py
index 42784aee..74edff26 100644
--- a/src/llama_stack_client/types/post_training/job_artifacts_response.py
+++ b/src/llama_stack_client/types/alpha/post_training/job_artifacts_response.py
@@ -3,7 +3,7 @@
from typing import List, Optional
from datetime import datetime
-from ..._models import BaseModel
+from ...._models import BaseModel
__all__ = ["JobArtifactsResponse", "Checkpoint", "CheckpointTrainingMetrics"]
diff --git a/src/llama_stack_client/types/post_training/job_cancel_params.py b/src/llama_stack_client/types/alpha/post_training/job_cancel_params.py
similarity index 100%
rename from src/llama_stack_client/types/post_training/job_cancel_params.py
rename to src/llama_stack_client/types/alpha/post_training/job_cancel_params.py
diff --git a/src/llama_stack_client/types/post_training/job_list_response.py b/src/llama_stack_client/types/alpha/post_training/job_list_response.py
similarity index 90%
rename from src/llama_stack_client/types/post_training/job_list_response.py
rename to src/llama_stack_client/types/alpha/post_training/job_list_response.py
index cb42da2d..33bd89f1 100644
--- a/src/llama_stack_client/types/post_training/job_list_response.py
+++ b/src/llama_stack_client/types/alpha/post_training/job_list_response.py
@@ -3,7 +3,7 @@
from typing import List
from typing_extensions import TypeAlias
-from ..._models import BaseModel
+from ...._models import BaseModel
__all__ = ["JobListResponse", "JobListResponseItem"]
diff --git a/src/llama_stack_client/types/post_training/job_status_params.py b/src/llama_stack_client/types/alpha/post_training/job_status_params.py
similarity index 100%
rename from src/llama_stack_client/types/post_training/job_status_params.py
rename to src/llama_stack_client/types/alpha/post_training/job_status_params.py
diff --git a/src/llama_stack_client/types/post_training/job_status_response.py b/src/llama_stack_client/types/alpha/post_training/job_status_response.py
similarity index 98%
rename from src/llama_stack_client/types/post_training/job_status_response.py
rename to src/llama_stack_client/types/alpha/post_training/job_status_response.py
index 94379579..1ccc9ca2 100644
--- a/src/llama_stack_client/types/post_training/job_status_response.py
+++ b/src/llama_stack_client/types/alpha/post_training/job_status_response.py
@@ -4,7 +4,7 @@
from datetime import datetime
from typing_extensions import Literal
-from ..._models import BaseModel
+from ...._models import BaseModel
__all__ = ["JobStatusResponse", "Checkpoint", "CheckpointTrainingMetrics"]
diff --git a/src/llama_stack_client/types/post_training_job.py b/src/llama_stack_client/types/alpha/post_training_job.py
similarity index 83%
rename from src/llama_stack_client/types/post_training_job.py
rename to src/llama_stack_client/types/alpha/post_training_job.py
index d0ba5fce..7d9417db 100644
--- a/src/llama_stack_client/types/post_training_job.py
+++ b/src/llama_stack_client/types/alpha/post_training_job.py
@@ -1,6 +1,6 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from .._models import BaseModel
+from ..._models import BaseModel
__all__ = ["PostTrainingJob"]
diff --git a/src/llama_stack_client/types/post_training_preference_optimize_params.py b/src/llama_stack_client/types/alpha/post_training_preference_optimize_params.py
similarity index 100%
rename from src/llama_stack_client/types/post_training_preference_optimize_params.py
rename to src/llama_stack_client/types/alpha/post_training_preference_optimize_params.py
diff --git a/src/llama_stack_client/types/post_training_supervised_fine_tune_params.py b/src/llama_stack_client/types/alpha/post_training_supervised_fine_tune_params.py
similarity index 100%
rename from src/llama_stack_client/types/post_training_supervised_fine_tune_params.py
rename to src/llama_stack_client/types/alpha/post_training_supervised_fine_tune_params.py
diff --git a/tests/api_resources/eval/__init__.py b/tests/api_resources/alpha/eval/__init__.py
similarity index 100%
rename from tests/api_resources/eval/__init__.py
rename to tests/api_resources/alpha/eval/__init__.py
diff --git a/tests/api_resources/eval/test_jobs.py b/tests/api_resources/alpha/eval/test_jobs.py
similarity index 82%
rename from tests/api_resources/eval/test_jobs.py
rename to tests/api_resources/alpha/eval/test_jobs.py
index 17b02896..f4ea9ce1 100644
--- a/tests/api_resources/eval/test_jobs.py
+++ b/tests/api_resources/alpha/eval/test_jobs.py
@@ -9,7 +9,7 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types import Job, EvaluateResponse
+from llama_stack_client.types.alpha import Job, EvaluateResponse
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -19,7 +19,7 @@ class TestJobs:
@parametrize
def test_method_retrieve(self, client: LlamaStackClient) -> None:
- job = client.eval.jobs.retrieve(
+ job = client.alpha.eval.jobs.retrieve(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -27,7 +27,7 @@ def test_method_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
- response = client.eval.jobs.with_raw_response.retrieve(
+ response = client.alpha.eval.jobs.with_raw_response.retrieve(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -39,7 +39,7 @@ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
- with client.eval.jobs.with_streaming_response.retrieve(
+ with client.alpha.eval.jobs.with_streaming_response.retrieve(
job_id="job_id",
benchmark_id="benchmark_id",
) as response:
@@ -54,20 +54,20 @@ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- client.eval.jobs.with_raw_response.retrieve(
+ client.alpha.eval.jobs.with_raw_response.retrieve(
job_id="job_id",
benchmark_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
- client.eval.jobs.with_raw_response.retrieve(
+ client.alpha.eval.jobs.with_raw_response.retrieve(
job_id="",
benchmark_id="benchmark_id",
)
@parametrize
def test_method_cancel(self, client: LlamaStackClient) -> None:
- job = client.eval.jobs.cancel(
+ job = client.alpha.eval.jobs.cancel(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -75,7 +75,7 @@ def test_method_cancel(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_cancel(self, client: LlamaStackClient) -> None:
- response = client.eval.jobs.with_raw_response.cancel(
+ response = client.alpha.eval.jobs.with_raw_response.cancel(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -87,7 +87,7 @@ def test_raw_response_cancel(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_cancel(self, client: LlamaStackClient) -> None:
- with client.eval.jobs.with_streaming_response.cancel(
+ with client.alpha.eval.jobs.with_streaming_response.cancel(
job_id="job_id",
benchmark_id="benchmark_id",
) as response:
@@ -102,20 +102,20 @@ def test_streaming_response_cancel(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_cancel(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- client.eval.jobs.with_raw_response.cancel(
+ client.alpha.eval.jobs.with_raw_response.cancel(
job_id="job_id",
benchmark_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
- client.eval.jobs.with_raw_response.cancel(
+ client.alpha.eval.jobs.with_raw_response.cancel(
job_id="",
benchmark_id="benchmark_id",
)
@parametrize
def test_method_status(self, client: LlamaStackClient) -> None:
- job = client.eval.jobs.status(
+ job = client.alpha.eval.jobs.status(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -123,7 +123,7 @@ def test_method_status(self, client: LlamaStackClient) -> None:
@parametrize
def test_raw_response_status(self, client: LlamaStackClient) -> None:
- response = client.eval.jobs.with_raw_response.status(
+ response = client.alpha.eval.jobs.with_raw_response.status(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -135,7 +135,7 @@ def test_raw_response_status(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_status(self, client: LlamaStackClient) -> None:
- with client.eval.jobs.with_streaming_response.status(
+ with client.alpha.eval.jobs.with_streaming_response.status(
job_id="job_id",
benchmark_id="benchmark_id",
) as response:
@@ -150,13 +150,13 @@ def test_streaming_response_status(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_status(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- client.eval.jobs.with_raw_response.status(
+ client.alpha.eval.jobs.with_raw_response.status(
job_id="job_id",
benchmark_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
- client.eval.jobs.with_raw_response.status(
+ client.alpha.eval.jobs.with_raw_response.status(
job_id="",
benchmark_id="benchmark_id",
)
@@ -169,7 +169,7 @@ class TestAsyncJobs:
@parametrize
async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.eval.jobs.retrieve(
+ job = await async_client.alpha.eval.jobs.retrieve(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -177,7 +177,7 @@ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> Non
@parametrize
async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.eval.jobs.with_raw_response.retrieve(
+ response = await async_client.alpha.eval.jobs.with_raw_response.retrieve(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -189,7 +189,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient)
@parametrize
async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.eval.jobs.with_streaming_response.retrieve(
+ async with async_client.alpha.eval.jobs.with_streaming_response.retrieve(
job_id="job_id",
benchmark_id="benchmark_id",
) as response:
@@ -204,20 +204,20 @@ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- await async_client.eval.jobs.with_raw_response.retrieve(
+ await async_client.alpha.eval.jobs.with_raw_response.retrieve(
job_id="job_id",
benchmark_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
- await async_client.eval.jobs.with_raw_response.retrieve(
+ await async_client.alpha.eval.jobs.with_raw_response.retrieve(
job_id="",
benchmark_id="benchmark_id",
)
@parametrize
async def test_method_cancel(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.eval.jobs.cancel(
+ job = await async_client.alpha.eval.jobs.cancel(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -225,7 +225,7 @@ async def test_method_cancel(self, async_client: AsyncLlamaStackClient) -> None:
@parametrize
async def test_raw_response_cancel(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.eval.jobs.with_raw_response.cancel(
+ response = await async_client.alpha.eval.jobs.with_raw_response.cancel(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -237,7 +237,7 @@ async def test_raw_response_cancel(self, async_client: AsyncLlamaStackClient) ->
@parametrize
async def test_streaming_response_cancel(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.eval.jobs.with_streaming_response.cancel(
+ async with async_client.alpha.eval.jobs.with_streaming_response.cancel(
job_id="job_id",
benchmark_id="benchmark_id",
) as response:
@@ -252,20 +252,20 @@ async def test_streaming_response_cancel(self, async_client: AsyncLlamaStackClie
@parametrize
async def test_path_params_cancel(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- await async_client.eval.jobs.with_raw_response.cancel(
+ await async_client.alpha.eval.jobs.with_raw_response.cancel(
job_id="job_id",
benchmark_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
- await async_client.eval.jobs.with_raw_response.cancel(
+ await async_client.alpha.eval.jobs.with_raw_response.cancel(
job_id="",
benchmark_id="benchmark_id",
)
@parametrize
async def test_method_status(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.eval.jobs.status(
+ job = await async_client.alpha.eval.jobs.status(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -273,7 +273,7 @@ async def test_method_status(self, async_client: AsyncLlamaStackClient) -> None:
@parametrize
async def test_raw_response_status(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.eval.jobs.with_raw_response.status(
+ response = await async_client.alpha.eval.jobs.with_raw_response.status(
job_id="job_id",
benchmark_id="benchmark_id",
)
@@ -285,7 +285,7 @@ async def test_raw_response_status(self, async_client: AsyncLlamaStackClient) ->
@parametrize
async def test_streaming_response_status(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.eval.jobs.with_streaming_response.status(
+ async with async_client.alpha.eval.jobs.with_streaming_response.status(
job_id="job_id",
benchmark_id="benchmark_id",
) as response:
@@ -300,13 +300,13 @@ async def test_streaming_response_status(self, async_client: AsyncLlamaStackClie
@parametrize
async def test_path_params_status(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- await async_client.eval.jobs.with_raw_response.status(
+ await async_client.alpha.eval.jobs.with_raw_response.status(
job_id="job_id",
benchmark_id="",
)
with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"):
- await async_client.eval.jobs.with_raw_response.status(
+ await async_client.alpha.eval.jobs.with_raw_response.status(
job_id="",
benchmark_id="benchmark_id",
)
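
For readers following the rename above: the eval jobs resource keeps its methods and parameters and only gains the `alpha` prefix. A minimal before/after sketch — the base URL and IDs here are placeholders, not values taken from this patch:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://127.0.0.1:8321")  # placeholder URL

# Before this patch series: client.eval.jobs.status(...)
# After: the same call, routed through the alpha namespace.
job = client.alpha.eval.jobs.status(job_id="job-123", benchmark_id="bench-1")
client.alpha.eval.jobs.cancel(job_id="job-123", benchmark_id="bench-1")
```

As the path-param tests above enforce, both `job_id` and `benchmark_id` must be non-empty strings, or the client raises `ValueError` before any request is sent.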
diff --git a/tests/api_resources/post_training/__init__.py b/tests/api_resources/alpha/post_training/__init__.py
similarity index 100%
rename from tests/api_resources/post_training/__init__.py
rename to tests/api_resources/alpha/post_training/__init__.py
diff --git a/tests/api_resources/post_training/test_job.py b/tests/api_resources/alpha/post_training/test_job.py
similarity index 81%
rename from tests/api_resources/post_training/test_job.py
rename to tests/api_resources/alpha/post_training/test_job.py
index 158eafbc..3b47132f 100644
--- a/tests/api_resources/post_training/test_job.py
+++ b/tests/api_resources/alpha/post_training/test_job.py
@@ -9,11 +9,11 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types.post_training import (
+from llama_stack_client.types.alpha.post_training import (
JobStatusResponse,
JobArtifactsResponse,
)
-from llama_stack_client.types.list_post_training_jobs_response import Data
+from llama_stack_client.types.alpha.list_post_training_jobs_response import Data
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -23,12 +23,12 @@ class TestJob:
@parametrize
def test_method_list(self, client: LlamaStackClient) -> None:
- job = client.post_training.job.list()
+ job = client.alpha.post_training.job.list()
assert_matches_type(List[Data], job, path=["response"])
@parametrize
def test_raw_response_list(self, client: LlamaStackClient) -> None:
- response = client.post_training.job.with_raw_response.list()
+ response = client.alpha.post_training.job.with_raw_response.list()
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -37,7 +37,7 @@ def test_raw_response_list(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_list(self, client: LlamaStackClient) -> None:
- with client.post_training.job.with_streaming_response.list() as response:
+ with client.alpha.post_training.job.with_streaming_response.list() as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -48,14 +48,14 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_artifacts(self, client: LlamaStackClient) -> None:
- job = client.post_training.job.artifacts(
+ job = client.alpha.post_training.job.artifacts(
job_uuid="job_uuid",
)
assert_matches_type(JobArtifactsResponse, job, path=["response"])
@parametrize
def test_raw_response_artifacts(self, client: LlamaStackClient) -> None:
- response = client.post_training.job.with_raw_response.artifacts(
+ response = client.alpha.post_training.job.with_raw_response.artifacts(
job_uuid="job_uuid",
)
@@ -66,7 +66,7 @@ def test_raw_response_artifacts(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_artifacts(self, client: LlamaStackClient) -> None:
- with client.post_training.job.with_streaming_response.artifacts(
+ with client.alpha.post_training.job.with_streaming_response.artifacts(
job_uuid="job_uuid",
) as response:
assert not response.is_closed
@@ -79,14 +79,14 @@ def test_streaming_response_artifacts(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_cancel(self, client: LlamaStackClient) -> None:
- job = client.post_training.job.cancel(
+ job = client.alpha.post_training.job.cancel(
job_uuid="job_uuid",
)
assert job is None
@parametrize
def test_raw_response_cancel(self, client: LlamaStackClient) -> None:
- response = client.post_training.job.with_raw_response.cancel(
+ response = client.alpha.post_training.job.with_raw_response.cancel(
job_uuid="job_uuid",
)
@@ -97,7 +97,7 @@ def test_raw_response_cancel(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_cancel(self, client: LlamaStackClient) -> None:
- with client.post_training.job.with_streaming_response.cancel(
+ with client.alpha.post_training.job.with_streaming_response.cancel(
job_uuid="job_uuid",
) as response:
assert not response.is_closed
@@ -110,14 +110,14 @@ def test_streaming_response_cancel(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_status(self, client: LlamaStackClient) -> None:
- job = client.post_training.job.status(
+ job = client.alpha.post_training.job.status(
job_uuid="job_uuid",
)
assert_matches_type(JobStatusResponse, job, path=["response"])
@parametrize
def test_raw_response_status(self, client: LlamaStackClient) -> None:
- response = client.post_training.job.with_raw_response.status(
+ response = client.alpha.post_training.job.with_raw_response.status(
job_uuid="job_uuid",
)
@@ -128,7 +128,7 @@ def test_raw_response_status(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_status(self, client: LlamaStackClient) -> None:
- with client.post_training.job.with_streaming_response.status(
+ with client.alpha.post_training.job.with_streaming_response.status(
job_uuid="job_uuid",
) as response:
assert not response.is_closed
@@ -147,12 +147,12 @@ class TestAsyncJob:
@parametrize
async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.post_training.job.list()
+ job = await async_client.alpha.post_training.job.list()
assert_matches_type(List[Data], job, path=["response"])
@parametrize
async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.post_training.job.with_raw_response.list()
+ response = await async_client.alpha.post_training.job.with_raw_response.list()
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -161,7 +161,7 @@ async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> N
@parametrize
async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.post_training.job.with_streaming_response.list() as response:
+ async with async_client.alpha.post_training.job.with_streaming_response.list() as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -172,14 +172,14 @@ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient
@parametrize
async def test_method_artifacts(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.post_training.job.artifacts(
+ job = await async_client.alpha.post_training.job.artifacts(
job_uuid="job_uuid",
)
assert_matches_type(JobArtifactsResponse, job, path=["response"])
@parametrize
async def test_raw_response_artifacts(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.post_training.job.with_raw_response.artifacts(
+ response = await async_client.alpha.post_training.job.with_raw_response.artifacts(
job_uuid="job_uuid",
)
@@ -190,7 +190,7 @@ async def test_raw_response_artifacts(self, async_client: AsyncLlamaStackClient)
@parametrize
async def test_streaming_response_artifacts(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.post_training.job.with_streaming_response.artifacts(
+ async with async_client.alpha.post_training.job.with_streaming_response.artifacts(
job_uuid="job_uuid",
) as response:
assert not response.is_closed
@@ -203,14 +203,14 @@ async def test_streaming_response_artifacts(self, async_client: AsyncLlamaStackC
@parametrize
async def test_method_cancel(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.post_training.job.cancel(
+ job = await async_client.alpha.post_training.job.cancel(
job_uuid="job_uuid",
)
assert job is None
@parametrize
async def test_raw_response_cancel(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.post_training.job.with_raw_response.cancel(
+ response = await async_client.alpha.post_training.job.with_raw_response.cancel(
job_uuid="job_uuid",
)
@@ -221,7 +221,7 @@ async def test_raw_response_cancel(self, async_client: AsyncLlamaStackClient) ->
@parametrize
async def test_streaming_response_cancel(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.post_training.job.with_streaming_response.cancel(
+ async with async_client.alpha.post_training.job.with_streaming_response.cancel(
job_uuid="job_uuid",
) as response:
assert not response.is_closed
@@ -234,14 +234,14 @@ async def test_streaming_response_cancel(self, async_client: AsyncLlamaStackClie
@parametrize
async def test_method_status(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.post_training.job.status(
+ job = await async_client.alpha.post_training.job.status(
job_uuid="job_uuid",
)
assert_matches_type(JobStatusResponse, job, path=["response"])
@parametrize
async def test_raw_response_status(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.post_training.job.with_raw_response.status(
+ response = await async_client.alpha.post_training.job.with_raw_response.status(
job_uuid="job_uuid",
)
@@ -252,7 +252,7 @@ async def test_raw_response_status(self, async_client: AsyncLlamaStackClient) ->
@parametrize
async def test_streaming_response_status(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.post_training.job.with_streaming_response.status(
+ async with async_client.alpha.post_training.job.with_streaming_response.status(
job_uuid="job_uuid",
) as response:
assert not response.is_closed
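
The post-training job resource follows the same pattern: the client path and the types import both move under `alpha`, while method names and parameters are unchanged. A minimal sketch (placeholder base URL and job UUID):

```python
from llama_stack_client import LlamaStackClient
from llama_stack_client.types.alpha.post_training import JobStatusResponse

client = LlamaStackClient(base_url="http://127.0.0.1:8321")  # placeholder URL

jobs = client.alpha.post_training.job.list()  # was client.post_training.job.list()
status: JobStatusResponse = client.alpha.post_training.job.status(job_uuid="job_uuid")
artifacts = client.alpha.post_training.job.artifacts(job_uuid="job_uuid")
client.alpha.post_training.job.cancel(job_uuid="job_uuid")  # returns None, per the tests above
```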
diff --git a/tests/api_resources/test_eval.py b/tests/api_resources/alpha/test_eval.py
similarity index 93%
rename from tests/api_resources/test_eval.py
rename to tests/api_resources/alpha/test_eval.py
index 878b3d28..88bd0c0c 100644
--- a/tests/api_resources/test_eval.py
+++ b/tests/api_resources/alpha/test_eval.py
@@ -9,7 +9,7 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types import (
+from llama_stack_client.types.alpha import (
Job,
EvaluateResponse,
)
@@ -22,7 +22,7 @@ class TestEval:
@parametrize
def test_method_evaluate_rows(self, client: LlamaStackClient) -> None:
- eval = client.eval.evaluate_rows(
+ eval = client.alpha.eval.evaluate_rows(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -46,7 +46,7 @@ def test_method_evaluate_rows(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_evaluate_rows_with_all_params(self, client: LlamaStackClient) -> None:
- eval = client.eval.evaluate_rows(
+ eval = client.alpha.eval.evaluate_rows(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -81,7 +81,7 @@ def test_method_evaluate_rows_with_all_params(self, client: LlamaStackClient) ->
@parametrize
def test_raw_response_evaluate_rows(self, client: LlamaStackClient) -> None:
- response = client.eval.with_raw_response.evaluate_rows(
+ response = client.alpha.eval.with_raw_response.evaluate_rows(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -109,7 +109,7 @@ def test_raw_response_evaluate_rows(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_evaluate_rows(self, client: LlamaStackClient) -> None:
- with client.eval.with_streaming_response.evaluate_rows(
+ with client.alpha.eval.with_streaming_response.evaluate_rows(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -140,7 +140,7 @@ def test_streaming_response_evaluate_rows(self, client: LlamaStackClient) -> Non
@parametrize
def test_path_params_evaluate_rows(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- client.eval.with_raw_response.evaluate_rows(
+ client.alpha.eval.with_raw_response.evaluate_rows(
benchmark_id="",
benchmark_config={
"eval_candidate": {
@@ -163,7 +163,7 @@ def test_path_params_evaluate_rows(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_evaluate_rows_alpha(self, client: LlamaStackClient) -> None:
- eval = client.eval.evaluate_rows_alpha(
+ eval = client.alpha.eval.evaluate_rows_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -187,7 +187,7 @@ def test_method_evaluate_rows_alpha(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_evaluate_rows_alpha_with_all_params(self, client: LlamaStackClient) -> None:
- eval = client.eval.evaluate_rows_alpha(
+ eval = client.alpha.eval.evaluate_rows_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -222,7 +222,7 @@ def test_method_evaluate_rows_alpha_with_all_params(self, client: LlamaStackClie
@parametrize
def test_raw_response_evaluate_rows_alpha(self, client: LlamaStackClient) -> None:
- response = client.eval.with_raw_response.evaluate_rows_alpha(
+ response = client.alpha.eval.with_raw_response.evaluate_rows_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -250,7 +250,7 @@ def test_raw_response_evaluate_rows_alpha(self, client: LlamaStackClient) -> Non
@parametrize
def test_streaming_response_evaluate_rows_alpha(self, client: LlamaStackClient) -> None:
- with client.eval.with_streaming_response.evaluate_rows_alpha(
+ with client.alpha.eval.with_streaming_response.evaluate_rows_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -281,7 +281,7 @@ def test_streaming_response_evaluate_rows_alpha(self, client: LlamaStackClient)
@parametrize
def test_path_params_evaluate_rows_alpha(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- client.eval.with_raw_response.evaluate_rows_alpha(
+ client.alpha.eval.with_raw_response.evaluate_rows_alpha(
benchmark_id="",
benchmark_config={
"eval_candidate": {
@@ -304,7 +304,7 @@ def test_path_params_evaluate_rows_alpha(self, client: LlamaStackClient) -> None
@parametrize
def test_method_run_eval(self, client: LlamaStackClient) -> None:
- eval = client.eval.run_eval(
+ eval = client.alpha.eval.run_eval(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -326,7 +326,7 @@ def test_method_run_eval(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_run_eval_with_all_params(self, client: LlamaStackClient) -> None:
- eval = client.eval.run_eval(
+ eval = client.alpha.eval.run_eval(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -359,7 +359,7 @@ def test_method_run_eval_with_all_params(self, client: LlamaStackClient) -> None
@parametrize
def test_raw_response_run_eval(self, client: LlamaStackClient) -> None:
- response = client.eval.with_raw_response.run_eval(
+ response = client.alpha.eval.with_raw_response.run_eval(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -385,7 +385,7 @@ def test_raw_response_run_eval(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_run_eval(self, client: LlamaStackClient) -> None:
- with client.eval.with_streaming_response.run_eval(
+ with client.alpha.eval.with_streaming_response.run_eval(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -414,7 +414,7 @@ def test_streaming_response_run_eval(self, client: LlamaStackClient) -> None:
@parametrize
def test_path_params_run_eval(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- client.eval.with_raw_response.run_eval(
+ client.alpha.eval.with_raw_response.run_eval(
benchmark_id="",
benchmark_config={
"eval_candidate": {
@@ -435,7 +435,7 @@ def test_path_params_run_eval(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_run_eval_alpha(self, client: LlamaStackClient) -> None:
- eval = client.eval.run_eval_alpha(
+ eval = client.alpha.eval.run_eval_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -457,7 +457,7 @@ def test_method_run_eval_alpha(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_run_eval_alpha_with_all_params(self, client: LlamaStackClient) -> None:
- eval = client.eval.run_eval_alpha(
+ eval = client.alpha.eval.run_eval_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -490,7 +490,7 @@ def test_method_run_eval_alpha_with_all_params(self, client: LlamaStackClient) -
@parametrize
def test_raw_response_run_eval_alpha(self, client: LlamaStackClient) -> None:
- response = client.eval.with_raw_response.run_eval_alpha(
+ response = client.alpha.eval.with_raw_response.run_eval_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -516,7 +516,7 @@ def test_raw_response_run_eval_alpha(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_run_eval_alpha(self, client: LlamaStackClient) -> None:
- with client.eval.with_streaming_response.run_eval_alpha(
+ with client.alpha.eval.with_streaming_response.run_eval_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -545,7 +545,7 @@ def test_streaming_response_run_eval_alpha(self, client: LlamaStackClient) -> No
@parametrize
def test_path_params_run_eval_alpha(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- client.eval.with_raw_response.run_eval_alpha(
+ client.alpha.eval.with_raw_response.run_eval_alpha(
benchmark_id="",
benchmark_config={
"eval_candidate": {
@@ -572,7 +572,7 @@ class TestAsyncEval:
@parametrize
async def test_method_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.eval.evaluate_rows(
+ eval = await async_client.alpha.eval.evaluate_rows(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -596,7 +596,7 @@ async def test_method_evaluate_rows(self, async_client: AsyncLlamaStackClient) -
@parametrize
async def test_method_evaluate_rows_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.eval.evaluate_rows(
+ eval = await async_client.alpha.eval.evaluate_rows(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -631,7 +631,7 @@ async def test_method_evaluate_rows_with_all_params(self, async_client: AsyncLla
@parametrize
async def test_raw_response_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.eval.with_raw_response.evaluate_rows(
+ response = await async_client.alpha.eval.with_raw_response.evaluate_rows(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -659,7 +659,7 @@ async def test_raw_response_evaluate_rows(self, async_client: AsyncLlamaStackCli
@parametrize
async def test_streaming_response_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.eval.with_streaming_response.evaluate_rows(
+ async with async_client.alpha.eval.with_streaming_response.evaluate_rows(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -690,7 +690,7 @@ async def test_streaming_response_evaluate_rows(self, async_client: AsyncLlamaSt
@parametrize
async def test_path_params_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- await async_client.eval.with_raw_response.evaluate_rows(
+ await async_client.alpha.eval.with_raw_response.evaluate_rows(
benchmark_id="",
benchmark_config={
"eval_candidate": {
@@ -713,7 +713,7 @@ async def test_path_params_evaluate_rows(self, async_client: AsyncLlamaStackClie
@parametrize
async def test_method_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.eval.evaluate_rows_alpha(
+ eval = await async_client.alpha.eval.evaluate_rows_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -737,7 +737,7 @@ async def test_method_evaluate_rows_alpha(self, async_client: AsyncLlamaStackCli
@parametrize
async def test_method_evaluate_rows_alpha_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.eval.evaluate_rows_alpha(
+ eval = await async_client.alpha.eval.evaluate_rows_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -772,7 +772,7 @@ async def test_method_evaluate_rows_alpha_with_all_params(self, async_client: As
@parametrize
async def test_raw_response_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.eval.with_raw_response.evaluate_rows_alpha(
+ response = await async_client.alpha.eval.with_raw_response.evaluate_rows_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -800,7 +800,7 @@ async def test_raw_response_evaluate_rows_alpha(self, async_client: AsyncLlamaSt
@parametrize
async def test_streaming_response_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.eval.with_streaming_response.evaluate_rows_alpha(
+ async with async_client.alpha.eval.with_streaming_response.evaluate_rows_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -831,7 +831,7 @@ async def test_streaming_response_evaluate_rows_alpha(self, async_client: AsyncL
@parametrize
async def test_path_params_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- await async_client.eval.with_raw_response.evaluate_rows_alpha(
+ await async_client.alpha.eval.with_raw_response.evaluate_rows_alpha(
benchmark_id="",
benchmark_config={
"eval_candidate": {
@@ -854,7 +854,7 @@ async def test_path_params_evaluate_rows_alpha(self, async_client: AsyncLlamaSta
@parametrize
async def test_method_run_eval(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.eval.run_eval(
+ eval = await async_client.alpha.eval.run_eval(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -876,7 +876,7 @@ async def test_method_run_eval(self, async_client: AsyncLlamaStackClient) -> Non
@parametrize
async def test_method_run_eval_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.eval.run_eval(
+ eval = await async_client.alpha.eval.run_eval(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -909,7 +909,7 @@ async def test_method_run_eval_with_all_params(self, async_client: AsyncLlamaSta
@parametrize
async def test_raw_response_run_eval(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.eval.with_raw_response.run_eval(
+ response = await async_client.alpha.eval.with_raw_response.run_eval(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -935,7 +935,7 @@ async def test_raw_response_run_eval(self, async_client: AsyncLlamaStackClient)
@parametrize
async def test_streaming_response_run_eval(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.eval.with_streaming_response.run_eval(
+ async with async_client.alpha.eval.with_streaming_response.run_eval(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -964,7 +964,7 @@ async def test_streaming_response_run_eval(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_path_params_run_eval(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- await async_client.eval.with_raw_response.run_eval(
+ await async_client.alpha.eval.with_raw_response.run_eval(
benchmark_id="",
benchmark_config={
"eval_candidate": {
@@ -985,7 +985,7 @@ async def test_path_params_run_eval(self, async_client: AsyncLlamaStackClient) -
@parametrize
async def test_method_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.eval.run_eval_alpha(
+ eval = await async_client.alpha.eval.run_eval_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -1007,7 +1007,7 @@ async def test_method_run_eval_alpha(self, async_client: AsyncLlamaStackClient)
@parametrize
async def test_method_run_eval_alpha_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- eval = await async_client.eval.run_eval_alpha(
+ eval = await async_client.alpha.eval.run_eval_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -1040,7 +1040,7 @@ async def test_method_run_eval_alpha_with_all_params(self, async_client: AsyncLl
@parametrize
async def test_raw_response_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.eval.with_raw_response.run_eval_alpha(
+ response = await async_client.alpha.eval.with_raw_response.run_eval_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -1066,7 +1066,7 @@ async def test_raw_response_run_eval_alpha(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_streaming_response_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.eval.with_streaming_response.run_eval_alpha(
+ async with async_client.alpha.eval.with_streaming_response.run_eval_alpha(
benchmark_id="benchmark_id",
benchmark_config={
"eval_candidate": {
@@ -1095,7 +1095,7 @@ async def test_streaming_response_run_eval_alpha(self, async_client: AsyncLlamaS
@parametrize
async def test_path_params_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"):
- await async_client.eval.with_raw_response.run_eval_alpha(
+ await async_client.alpha.eval.with_raw_response.run_eval_alpha(
benchmark_id="",
benchmark_config={
"eval_candidate": {
diff --git a/tests/api_resources/test_post_training.py b/tests/api_resources/alpha/test_post_training.py
similarity index 92%
rename from tests/api_resources/test_post_training.py
rename to tests/api_resources/alpha/test_post_training.py
index 899a53ca..14229811 100644
--- a/tests/api_resources/test_post_training.py
+++ b/tests/api_resources/alpha/test_post_training.py
@@ -9,7 +9,7 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types import (
+from llama_stack_client.types.alpha import (
PostTrainingJob,
)
@@ -21,7 +21,7 @@ class TestPostTraining:
@parametrize
def test_method_preference_optimize(self, client: LlamaStackClient) -> None:
- post_training = client.post_training.preference_optimize(
+ post_training = client.alpha.post_training.preference_optimize(
algorithm_config={
"beta": 0,
"loss_type": "sigmoid",
@@ -40,7 +40,7 @@ def test_method_preference_optimize(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_preference_optimize_with_all_params(self, client: LlamaStackClient) -> None:
- post_training = client.post_training.preference_optimize(
+ post_training = client.alpha.post_training.preference_optimize(
algorithm_config={
"beta": 0,
"loss_type": "sigmoid",
@@ -82,7 +82,7 @@ def test_method_preference_optimize_with_all_params(self, client: LlamaStackClie
@parametrize
def test_raw_response_preference_optimize(self, client: LlamaStackClient) -> None:
- response = client.post_training.with_raw_response.preference_optimize(
+ response = client.alpha.post_training.with_raw_response.preference_optimize(
algorithm_config={
"beta": 0,
"loss_type": "sigmoid",
@@ -105,7 +105,7 @@ def test_raw_response_preference_optimize(self, client: LlamaStackClient) -> Non
@parametrize
def test_streaming_response_preference_optimize(self, client: LlamaStackClient) -> None:
- with client.post_training.with_streaming_response.preference_optimize(
+ with client.alpha.post_training.with_streaming_response.preference_optimize(
algorithm_config={
"beta": 0,
"loss_type": "sigmoid",
@@ -130,7 +130,7 @@ def test_streaming_response_preference_optimize(self, client: LlamaStackClient)
@parametrize
def test_method_supervised_fine_tune(self, client: LlamaStackClient) -> None:
- post_training = client.post_training.supervised_fine_tune(
+ post_training = client.alpha.post_training.supervised_fine_tune(
hyperparam_search_config={"foo": True},
job_uuid="job_uuid",
logger_config={"foo": True},
@@ -144,7 +144,7 @@ def test_method_supervised_fine_tune(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_supervised_fine_tune_with_all_params(self, client: LlamaStackClient) -> None:
- post_training = client.post_training.supervised_fine_tune(
+ post_training = client.alpha.post_training.supervised_fine_tune(
hyperparam_search_config={"foo": True},
job_uuid="job_uuid",
logger_config={"foo": True},
@@ -193,7 +193,7 @@ def test_method_supervised_fine_tune_with_all_params(self, client: LlamaStackCli
@parametrize
def test_raw_response_supervised_fine_tune(self, client: LlamaStackClient) -> None:
- response = client.post_training.with_raw_response.supervised_fine_tune(
+ response = client.alpha.post_training.with_raw_response.supervised_fine_tune(
hyperparam_search_config={"foo": True},
job_uuid="job_uuid",
logger_config={"foo": True},
@@ -211,7 +211,7 @@ def test_raw_response_supervised_fine_tune(self, client: LlamaStackClient) -> No
@parametrize
def test_streaming_response_supervised_fine_tune(self, client: LlamaStackClient) -> None:
- with client.post_training.with_streaming_response.supervised_fine_tune(
+ with client.alpha.post_training.with_streaming_response.supervised_fine_tune(
hyperparam_search_config={"foo": True},
job_uuid="job_uuid",
logger_config={"foo": True},
@@ -237,7 +237,7 @@ class TestAsyncPostTraining:
@parametrize
async def test_method_preference_optimize(self, async_client: AsyncLlamaStackClient) -> None:
- post_training = await async_client.post_training.preference_optimize(
+ post_training = await async_client.alpha.post_training.preference_optimize(
algorithm_config={
"beta": 0,
"loss_type": "sigmoid",
@@ -256,7 +256,7 @@ async def test_method_preference_optimize(self, async_client: AsyncLlamaStackCli
@parametrize
async def test_method_preference_optimize_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- post_training = await async_client.post_training.preference_optimize(
+ post_training = await async_client.alpha.post_training.preference_optimize(
algorithm_config={
"beta": 0,
"loss_type": "sigmoid",
@@ -298,7 +298,7 @@ async def test_method_preference_optimize_with_all_params(self, async_client: As
@parametrize
async def test_raw_response_preference_optimize(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.post_training.with_raw_response.preference_optimize(
+ response = await async_client.alpha.post_training.with_raw_response.preference_optimize(
algorithm_config={
"beta": 0,
"loss_type": "sigmoid",
@@ -321,7 +321,7 @@ async def test_raw_response_preference_optimize(self, async_client: AsyncLlamaSt
@parametrize
async def test_streaming_response_preference_optimize(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.post_training.with_streaming_response.preference_optimize(
+ async with async_client.alpha.post_training.with_streaming_response.preference_optimize(
algorithm_config={
"beta": 0,
"loss_type": "sigmoid",
@@ -346,7 +346,7 @@ async def test_streaming_response_preference_optimize(self, async_client: AsyncL
@parametrize
async def test_method_supervised_fine_tune(self, async_client: AsyncLlamaStackClient) -> None:
- post_training = await async_client.post_training.supervised_fine_tune(
+ post_training = await async_client.alpha.post_training.supervised_fine_tune(
hyperparam_search_config={"foo": True},
job_uuid="job_uuid",
logger_config={"foo": True},
@@ -360,7 +360,7 @@ async def test_method_supervised_fine_tune(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_method_supervised_fine_tune_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- post_training = await async_client.post_training.supervised_fine_tune(
+ post_training = await async_client.alpha.post_training.supervised_fine_tune(
hyperparam_search_config={"foo": True},
job_uuid="job_uuid",
logger_config={"foo": True},
@@ -409,7 +409,7 @@ async def test_method_supervised_fine_tune_with_all_params(self, async_client: A
@parametrize
async def test_raw_response_supervised_fine_tune(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.post_training.with_raw_response.supervised_fine_tune(
+ response = await async_client.alpha.post_training.with_raw_response.supervised_fine_tune(
hyperparam_search_config={"foo": True},
job_uuid="job_uuid",
logger_config={"foo": True},
@@ -427,7 +427,7 @@ async def test_raw_response_supervised_fine_tune(self, async_client: AsyncLlamaS
@parametrize
async def test_streaming_response_supervised_fine_tune(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.post_training.with_streaming_response.supervised_fine_tune(
+ async with async_client.alpha.post_training.with_streaming_response.supervised_fine_tune(
hyperparam_search_config={"foo": True},
job_uuid="job_uuid",
logger_config={"foo": True},
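
As with the eval fixtures, the diff truncates the supervised fine-tune argument list, so the sketch below mirrors only the arguments visible above; the full signature may require more (e.g. a training config), so treat this as a shape sketch rather than a complete request:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://127.0.0.1:8321")  # placeholder URL

# Was client.post_training.supervised_fine_tune(...); the argument list is
# truncated in the hunks above, so further required fields may be missing here.
post_training = client.alpha.post_training.supervised_fine_tune(
    hyperparam_search_config={"foo": True},
    job_uuid="job_uuid",
    logger_config={"foo": True},
)
```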
From c29057314508eafea95e3e9a566b2ac1dbe5189c Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Tue, 30 Sep 2025 19:57:15 +0000
Subject: [PATCH 7/7] release: 0.3.0-alpha.2
---
.release-please-manifest.json | 2 +-
CHANGELOG.md | 17 +++++++++++++++++
pyproject.toml | 2 +-
3 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 1ae25264..fe81c38e 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "0.3.0-alpha.1"
+ ".": "0.3.0-alpha.2"
}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 93d68692..57ceb8eb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,22 @@
# Changelog
+## 0.3.0-alpha.2 (2025-09-30)
+
+Full Changelog: [v0.3.0-alpha.1...v0.3.0-alpha.2](https://github.com/llamastack/llama-stack-client-python/compare/v0.3.0-alpha.1...v0.3.0-alpha.2)
+
+### Features
+
+* **api:** move post_training and eval under alpha namespace ([51a54d4](https://github.com/llamastack/llama-stack-client-python/commit/51a54d458f950da36bccab067a46f255a49933a1))
+* **api:** moving { rerank, agents } to `client.alpha.` ([04caaad](https://github.com/llamastack/llama-stack-client-python/commit/04caaad5e2631d66f5a0d30c07e1872d3aded894))
+* **api:** SDKs for vector store file batches ([ee323a0](https://github.com/llamastack/llama-stack-client-python/commit/ee323a0e29892ef53b7d98bf8ad943cd865fb785))
+* **api:** SDKs for vector store file batches apis ([6e0dcff](https://github.com/llamastack/llama-stack-client-python/commit/6e0dcff322ca2a62e027b0089c3bd6fdfec4f27a))
+
+
+### Bug Fixes
+
+* fix stream event model reference ([0208f2a](https://github.com/llamastack/llama-stack-client-python/commit/0208f2aa0153d65ed4d46123d4509c66d2730e3c))
+* **manual:** update lib/ references to use the alpha namespace ([08670f1](https://github.com/llamastack/llama-stack-client-python/commit/08670f1023a73edea559bd896a59cbbd127c5a6b))
+
## 0.3.0-alpha.1 (2025-09-30)
Full Changelog: [v0.2.23-alpha.1...v0.3.0-alpha.1](https://github.com/llamastack/llama-stack-client-python/compare/v0.2.23-alpha.1...v0.3.0-alpha.1)
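
Taken together, the feature entries above amount to a single client-side migration: eval, post_training, agents, and rerank all resolve off `client.alpha` as of this release. A quick smoke check — the base URL is a placeholder, and the pre-move paths for agents and rerank are not shown in these patches, so only the destinations are asserted here:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://127.0.0.1:8321")  # placeholder URL

# Every resource the 0.3.0-alpha.2 notes relocate should now exist under alpha.
for name in ("eval", "post_training", "agents", "rerank"):
    assert hasattr(client.alpha, name), f"missing client.alpha.{name}"
```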
diff --git a/pyproject.toml b/pyproject.toml
index 3b50518e..ba98bc1a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "llama_stack_client"
-version = "0.3.0-alpha.1"
+version = "0.3.0-alpha.2"
description = "The official Python library for the llama-stack-client API"
dynamic = ["readme"]
license = "MIT"
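
A quick way to confirm the bump landed in an environment; note that PEP 440 normalizes `0.3.0-alpha.2` to `0.3.0a2` in the installed metadata:

```python
from importlib.metadata import version

# The pyproject bump above means a fresh install reports the normalized version.
assert version("llama_stack_client") == "0.3.0a2"
```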