From 320839b3b1b680064d7f75e1057bddb556ddb84f Mon Sep 17 00:00:00 2001 From: Hardik Shah Date: Mon, 16 Jun 2025 11:40:19 -0700 Subject: [PATCH] Sync updates from stainless branch: hardikjshah/dev --- src/llama_stack_client/_client.py | 2 +- .../resources/embeddings.py | 10 +- .../{vector_stores => }/vector_stores.py | 54 +---- .../resources/vector_stores/__init__.py | 33 --- .../resources/vector_stores/files.py | 201 ------------------ src/llama_stack_client/types/__init__.py | 1 + .../types/create_embeddings_response.py | 44 ++++ .../types/dataset_iterrows_response.py | 5 +- .../types/embeddings_response.py | 42 +--- .../types/vector_stores/__init__.py | 6 - .../types/vector_stores/file_create_params.py | 46 ---- .../types/vector_stores/vector_store_file.py | 64 ------ tests/api_resources/test_embeddings.py | 18 +- tests/api_resources/vector_stores/__init__.py | 1 - .../api_resources/vector_stores/test_files.py | 126 ----------- 15 files changed, 82 insertions(+), 571 deletions(-) rename src/llama_stack_client/resources/{vector_stores => }/vector_stores.py (94%) delete mode 100644 src/llama_stack_client/resources/vector_stores/__init__.py delete mode 100644 src/llama_stack_client/resources/vector_stores/files.py create mode 100644 src/llama_stack_client/types/create_embeddings_response.py delete mode 100644 src/llama_stack_client/types/vector_stores/__init__.py delete mode 100644 src/llama_stack_client/types/vector_stores/file_create_params.py delete mode 100644 src/llama_stack_client/types/vector_stores/vector_store_file.py delete mode 100644 tests/api_resources/vector_stores/__init__.py delete mode 100644 tests/api_resources/vector_stores/test_files.py diff --git a/src/llama_stack_client/_client.py b/src/llama_stack_client/_client.py index 409d8f5c..177d0207 100644 --- a/src/llama_stack_client/_client.py +++ b/src/llama_stack_client/_client.py @@ -41,6 +41,7 @@ toolgroups, vector_dbs, completions, + vector_stores, scoring_functions, synthetic_data_generation, ) @@ -57,7 +58,6 @@ from .resources.responses import responses from .resources.tool_runtime import tool_runtime from .resources.post_training import post_training -from .resources.vector_stores import vector_stores __all__ = [ "Timeout", diff --git a/src/llama_stack_client/resources/embeddings.py b/src/llama_stack_client/resources/embeddings.py index ed389ff5..862653b3 100644 --- a/src/llama_stack_client/resources/embeddings.py +++ b/src/llama_stack_client/resources/embeddings.py @@ -18,7 +18,7 @@ async_to_streamed_response_wrapper, ) from .._base_client import make_request_options -from ..types.embeddings_response import EmbeddingsResponse +from ..types.create_embeddings_response import CreateEmbeddingsResponse __all__ = ["EmbeddingsResource", "AsyncEmbeddingsResource"] @@ -57,7 +57,7 @@ def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> EmbeddingsResponse: + ) -> CreateEmbeddingsResponse: """ Generate OpenAI-compatible embeddings for the given input using the specified model. 
@@ -101,7 +101,7 @@ def create( options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=EmbeddingsResponse, + cast_to=CreateEmbeddingsResponse, ) @@ -139,7 +139,7 @@ async def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> EmbeddingsResponse: + ) -> CreateEmbeddingsResponse: """ Generate OpenAI-compatible embeddings for the given input using the specified model. @@ -183,7 +183,7 @@ async def create( options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=EmbeddingsResponse, + cast_to=CreateEmbeddingsResponse, ) diff --git a/src/llama_stack_client/resources/vector_stores/vector_stores.py b/src/llama_stack_client/resources/vector_stores.py similarity index 94% rename from src/llama_stack_client/resources/vector_stores/vector_stores.py rename to src/llama_stack_client/resources/vector_stores.py index de0b8205..f7df15c9 100644 --- a/src/llama_stack_client/resources/vector_stores/vector_stores.py +++ b/src/llama_stack_client/resources/vector_stores.py @@ -6,44 +6,32 @@ import httpx -from .files import ( - FilesResource, - AsyncFilesResource, - FilesResourceWithRawResponse, - AsyncFilesResourceWithRawResponse, - FilesResourceWithStreamingResponse, - AsyncFilesResourceWithStreamingResponse, -) -from ...types import ( +from ..types import ( vector_store_list_params, vector_store_create_params, vector_store_search_params, vector_store_update_params, ) -from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import maybe_transform, async_maybe_transform -from ..._compat import cached_property -from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import ( +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._utils import maybe_transform, async_maybe_transform +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import ( to_raw_response_wrapper, to_streamed_response_wrapper, async_to_raw_response_wrapper, async_to_streamed_response_wrapper, ) -from ..._base_client import make_request_options -from ...types.vector_store import VectorStore -from ...types.list_vector_stores_response import ListVectorStoresResponse -from ...types.vector_store_delete_response import VectorStoreDeleteResponse -from ...types.vector_store_search_response import VectorStoreSearchResponse +from .._base_client import make_request_options +from ..types.vector_store import VectorStore +from ..types.list_vector_stores_response import ListVectorStoresResponse +from ..types.vector_store_delete_response import VectorStoreDeleteResponse +from ..types.vector_store_search_response import VectorStoreSearchResponse __all__ = ["VectorStoresResource", "AsyncVectorStoresResource"] class VectorStoresResource(SyncAPIResource): - @cached_property - def files(self) -> FilesResource: - return FilesResource(self._client) - @cached_property def with_raw_response(self) -> VectorStoresResourceWithRawResponse: """ @@ -373,10 +361,6 @@ def search( class AsyncVectorStoresResource(AsyncAPIResource): - @cached_property - def files(self) -> AsyncFilesResource: - return AsyncFilesResource(self._client) - @cached_property def with_raw_response(self) -> AsyncVectorStoresResourceWithRawResponse: """ @@ -728,10 +712,6 @@ def __init__(self, vector_stores: VectorStoresResource) -> 
None: vector_stores.search, ) - @cached_property - def files(self) -> FilesResourceWithRawResponse: - return FilesResourceWithRawResponse(self._vector_stores.files) - class AsyncVectorStoresResourceWithRawResponse: def __init__(self, vector_stores: AsyncVectorStoresResource) -> None: @@ -756,10 +736,6 @@ def __init__(self, vector_stores: AsyncVectorStoresResource) -> None: vector_stores.search, ) - @cached_property - def files(self) -> AsyncFilesResourceWithRawResponse: - return AsyncFilesResourceWithRawResponse(self._vector_stores.files) - class VectorStoresResourceWithStreamingResponse: def __init__(self, vector_stores: VectorStoresResource) -> None: @@ -784,10 +760,6 @@ def __init__(self, vector_stores: VectorStoresResource) -> None: vector_stores.search, ) - @cached_property - def files(self) -> FilesResourceWithStreamingResponse: - return FilesResourceWithStreamingResponse(self._vector_stores.files) - class AsyncVectorStoresResourceWithStreamingResponse: def __init__(self, vector_stores: AsyncVectorStoresResource) -> None: @@ -811,7 +783,3 @@ def __init__(self, vector_stores: AsyncVectorStoresResource) -> None: self.search = async_to_streamed_response_wrapper( vector_stores.search, ) - - @cached_property - def files(self) -> AsyncFilesResourceWithStreamingResponse: - return AsyncFilesResourceWithStreamingResponse(self._vector_stores.files) diff --git a/src/llama_stack_client/resources/vector_stores/__init__.py b/src/llama_stack_client/resources/vector_stores/__init__.py deleted file mode 100644 index 85d202da..00000000 --- a/src/llama_stack_client/resources/vector_stores/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from .files import ( - FilesResource, - AsyncFilesResource, - FilesResourceWithRawResponse, - AsyncFilesResourceWithRawResponse, - FilesResourceWithStreamingResponse, - AsyncFilesResourceWithStreamingResponse, -) -from .vector_stores import ( - VectorStoresResource, - AsyncVectorStoresResource, - VectorStoresResourceWithRawResponse, - AsyncVectorStoresResourceWithRawResponse, - VectorStoresResourceWithStreamingResponse, - AsyncVectorStoresResourceWithStreamingResponse, -) - -__all__ = [ - "FilesResource", - "AsyncFilesResource", - "FilesResourceWithRawResponse", - "AsyncFilesResourceWithRawResponse", - "FilesResourceWithStreamingResponse", - "AsyncFilesResourceWithStreamingResponse", - "VectorStoresResource", - "AsyncVectorStoresResource", - "VectorStoresResourceWithRawResponse", - "AsyncVectorStoresResourceWithRawResponse", - "VectorStoresResourceWithStreamingResponse", - "AsyncVectorStoresResourceWithStreamingResponse", -] diff --git a/src/llama_stack_client/resources/vector_stores/files.py b/src/llama_stack_client/resources/vector_stores/files.py deleted file mode 100644 index 2d6af9cf..00000000 --- a/src/llama_stack_client/resources/vector_stores/files.py +++ /dev/null @@ -1,201 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -from typing import Dict, Union, Iterable - -import httpx - -from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import maybe_transform, async_maybe_transform -from ..._compat import cached_property -from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from ..._base_client import make_request_options -from ...types.vector_stores import file_create_params -from ...types.vector_stores.vector_store_file import VectorStoreFile - -__all__ = ["FilesResource", "AsyncFilesResource"] - - -class FilesResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> FilesResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers - """ - return FilesResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> FilesResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response - """ - return FilesResourceWithStreamingResponse(self) - - def create( - self, - vector_store_id: str, - *, - file_id: str, - attributes: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> VectorStoreFile: - """ - Attach a file to a vector store. - - Args: - file_id: The ID of the file to attach to the vector store. - - attributes: The key-value attributes stored with the file, which can be used for filtering. - - chunking_strategy: The chunking strategy to use for the file. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not vector_store_id: - raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") - return self._post( - f"/v1/openai/v1/vector_stores/{vector_store_id}/files", - body=maybe_transform( - { - "file_id": file_id, - "attributes": attributes, - "chunking_strategy": chunking_strategy, - }, - file_create_params.FileCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=VectorStoreFile, - ) - - -class AsyncFilesResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncFilesResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. 
- - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers - """ - return AsyncFilesResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncFilesResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response - """ - return AsyncFilesResourceWithStreamingResponse(self) - - async def create( - self, - vector_store_id: str, - *, - file_id: str, - attributes: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> VectorStoreFile: - """ - Attach a file to a vector store. - - Args: - file_id: The ID of the file to attach to the vector store. - - attributes: The key-value attributes stored with the file, which can be used for filtering. - - chunking_strategy: The chunking strategy to use for the file. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not vector_store_id: - raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") - return await self._post( - f"/v1/openai/v1/vector_stores/{vector_store_id}/files", - body=await async_maybe_transform( - { - "file_id": file_id, - "attributes": attributes, - "chunking_strategy": chunking_strategy, - }, - file_create_params.FileCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=VectorStoreFile, - ) - - -class FilesResourceWithRawResponse: - def __init__(self, files: FilesResource) -> None: - self._files = files - - self.create = to_raw_response_wrapper( - files.create, - ) - - -class AsyncFilesResourceWithRawResponse: - def __init__(self, files: AsyncFilesResource) -> None: - self._files = files - - self.create = async_to_raw_response_wrapper( - files.create, - ) - - -class FilesResourceWithStreamingResponse: - def __init__(self, files: FilesResource) -> None: - self._files = files - - self.create = to_streamed_response_wrapper( - files.create, - ) - - -class AsyncFilesResourceWithStreamingResponse: - def __init__(self, files: AsyncFilesResource) -> None: - self._files = files - - self.create = async_to_streamed_response_wrapper( - files.create, - ) diff --git a/src/llama_stack_client/types/__init__.py b/src/llama_stack_client/types/__init__.py index a8316ddc..7f742ba5 100644 --- a/src/llama_stack_client/types/__init__.py +++ b/src/llama_stack_client/types/__init__.py @@ -119,6 +119,7 @@ from .toolgroup_register_params import ToolgroupRegisterParams as ToolgroupRegisterParams from .vector_db_register_params import VectorDBRegisterParams as VectorDBRegisterParams from .completion_create_response import 
CompletionCreateResponse as CompletionCreateResponse +from .create_embeddings_response import CreateEmbeddingsResponse as CreateEmbeddingsResponse from .eval_run_eval_alpha_params import EvalRunEvalAlphaParams as EvalRunEvalAlphaParams from .scoring_score_batch_params import ScoringScoreBatchParams as ScoringScoreBatchParams from .telemetry_log_event_params import TelemetryLogEventParams as TelemetryLogEventParams diff --git a/src/llama_stack_client/types/create_embeddings_response.py b/src/llama_stack_client/types/create_embeddings_response.py new file mode 100644 index 00000000..b5d04f1b --- /dev/null +++ b/src/llama_stack_client/types/create_embeddings_response.py @@ -0,0 +1,44 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["CreateEmbeddingsResponse", "Data", "Usage"] + + +class Data(BaseModel): + embedding: Union[List[float], str] + """ + The embedding vector as a list of floats (when encoding_format="float") or as a + base64-encoded string (when encoding_format="base64") + """ + + index: int + """The index of the embedding in the input list""" + + object: Literal["embedding"] + """The object type, which will be "embedding" """ + + +class Usage(BaseModel): + prompt_tokens: int + """The number of tokens in the input""" + + total_tokens: int + """The total number of tokens used""" + + +class CreateEmbeddingsResponse(BaseModel): + data: List[Data] + """List of embedding data objects""" + + model: str + """The model that was used to generate the embeddings""" + + object: Literal["list"] + """The object type, which will be "list" """ + + usage: Usage + """Usage information""" diff --git a/src/llama_stack_client/types/dataset_iterrows_response.py b/src/llama_stack_client/types/dataset_iterrows_response.py index 9c451a8c..8681b018 100644 --- a/src/llama_stack_client/types/dataset_iterrows_response.py +++ b/src/llama_stack_client/types/dataset_iterrows_response.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import Dict, List, Union +from typing import Dict, List, Union, Optional from .._models import BaseModel @@ -13,3 +13,6 @@ class DatasetIterrowsResponse(BaseModel): has_more: bool """Whether there are more items available after this set""" + + url: Optional[str] = None + """The URL for accessing this list""" diff --git a/src/llama_stack_client/types/embeddings_response.py b/src/llama_stack_client/types/embeddings_response.py index 6d37d207..f36c6b97 100644 --- a/src/llama_stack_client/types/embeddings_response.py +++ b/src/llama_stack_client/types/embeddings_response.py @@ -1,44 +1,16 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-from typing import List, Union -from typing_extensions import Literal +from typing import List from .._models import BaseModel -__all__ = ["EmbeddingsResponse", "Data", "Usage"] - - -class Data(BaseModel): - embedding: Union[List[float], str] - """ - The embedding vector as a list of floats (when encoding_format="float") or as a - base64-encoded string (when encoding_format="base64") - """ - - index: int - """The index of the embedding in the input list""" - - object: Literal["embedding"] - """The object type, which will be "embedding" """ - - -class Usage(BaseModel): - prompt_tokens: int - """The number of tokens in the input""" - - total_tokens: int - """The total number of tokens used""" +__all__ = ["EmbeddingsResponse"] class EmbeddingsResponse(BaseModel): - data: List[Data] - """List of embedding data objects""" + embeddings: List[List[float]] + """List of embedding vectors, one per input content. - model: str - """The model that was used to generate the embeddings""" - - object: Literal["list"] - """The object type, which will be "list" """ - - usage: Usage - """Usage information""" + Each embedding is a list of floats. The dimensionality of the embedding is + model-specific; you can check model metadata using /models/{model_id} + """ diff --git a/src/llama_stack_client/types/vector_stores/__init__.py b/src/llama_stack_client/types/vector_stores/__init__.py deleted file mode 100644 index 550270e2..00000000 --- a/src/llama_stack_client/types/vector_stores/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from .vector_store_file import VectorStoreFile as VectorStoreFile -from .file_create_params import FileCreateParams as FileCreateParams diff --git a/src/llama_stack_client/types/vector_stores/file_create_params.py b/src/llama_stack_client/types/vector_stores/file_create_params.py deleted file mode 100644 index 66fbf624..00000000 --- a/src/llama_stack_client/types/vector_stores/file_create_params.py +++ /dev/null @@ -1,46 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -from typing import Dict, Union, Iterable -from typing_extensions import Literal, Required, TypeAlias, TypedDict - -__all__ = [ - "FileCreateParams", - "ChunkingStrategy", - "ChunkingStrategyVectorStoreChunkingStrategyAuto", - "ChunkingStrategyVectorStoreChunkingStrategyStatic", - "ChunkingStrategyVectorStoreChunkingStrategyStaticStatic", -] - - -class FileCreateParams(TypedDict, total=False): - file_id: Required[str] - """The ID of the file to attach to the vector store.""" - - attributes: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - """The key-value attributes stored with the file, which can be used for filtering.""" - - chunking_strategy: ChunkingStrategy - """The chunking strategy to use for the file.""" - - -class ChunkingStrategyVectorStoreChunkingStrategyAuto(TypedDict, total=False): - type: Required[Literal["auto"]] - - -class ChunkingStrategyVectorStoreChunkingStrategyStaticStatic(TypedDict, total=False): - chunk_overlap_tokens: Required[int] - - max_chunk_size_tokens: Required[int] - - -class ChunkingStrategyVectorStoreChunkingStrategyStatic(TypedDict, total=False): - static: Required[ChunkingStrategyVectorStoreChunkingStrategyStaticStatic] - - type: Required[Literal["static"]] - - -ChunkingStrategy: TypeAlias = Union[ - ChunkingStrategyVectorStoreChunkingStrategyAuto, ChunkingStrategyVectorStoreChunkingStrategyStatic -] diff --git a/src/llama_stack_client/types/vector_stores/vector_store_file.py b/src/llama_stack_client/types/vector_stores/vector_store_file.py deleted file mode 100644 index 45ce03f8..00000000 --- a/src/llama_stack_client/types/vector_stores/vector_store_file.py +++ /dev/null @@ -1,64 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from typing import Dict, List, Union, Optional -from typing_extensions import Literal, Annotated, TypeAlias - -from ..._utils import PropertyInfo -from ..._models import BaseModel - -__all__ = [ - "VectorStoreFile", - "ChunkingStrategy", - "ChunkingStrategyVectorStoreChunkingStrategyAuto", - "ChunkingStrategyVectorStoreChunkingStrategyStatic", - "ChunkingStrategyVectorStoreChunkingStrategyStaticStatic", - "LastError", -] - - -class ChunkingStrategyVectorStoreChunkingStrategyAuto(BaseModel): - type: Literal["auto"] - - -class ChunkingStrategyVectorStoreChunkingStrategyStaticStatic(BaseModel): - chunk_overlap_tokens: int - - max_chunk_size_tokens: int - - -class ChunkingStrategyVectorStoreChunkingStrategyStatic(BaseModel): - static: ChunkingStrategyVectorStoreChunkingStrategyStaticStatic - - type: Literal["static"] - - -ChunkingStrategy: TypeAlias = Annotated[ - Union[ChunkingStrategyVectorStoreChunkingStrategyAuto, ChunkingStrategyVectorStoreChunkingStrategyStatic], - PropertyInfo(discriminator="type"), -] - - -class LastError(BaseModel): - code: Literal["server_error", "rate_limit_exceeded"] - - message: str - - -class VectorStoreFile(BaseModel): - id: str - - attributes: Dict[str, Union[bool, float, str, List[object], object, None]] - - chunking_strategy: ChunkingStrategy - - created_at: int - - object: str - - status: Literal["completed", "in_progress", "cancelled", "failed"] - - usage_bytes: int - - vector_store_id: str - - last_error: Optional[LastError] = None diff --git a/tests/api_resources/test_embeddings.py b/tests/api_resources/test_embeddings.py index 6f997c32..5d002024 100644 --- a/tests/api_resources/test_embeddings.py +++ b/tests/api_resources/test_embeddings.py @@ -9,7 +9,7 @@ from tests.utils import assert_matches_type from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import EmbeddingsResponse +from llama_stack_client.types import CreateEmbeddingsResponse base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -23,7 +23,7 @@ def test_method_create(self, client: LlamaStackClient) -> None: input="string", model="model", ) - assert_matches_type(EmbeddingsResponse, embedding, path=["response"]) + assert_matches_type(CreateEmbeddingsResponse, embedding, path=["response"]) @parametrize def test_method_create_with_all_params(self, client: LlamaStackClient) -> None: @@ -34,7 +34,7 @@ def test_method_create_with_all_params(self, client: LlamaStackClient) -> None: encoding_format="encoding_format", user="user", ) - assert_matches_type(EmbeddingsResponse, embedding, path=["response"]) + assert_matches_type(CreateEmbeddingsResponse, embedding, path=["response"]) @parametrize def test_raw_response_create(self, client: LlamaStackClient) -> None: @@ -46,7 +46,7 @@ def test_raw_response_create(self, client: LlamaStackClient) -> None: assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" embedding = response.parse() - assert_matches_type(EmbeddingsResponse, embedding, path=["response"]) + assert_matches_type(CreateEmbeddingsResponse, embedding, path=["response"]) @parametrize def test_streaming_response_create(self, client: LlamaStackClient) -> None: @@ -58,7 +58,7 @@ def test_streaming_response_create(self, client: LlamaStackClient) -> None: assert response.http_request.headers.get("X-Stainless-Lang") == "python" embedding = response.parse() - assert_matches_type(EmbeddingsResponse, embedding, path=["response"]) + assert_matches_type(CreateEmbeddingsResponse, 
embedding, path=["response"]) assert cast(Any, response.is_closed) is True @@ -72,7 +72,7 @@ async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None: input="string", model="model", ) - assert_matches_type(EmbeddingsResponse, embedding, path=["response"]) + assert_matches_type(CreateEmbeddingsResponse, embedding, path=["response"]) @parametrize async def test_method_create_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: @@ -83,7 +83,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncLlamaStack encoding_format="encoding_format", user="user", ) - assert_matches_type(EmbeddingsResponse, embedding, path=["response"]) + assert_matches_type(CreateEmbeddingsResponse, embedding, path=["response"]) @parametrize async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> None: @@ -95,7 +95,7 @@ async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" embedding = await response.parse() - assert_matches_type(EmbeddingsResponse, embedding, path=["response"]) + assert_matches_type(CreateEmbeddingsResponse, embedding, path=["response"]) @parametrize async def test_streaming_response_create(self, async_client: AsyncLlamaStackClient) -> None: @@ -107,6 +107,6 @@ async def test_streaming_response_create(self, async_client: AsyncLlamaStackClie assert response.http_request.headers.get("X-Stainless-Lang") == "python" embedding = await response.parse() - assert_matches_type(EmbeddingsResponse, embedding, path=["response"]) + assert_matches_type(CreateEmbeddingsResponse, embedding, path=["response"]) assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/vector_stores/__init__.py b/tests/api_resources/vector_stores/__init__.py deleted file mode 100644 index fd8019a9..00000000 --- a/tests/api_resources/vector_stores/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/vector_stores/test_files.py b/tests/api_resources/vector_stores/test_files.py deleted file mode 100644 index c88e5c95..00000000 --- a/tests/api_resources/vector_stores/test_files.py +++ /dev/null @@ -1,126 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types.vector_stores import VectorStoreFile - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestFiles: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_create(self, client: LlamaStackClient) -> None: - file = client.vector_stores.files.create( - vector_store_id="vector_store_id", - file_id="file_id", - ) - assert_matches_type(VectorStoreFile, file, path=["response"]) - - @parametrize - def test_method_create_with_all_params(self, client: LlamaStackClient) -> None: - file = client.vector_stores.files.create( - vector_store_id="vector_store_id", - file_id="file_id", - attributes={"foo": True}, - chunking_strategy={"type": "auto"}, - ) - assert_matches_type(VectorStoreFile, file, path=["response"]) - - @parametrize - def test_raw_response_create(self, client: LlamaStackClient) -> None: - response = client.vector_stores.files.with_raw_response.create( - vector_store_id="vector_store_id", - file_id="file_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) - - @parametrize - def test_streaming_response_create(self, client: LlamaStackClient) -> None: - with client.vector_stores.files.with_streaming_response.create( - vector_store_id="vector_store_id", - file_id="file_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_create(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.vector_stores.files.with_raw_response.create( - vector_store_id="", - file_id="file_id", - ) - - -class TestAsyncFiles: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None: - file = await async_client.vector_stores.files.create( - vector_store_id="vector_store_id", - file_id="file_id", - ) - assert_matches_type(VectorStoreFile, file, path=["response"]) - - @parametrize - async def test_method_create_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - file = await async_client.vector_stores.files.create( - vector_store_id="vector_store_id", - file_id="file_id", - attributes={"foo": True}, - chunking_strategy={"type": "auto"}, - ) - assert_matches_type(VectorStoreFile, file, path=["response"]) - - @parametrize - async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.vector_stores.files.with_raw_response.create( - vector_store_id="vector_store_id", - file_id="file_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = await response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) - - @parametrize - async def 
test_streaming_response_create(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.vector_stores.files.with_streaming_response.create( - vector_store_id="vector_store_id", - file_id="file_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = await response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_create(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.vector_stores.files.with_raw_response.create( - vector_store_id="", - file_id="file_id", - )
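
As a quick illustration of the `EmbeddingsResponse` -> `CreateEmbeddingsResponse` rename above, here is a minimal usage sketch. The base URL and model id are placeholders (not values from this patch), and the field accesses mirror the `CreateEmbeddingsResponse` model introduced here (`data`, `model`, `object`, `usage`).

```python
# Hedged usage sketch: placeholder server URL and model id, shape taken from
# the CreateEmbeddingsResponse model added in this patch.
from llama_stack_client import LlamaStackClient
from llama_stack_client.types import CreateEmbeddingsResponse

client = LlamaStackClient(base_url="http://127.0.0.1:8321")  # placeholder URL

response: CreateEmbeddingsResponse = client.embeddings.create(
    input="Hello, world!",
    model="example-embedding-model",  # placeholder model id
)

# Each Data entry carries an index and an embedding, which is either a list of
# floats (encoding_format="float") or a base64 string (encoding_format="base64").
for item in response.data:
    print(item.index, item.embedding)

# Usage reports prompt_tokens and total_tokens.
print(response.usage.prompt_tokens, response.usage.total_tokens)
```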