From 05af878a1601101467dd5ec19d9a6800717f46b3 Mon Sep 17 00:00:00 2001 From: Hardik Shah Date: Fri, 13 Jun 2025 16:13:49 -0700 Subject: [PATCH] Sync updates from stainless branch: hardikjshah/dev --- src/llama_stack_client/_base_client.py | 18 +- src/llama_stack_client/_client.py | 2 +- .../resources/completions.py | 22 ++ .../resources/vector_stores/__init__.py | 33 +++ .../resources/vector_stores/files.py | 201 ++++++++++++++++++ .../{ => vector_stores}/vector_stores.py | 54 ++++- .../types/completion_create_params.py | 3 + .../types/response_create_params.py | 20 +- .../types/response_list_response.py | 28 +++ .../types/response_object.py | 14 ++ .../types/response_object_stream.py | 28 +++ .../responses/input_item_list_response.py | 16 +- .../types/shared/query_config.py | 41 +++- .../types/shared_params/query_config.py | 40 +++- .../types/vector_stores/__init__.py | 6 + .../types/vector_stores/file_create_params.py | 46 ++++ .../types/vector_stores/vector_store_file.py | 64 ++++++ tests/api_resources/test_completions.py | 4 + .../tool_runtime/test_rag_tool.py | 8 + tests/api_resources/vector_stores/__init__.py | 1 + .../api_resources/vector_stores/test_files.py | 126 +++++++++++ 21 files changed, 753 insertions(+), 22 deletions(-) create mode 100644 src/llama_stack_client/resources/vector_stores/__init__.py create mode 100644 src/llama_stack_client/resources/vector_stores/files.py rename src/llama_stack_client/resources/{ => vector_stores}/vector_stores.py (94%) create mode 100644 src/llama_stack_client/types/vector_stores/__init__.py create mode 100644 src/llama_stack_client/types/vector_stores/file_create_params.py create mode 100644 src/llama_stack_client/types/vector_stores/vector_store_file.py create mode 100644 tests/api_resources/vector_stores/__init__.py create mode 100644 tests/api_resources/vector_stores/test_files.py diff --git a/src/llama_stack_client/_base_client.py b/src/llama_stack_client/_base_client.py index 4d199940..4475c723 100644 --- a/src/llama_stack_client/_base_client.py +++ b/src/llama_stack_client/_base_client.py @@ -1071,7 +1071,14 @@ def _process_response( ) -> ResponseT: origin = get_origin(cast_to) or cast_to - if inspect.isclass(origin) and issubclass(origin, BaseAPIResponse): + if ( + inspect.isclass(origin) + and issubclass(origin, BaseAPIResponse) + # we only want to actually return the custom BaseAPIResponse class if we're + # returning the raw response, or if we're not streaming SSE, as if we're streaming + # SSE then `cast_to` doesn't actively reflect the type we need to parse into + and (not stream or bool(response.request.headers.get(RAW_RESPONSE_HEADER))) + ): if not issubclass(origin, APIResponse): raise TypeError(f"API Response types must subclass {APIResponse}; Received {origin}") @@ -1574,7 +1581,14 @@ async def _process_response( ) -> ResponseT: origin = get_origin(cast_to) or cast_to - if inspect.isclass(origin) and issubclass(origin, BaseAPIResponse): + if ( + inspect.isclass(origin) + and issubclass(origin, BaseAPIResponse) + # we only want to actually return the custom BaseAPIResponse class if we're + # returning the raw response, or if we're not streaming SSE, as if we're streaming + # SSE then `cast_to` doesn't actively reflect the type we need to parse into + and (not stream or bool(response.request.headers.get(RAW_RESPONSE_HEADER))) + ): if not issubclass(origin, AsyncAPIResponse): raise TypeError(f"API Response types must subclass {AsyncAPIResponse}; Received {origin}") diff --git a/src/llama_stack_client/_client.py 
b/src/llama_stack_client/_client.py index 177d0207..409d8f5c 100644 --- a/src/llama_stack_client/_client.py +++ b/src/llama_stack_client/_client.py @@ -41,7 +41,6 @@ toolgroups, vector_dbs, completions, - vector_stores, scoring_functions, synthetic_data_generation, ) @@ -58,6 +57,7 @@ from .resources.responses import responses from .resources.tool_runtime import tool_runtime from .resources.post_training import post_training +from .resources.vector_stores import vector_stores __all__ = [ "Timeout", diff --git a/src/llama_stack_client/resources/completions.py b/src/llama_stack_client/resources/completions.py index dfdd80f4..c4acf525 100644 --- a/src/llama_stack_client/resources/completions.py +++ b/src/llama_stack_client/resources/completions.py @@ -65,6 +65,7 @@ def create( stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, stream: Literal[False] | NotGiven = NOT_GIVEN, stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + suffix: str | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, top_p: float | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, @@ -109,6 +110,8 @@ def create( stream_options: (Optional) The stream options to use. + suffix: (Optional) The suffix that should be appended to the completion. + temperature: (Optional) The temperature to use. top_p: (Optional) The top p to use. @@ -145,6 +148,7 @@ def create( seed: int | NotGiven = NOT_GIVEN, stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + suffix: str | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, top_p: float | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, @@ -189,6 +193,8 @@ def create( stream_options: (Optional) The stream options to use. + suffix: (Optional) The suffix that should be appended to the completion. + temperature: (Optional) The temperature to use. top_p: (Optional) The top p to use. @@ -225,6 +231,7 @@ def create( seed: int | NotGiven = NOT_GIVEN, stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + suffix: str | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, top_p: float | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, @@ -269,6 +276,8 @@ def create( stream_options: (Optional) The stream options to use. + suffix: (Optional) The suffix that should be appended to the completion. + temperature: (Optional) The temperature to use. top_p: (Optional) The top p to use. 
@@ -305,6 +314,7 @@ def create( stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + suffix: str | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, top_p: float | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, @@ -335,6 +345,7 @@ def create( "stop": stop, "stream": stream, "stream_options": stream_options, + "suffix": suffix, "temperature": temperature, "top_p": top_p, "user": user, @@ -392,6 +403,7 @@ async def create( stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, stream: Literal[False] | NotGiven = NOT_GIVEN, stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + suffix: str | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, top_p: float | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, @@ -436,6 +448,8 @@ async def create( stream_options: (Optional) The stream options to use. + suffix: (Optional) The suffix that should be appended to the completion. + temperature: (Optional) The temperature to use. top_p: (Optional) The top p to use. @@ -472,6 +486,7 @@ async def create( seed: int | NotGiven = NOT_GIVEN, stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + suffix: str | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, top_p: float | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, @@ -516,6 +531,8 @@ async def create( stream_options: (Optional) The stream options to use. + suffix: (Optional) The suffix that should be appended to the completion. + temperature: (Optional) The temperature to use. top_p: (Optional) The top p to use. @@ -552,6 +569,7 @@ async def create( seed: int | NotGiven = NOT_GIVEN, stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + suffix: str | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, top_p: float | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, @@ -596,6 +614,8 @@ async def create( stream_options: (Optional) The stream options to use. + suffix: (Optional) The suffix that should be appended to the completion. + temperature: (Optional) The temperature to use. top_p: (Optional) The top p to use. @@ -632,6 +652,7 @@ async def create( stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + suffix: str | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, top_p: float | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, @@ -662,6 +683,7 @@ async def create( "stop": stop, "stream": stream, "stream_options": stream_options, + "suffix": suffix, "temperature": temperature, "top_p": top_p, "user": user, diff --git a/src/llama_stack_client/resources/vector_stores/__init__.py b/src/llama_stack_client/resources/vector_stores/__init__.py new file mode 100644 index 00000000..85d202da --- /dev/null +++ b/src/llama_stack_client/resources/vector_stores/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .files import ( + FilesResource, + AsyncFilesResource, + FilesResourceWithRawResponse, + AsyncFilesResourceWithRawResponse, + FilesResourceWithStreamingResponse, + AsyncFilesResourceWithStreamingResponse, +) +from .vector_stores import ( + VectorStoresResource, + AsyncVectorStoresResource, + VectorStoresResourceWithRawResponse, + AsyncVectorStoresResourceWithRawResponse, + VectorStoresResourceWithStreamingResponse, + AsyncVectorStoresResourceWithStreamingResponse, +) + +__all__ = [ + "FilesResource", + "AsyncFilesResource", + "FilesResourceWithRawResponse", + "AsyncFilesResourceWithRawResponse", + "FilesResourceWithStreamingResponse", + "AsyncFilesResourceWithStreamingResponse", + "VectorStoresResource", + "AsyncVectorStoresResource", + "VectorStoresResourceWithRawResponse", + "AsyncVectorStoresResourceWithRawResponse", + "VectorStoresResourceWithStreamingResponse", + "AsyncVectorStoresResourceWithStreamingResponse", +] diff --git a/src/llama_stack_client/resources/vector_stores/files.py b/src/llama_stack_client/resources/vector_stores/files.py new file mode 100644 index 00000000..2d6af9cf --- /dev/null +++ b/src/llama_stack_client/resources/vector_stores/files.py @@ -0,0 +1,201 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Union, Iterable + +import httpx + +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import maybe_transform, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from ..._base_client import make_request_options +from ...types.vector_stores import file_create_params +from ...types.vector_stores.vector_store_file import VectorStoreFile + +__all__ = ["FilesResource", "AsyncFilesResource"] + + +class FilesResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> FilesResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + """ + return FilesResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> FilesResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + """ + return FilesResourceWithStreamingResponse(self) + + def create( + self, + vector_store_id: str, + *, + file_id: str, + attributes: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """ + Attach a file to a vector store. 
+ + Args: + file_id: The ID of the file to attach to the vector store. + + attributes: The key-value attributes stored with the file, which can be used for filtering. + + chunking_strategy: The chunking strategy to use for the file. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + return self._post( + f"/v1/openai/v1/vector_stores/{vector_store_id}/files", + body=maybe_transform( + { + "file_id": file_id, + "attributes": attributes, + "chunking_strategy": chunking_strategy, + }, + file_create_params.FileCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFile, + ) + + +class AsyncFilesResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncFilesResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + """ + return AsyncFilesResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncFilesResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + """ + return AsyncFilesResourceWithStreamingResponse(self) + + async def create( + self, + vector_store_id: str, + *, + file_id: str, + attributes: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """ + Attach a file to a vector store. + + Args: + file_id: The ID of the file to attach to the vector store. + + attributes: The key-value attributes stored with the file, which can be used for filtering. + + chunking_strategy: The chunking strategy to use for the file. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + return await self._post( + f"/v1/openai/v1/vector_stores/{vector_store_id}/files", + body=await async_maybe_transform( + { + "file_id": file_id, + "attributes": attributes, + "chunking_strategy": chunking_strategy, + }, + file_create_params.FileCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFile, + ) + + +class FilesResourceWithRawResponse: + def __init__(self, files: FilesResource) -> None: + self._files = files + + self.create = to_raw_response_wrapper( + files.create, + ) + + +class AsyncFilesResourceWithRawResponse: + def __init__(self, files: AsyncFilesResource) -> None: + self._files = files + + self.create = async_to_raw_response_wrapper( + files.create, + ) + + +class FilesResourceWithStreamingResponse: + def __init__(self, files: FilesResource) -> None: + self._files = files + + self.create = to_streamed_response_wrapper( + files.create, + ) + + +class AsyncFilesResourceWithStreamingResponse: + def __init__(self, files: AsyncFilesResource) -> None: + self._files = files + + self.create = async_to_streamed_response_wrapper( + files.create, + ) diff --git a/src/llama_stack_client/resources/vector_stores.py b/src/llama_stack_client/resources/vector_stores/vector_stores.py similarity index 94% rename from src/llama_stack_client/resources/vector_stores.py rename to src/llama_stack_client/resources/vector_stores/vector_stores.py index f7df15c9..de0b8205 100644 --- a/src/llama_stack_client/resources/vector_stores.py +++ b/src/llama_stack_client/resources/vector_stores/vector_stores.py @@ -6,32 +6,44 @@ import httpx -from ..types import ( +from .files import ( + FilesResource, + AsyncFilesResource, + FilesResourceWithRawResponse, + AsyncFilesResourceWithRawResponse, + FilesResourceWithStreamingResponse, + AsyncFilesResourceWithStreamingResponse, +) +from ...types import ( vector_store_list_params, vector_store_create_params, vector_store_search_params, vector_store_update_params, ) -from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import maybe_transform, async_maybe_transform -from .._compat import cached_property -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import ( +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import maybe_transform, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import ( to_raw_response_wrapper, to_streamed_response_wrapper, async_to_raw_response_wrapper, async_to_streamed_response_wrapper, ) -from .._base_client import make_request_options -from ..types.vector_store import VectorStore -from ..types.list_vector_stores_response import ListVectorStoresResponse -from ..types.vector_store_delete_response import VectorStoreDeleteResponse -from ..types.vector_store_search_response import VectorStoreSearchResponse +from ..._base_client import make_request_options +from ...types.vector_store import VectorStore +from ...types.list_vector_stores_response import ListVectorStoresResponse 
+from ...types.vector_store_delete_response import VectorStoreDeleteResponse +from ...types.vector_store_search_response import VectorStoreSearchResponse __all__ = ["VectorStoresResource", "AsyncVectorStoresResource"] class VectorStoresResource(SyncAPIResource): + @cached_property + def files(self) -> FilesResource: + return FilesResource(self._client) + @cached_property def with_raw_response(self) -> VectorStoresResourceWithRawResponse: """ @@ -361,6 +373,10 @@ def search( class AsyncVectorStoresResource(AsyncAPIResource): + @cached_property + def files(self) -> AsyncFilesResource: + return AsyncFilesResource(self._client) + @cached_property def with_raw_response(self) -> AsyncVectorStoresResourceWithRawResponse: """ @@ -712,6 +728,10 @@ def __init__(self, vector_stores: VectorStoresResource) -> None: vector_stores.search, ) + @cached_property + def files(self) -> FilesResourceWithRawResponse: + return FilesResourceWithRawResponse(self._vector_stores.files) + class AsyncVectorStoresResourceWithRawResponse: def __init__(self, vector_stores: AsyncVectorStoresResource) -> None: @@ -736,6 +756,10 @@ def __init__(self, vector_stores: AsyncVectorStoresResource) -> None: vector_stores.search, ) + @cached_property + def files(self) -> AsyncFilesResourceWithRawResponse: + return AsyncFilesResourceWithRawResponse(self._vector_stores.files) + class VectorStoresResourceWithStreamingResponse: def __init__(self, vector_stores: VectorStoresResource) -> None: @@ -760,6 +784,10 @@ def __init__(self, vector_stores: VectorStoresResource) -> None: vector_stores.search, ) + @cached_property + def files(self) -> FilesResourceWithStreamingResponse: + return FilesResourceWithStreamingResponse(self._vector_stores.files) + class AsyncVectorStoresResourceWithStreamingResponse: def __init__(self, vector_stores: AsyncVectorStoresResource) -> None: @@ -783,3 +811,7 @@ def __init__(self, vector_stores: AsyncVectorStoresResource) -> None: self.search = async_to_streamed_response_wrapper( vector_stores.search, ) + + @cached_property + def files(self) -> AsyncFilesResourceWithStreamingResponse: + return AsyncFilesResourceWithStreamingResponse(self._vector_stores.files) diff --git a/src/llama_stack_client/types/completion_create_params.py b/src/llama_stack_client/types/completion_create_params.py index e43f05e4..50900e25 100644 --- a/src/llama_stack_client/types/completion_create_params.py +++ b/src/llama_stack_client/types/completion_create_params.py @@ -56,6 +56,9 @@ class CompletionCreateParamsBase(TypedDict, total=False): stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] """(Optional) The stream options to use.""" + suffix: str + """(Optional) The suffix that should be appended to the completion.""" + temperature: float """(Optional) The temperature to use.""" diff --git a/src/llama_stack_client/types/response_create_params.py b/src/llama_stack_client/types/response_create_params.py index 7c0a2539..2b198449 100644 --- a/src/llama_stack_client/types/response_create_params.py +++ b/src/llama_stack_client/types/response_create_params.py @@ -9,6 +9,7 @@ "ResponseCreateParamsBase", "InputUnionMember1", "InputUnionMember1OpenAIResponseOutputMessageWebSearchToolCall", + "InputUnionMember1OpenAIResponseOutputMessageFileSearchToolCall", "InputUnionMember1OpenAIResponseOutputMessageFunctionToolCall", "InputUnionMember1OpenAIResponseInputFunctionToolCallOutput", "InputUnionMember1OpenAIResponseMessage", @@ -68,6 +69,18 @@ class 
InputUnionMember1OpenAIResponseOutputMessageWebSearchToolCall(TypedDict, t type: Required[Literal["web_search_call"]] +class InputUnionMember1OpenAIResponseOutputMessageFileSearchToolCall(TypedDict, total=False): + id: Required[str] + + queries: Required[List[str]] + + status: Required[str] + + type: Required[Literal["file_search_call"]] + + results: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + + class InputUnionMember1OpenAIResponseOutputMessageFunctionToolCall(TypedDict, total=False): arguments: Required[str] @@ -144,6 +157,7 @@ class InputUnionMember1OpenAIResponseMessage(TypedDict, total=False): InputUnionMember1: TypeAlias = Union[ InputUnionMember1OpenAIResponseOutputMessageWebSearchToolCall, + InputUnionMember1OpenAIResponseOutputMessageFileSearchToolCall, InputUnionMember1OpenAIResponseOutputMessageFunctionToolCall, InputUnionMember1OpenAIResponseInputFunctionToolCallOutput, InputUnionMember1OpenAIResponseMessage, @@ -193,7 +207,11 @@ class ToolOpenAIResponseInputToolFileSearchRankingOptions(TypedDict, total=False class ToolOpenAIResponseInputToolFileSearch(TypedDict, total=False): type: Required[Literal["file_search"]] - vector_store_id: Required[List[str]] + vector_store_ids: Required[List[str]] + + filters: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + + max_num_results: int ranking_options: ToolOpenAIResponseInputToolFileSearchRankingOptions diff --git a/src/llama_stack_client/types/response_list_response.py b/src/llama_stack_client/types/response_list_response.py index 72c555be..85c640d3 100644 --- a/src/llama_stack_client/types/response_list_response.py +++ b/src/llama_stack_client/types/response_list_response.py @@ -13,6 +13,7 @@ "Data", "DataInput", "DataInputOpenAIResponseOutputMessageWebSearchToolCall", + "DataInputOpenAIResponseOutputMessageFileSearchToolCall", "DataInputOpenAIResponseOutputMessageFunctionToolCall", "DataInputOpenAIResponseInputFunctionToolCallOutput", "DataInputOpenAIResponseMessage", @@ -27,6 +28,7 @@ "DataOutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", "DataOutputOpenAIResponseMessageContentUnionMember2", "DataOutputOpenAIResponseOutputMessageWebSearchToolCall", + "DataOutputOpenAIResponseOutputMessageFileSearchToolCall", "DataOutputOpenAIResponseOutputMessageFunctionToolCall", "DataOutputOpenAIResponseOutputMessageMcpCall", "DataOutputOpenAIResponseOutputMessageMcpListTools", @@ -45,6 +47,18 @@ class DataInputOpenAIResponseOutputMessageWebSearchToolCall(BaseModel): type: Literal["web_search_call"] +class DataInputOpenAIResponseOutputMessageFileSearchToolCall(BaseModel): + id: str + + queries: List[str] + + status: str + + type: Literal["file_search_call"] + + results: Optional[List[Dict[str, Union[bool, float, str, List[object], object, None]]]] = None + + class DataInputOpenAIResponseOutputMessageFunctionToolCall(BaseModel): arguments: str @@ -118,6 +132,7 @@ class DataInputOpenAIResponseMessage(BaseModel): DataInput: TypeAlias = Union[ DataInputOpenAIResponseOutputMessageWebSearchToolCall, + DataInputOpenAIResponseOutputMessageFileSearchToolCall, DataInputOpenAIResponseOutputMessageFunctionToolCall, DataInputOpenAIResponseInputFunctionToolCallOutput, DataInputOpenAIResponseMessage, @@ -177,6 +192,18 @@ class DataOutputOpenAIResponseOutputMessageWebSearchToolCall(BaseModel): type: Literal["web_search_call"] +class DataOutputOpenAIResponseOutputMessageFileSearchToolCall(BaseModel): + id: str + + queries: List[str] + + status: str + + type: 
Literal["file_search_call"] + + results: Optional[List[Dict[str, Union[bool, float, str, List[object], object, None]]]] = None + + class DataOutputOpenAIResponseOutputMessageFunctionToolCall(BaseModel): arguments: str @@ -229,6 +256,7 @@ class DataOutputOpenAIResponseOutputMessageMcpListTools(BaseModel): Union[ DataOutputOpenAIResponseMessage, DataOutputOpenAIResponseOutputMessageWebSearchToolCall, + DataOutputOpenAIResponseOutputMessageFileSearchToolCall, DataOutputOpenAIResponseOutputMessageFunctionToolCall, DataOutputOpenAIResponseOutputMessageMcpCall, DataOutputOpenAIResponseOutputMessageMcpListTools, diff --git a/src/llama_stack_client/types/response_object.py b/src/llama_stack_client/types/response_object.py index 4ce248b1..c09c1a10 100644 --- a/src/llama_stack_client/types/response_object.py +++ b/src/llama_stack_client/types/response_object.py @@ -17,6 +17,7 @@ "OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", "OutputOpenAIResponseMessageContentUnionMember2", "OutputOpenAIResponseOutputMessageWebSearchToolCall", + "OutputOpenAIResponseOutputMessageFileSearchToolCall", "OutputOpenAIResponseOutputMessageFunctionToolCall", "OutputOpenAIResponseOutputMessageMcpCall", "OutputOpenAIResponseOutputMessageMcpListTools", @@ -78,6 +79,18 @@ class OutputOpenAIResponseOutputMessageWebSearchToolCall(BaseModel): type: Literal["web_search_call"] +class OutputOpenAIResponseOutputMessageFileSearchToolCall(BaseModel): + id: str + + queries: List[str] + + status: str + + type: Literal["file_search_call"] + + results: Optional[List[Dict[str, Union[bool, float, str, List[object], object, None]]]] = None + + class OutputOpenAIResponseOutputMessageFunctionToolCall(BaseModel): arguments: str @@ -130,6 +143,7 @@ class OutputOpenAIResponseOutputMessageMcpListTools(BaseModel): Union[ OutputOpenAIResponseMessage, OutputOpenAIResponseOutputMessageWebSearchToolCall, + OutputOpenAIResponseOutputMessageFileSearchToolCall, OutputOpenAIResponseOutputMessageFunctionToolCall, OutputOpenAIResponseOutputMessageMcpCall, OutputOpenAIResponseOutputMessageMcpListTools, diff --git a/src/llama_stack_client/types/response_object_stream.py b/src/llama_stack_client/types/response_object_stream.py index ef140814..193491c0 100644 --- a/src/llama_stack_client/types/response_object_stream.py +++ b/src/llama_stack_client/types/response_object_stream.py @@ -18,6 +18,7 @@ "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2", "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageWebSearchToolCall", + "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageFileSearchToolCall", "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageFunctionToolCall", "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageMcpCall", "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageMcpListTools", @@ -30,6 +31,7 @@ "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2", "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageWebSearchToolCall", + 
"OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageFileSearchToolCall", "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageFunctionToolCall", "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageMcpCall", "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageMcpListTools", @@ -116,6 +118,18 @@ class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputM type: Literal["web_search_call"] +class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageFileSearchToolCall(BaseModel): + id: str + + queries: List[str] + + status: str + + type: Literal["file_search_call"] + + results: Optional[List[Dict[str, Union[bool, float, str, List[object], object, None]]]] = None + + class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageFunctionToolCall(BaseModel): arguments: str @@ -168,6 +182,7 @@ class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputM Union[ OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessage, OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageWebSearchToolCall, + OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageFileSearchToolCall, OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageFunctionToolCall, OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageMcpCall, OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageMcpListTools, @@ -250,6 +265,18 @@ class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMe type: Literal["web_search_call"] +class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageFileSearchToolCall(BaseModel): + id: str + + queries: List[str] + + status: str + + type: Literal["file_search_call"] + + results: Optional[List[Dict[str, Union[bool, float, str, List[object], object, None]]]] = None + + class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageFunctionToolCall(BaseModel): arguments: str @@ -302,6 +329,7 @@ class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMe Union[ OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessage, OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageWebSearchToolCall, + OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageFileSearchToolCall, OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageFunctionToolCall, OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageMcpCall, OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageMcpListTools, diff --git a/src/llama_stack_client/types/responses/input_item_list_response.py b/src/llama_stack_client/types/responses/input_item_list_response.py index 1d8f5a76..5b63b51a 100644 --- a/src/llama_stack_client/types/responses/input_item_list_response.py +++ b/src/llama_stack_client/types/responses/input_item_list_response.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-from typing import List, Union, Optional +from typing import Dict, List, Union, Optional from typing_extensions import Literal, Annotated, TypeAlias from ..._utils import PropertyInfo @@ -10,6 +10,7 @@ "InputItemListResponse", "Data", "DataOpenAIResponseOutputMessageWebSearchToolCall", + "DataOpenAIResponseOutputMessageFileSearchToolCall", "DataOpenAIResponseOutputMessageFunctionToolCall", "DataOpenAIResponseInputFunctionToolCallOutput", "DataOpenAIResponseMessage", @@ -28,6 +29,18 @@ class DataOpenAIResponseOutputMessageWebSearchToolCall(BaseModel): type: Literal["web_search_call"] +class DataOpenAIResponseOutputMessageFileSearchToolCall(BaseModel): + id: str + + queries: List[str] + + status: str + + type: Literal["file_search_call"] + + results: Optional[List[Dict[str, Union[bool, float, str, List[object], object, None]]]] = None + + class DataOpenAIResponseOutputMessageFunctionToolCall(BaseModel): arguments: str @@ -99,6 +112,7 @@ class DataOpenAIResponseMessage(BaseModel): Data: TypeAlias = Union[ DataOpenAIResponseOutputMessageWebSearchToolCall, + DataOpenAIResponseOutputMessageFileSearchToolCall, DataOpenAIResponseOutputMessageFunctionToolCall, DataOpenAIResponseInputFunctionToolCallOutput, DataOpenAIResponseMessage, diff --git a/src/llama_stack_client/types/shared/query_config.py b/src/llama_stack_client/types/shared/query_config.py index 5695bc20..3628efbf 100644 --- a/src/llama_stack_client/types/shared/query_config.py +++ b/src/llama_stack_client/types/shared/query_config.py @@ -1,11 +1,40 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import Optional +from typing import Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias +from ..._utils import PropertyInfo from ..._models import BaseModel from .query_generator_config import QueryGeneratorConfig -__all__ = ["QueryConfig"] +__all__ = ["QueryConfig", "Ranker", "RankerRrfRanker", "RankerWeightedRanker"] + + +class RankerRrfRanker(BaseModel): + impact_factor: float + """The impact factor for RRF scoring. + + Higher values give more weight to higher-ranked results. Must be greater than 0. + Default of 60 is from the original RRF paper (Cormack et al., 2009). + """ + + type: Literal["rrf"] + """The type of ranker, always "rrf" """ + + +class RankerWeightedRanker(BaseModel): + alpha: float + """Weight factor between 0 and 1. + + 0 means only use keyword scores, 1 means only use vector scores, values in + between blend both scores. + """ + + type: Literal["weighted"] + """The type of ranker, always "weighted" """ + + +Ranker: TypeAlias = Annotated[Union[RankerRrfRanker, RankerWeightedRanker], PropertyInfo(discriminator="type")] class QueryConfig(BaseModel): @@ -27,4 +56,10 @@ class QueryConfig(BaseModel): """Configuration for the query generator.""" mode: Optional[str] = None - """Search mode for retrieval—either "vector" or "keyword". Default "vector".""" + """Search mode for retrieval—either "vector", "keyword", or "hybrid". + + Default "vector". + """ + + ranker: Optional[Ranker] = None + """Configuration for the ranker to use in hybrid search. 
Defaults to RRF ranker.""" diff --git a/src/llama_stack_client/types/shared_params/query_config.py b/src/llama_stack_client/types/shared_params/query_config.py index ded8ff9e..dd482ecf 100644 --- a/src/llama_stack_client/types/shared_params/query_config.py +++ b/src/llama_stack_client/types/shared_params/query_config.py @@ -2,11 +2,39 @@ from __future__ import annotations -from typing_extensions import Required, TypedDict +from typing import Union +from typing_extensions import Literal, Required, TypeAlias, TypedDict from .query_generator_config import QueryGeneratorConfig -__all__ = ["QueryConfig"] +__all__ = ["QueryConfig", "Ranker", "RankerRrfRanker", "RankerWeightedRanker"] + + +class RankerRrfRanker(TypedDict, total=False): + impact_factor: Required[float] + """The impact factor for RRF scoring. + + Higher values give more weight to higher-ranked results. Must be greater than 0. + Default of 60 is from the original RRF paper (Cormack et al., 2009). + """ + + type: Required[Literal["rrf"]] + """The type of ranker, always "rrf" """ + + +class RankerWeightedRanker(TypedDict, total=False): + alpha: Required[float] + """Weight factor between 0 and 1. + + 0 means only use keyword scores, 1 means only use vector scores, values in + between blend both scores. + """ + + type: Required[Literal["weighted"]] + """The type of ranker, always "weighted" """ + + +Ranker: TypeAlias = Union[RankerRrfRanker, RankerWeightedRanker] class QueryConfig(TypedDict, total=False): @@ -28,4 +56,10 @@ class QueryConfig(TypedDict, total=False): """Configuration for the query generator.""" mode: str - """Search mode for retrieval—either "vector" or "keyword". Default "vector".""" + """Search mode for retrieval—either "vector", "keyword", or "hybrid". + + Default "vector". + """ + + ranker: Ranker + """Configuration for the ranker to use in hybrid search. Defaults to RRF ranker.""" diff --git a/src/llama_stack_client/types/vector_stores/__init__.py b/src/llama_stack_client/types/vector_stores/__init__.py new file mode 100644 index 00000000..550270e2 --- /dev/null +++ b/src/llama_stack_client/types/vector_stores/__init__.py @@ -0,0 +1,6 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .vector_store_file import VectorStoreFile as VectorStoreFile +from .file_create_params import FileCreateParams as FileCreateParams diff --git a/src/llama_stack_client/types/vector_stores/file_create_params.py b/src/llama_stack_client/types/vector_stores/file_create_params.py new file mode 100644 index 00000000..66fbf624 --- /dev/null +++ b/src/llama_stack_client/types/vector_stores/file_create_params.py @@ -0,0 +1,46 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Dict, Union, Iterable +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +__all__ = [ + "FileCreateParams", + "ChunkingStrategy", + "ChunkingStrategyVectorStoreChunkingStrategyAuto", + "ChunkingStrategyVectorStoreChunkingStrategyStatic", + "ChunkingStrategyVectorStoreChunkingStrategyStaticStatic", +] + + +class FileCreateParams(TypedDict, total=False): + file_id: Required[str] + """The ID of the file to attach to the vector store.""" + + attributes: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + """The key-value attributes stored with the file, which can be used for filtering.""" + + chunking_strategy: ChunkingStrategy + """The chunking strategy to use for the file.""" + + +class ChunkingStrategyVectorStoreChunkingStrategyAuto(TypedDict, total=False): + type: Required[Literal["auto"]] + + +class ChunkingStrategyVectorStoreChunkingStrategyStaticStatic(TypedDict, total=False): + chunk_overlap_tokens: Required[int] + + max_chunk_size_tokens: Required[int] + + +class ChunkingStrategyVectorStoreChunkingStrategyStatic(TypedDict, total=False): + static: Required[ChunkingStrategyVectorStoreChunkingStrategyStaticStatic] + + type: Required[Literal["static"]] + + +ChunkingStrategy: TypeAlias = Union[ + ChunkingStrategyVectorStoreChunkingStrategyAuto, ChunkingStrategyVectorStoreChunkingStrategyStatic +] diff --git a/src/llama_stack_client/types/vector_stores/vector_store_file.py b/src/llama_stack_client/types/vector_stores/vector_store_file.py new file mode 100644 index 00000000..45ce03f8 --- /dev/null +++ b/src/llama_stack_client/types/vector_stores/vector_store_file.py @@ -0,0 +1,64 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Dict, List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = [ + "VectorStoreFile", + "ChunkingStrategy", + "ChunkingStrategyVectorStoreChunkingStrategyAuto", + "ChunkingStrategyVectorStoreChunkingStrategyStatic", + "ChunkingStrategyVectorStoreChunkingStrategyStaticStatic", + "LastError", +] + + +class ChunkingStrategyVectorStoreChunkingStrategyAuto(BaseModel): + type: Literal["auto"] + + +class ChunkingStrategyVectorStoreChunkingStrategyStaticStatic(BaseModel): + chunk_overlap_tokens: int + + max_chunk_size_tokens: int + + +class ChunkingStrategyVectorStoreChunkingStrategyStatic(BaseModel): + static: ChunkingStrategyVectorStoreChunkingStrategyStaticStatic + + type: Literal["static"] + + +ChunkingStrategy: TypeAlias = Annotated[ + Union[ChunkingStrategyVectorStoreChunkingStrategyAuto, ChunkingStrategyVectorStoreChunkingStrategyStatic], + PropertyInfo(discriminator="type"), +] + + +class LastError(BaseModel): + code: Literal["server_error", "rate_limit_exceeded"] + + message: str + + +class VectorStoreFile(BaseModel): + id: str + + attributes: Dict[str, Union[bool, float, str, List[object], object, None]] + + chunking_strategy: ChunkingStrategy + + created_at: int + + object: str + + status: Literal["completed", "in_progress", "cancelled", "failed"] + + usage_bytes: int + + vector_store_id: str + + last_error: Optional[LastError] = None diff --git a/tests/api_resources/test_completions.py b/tests/api_resources/test_completions.py index 30e15b7b..42dd8a95 100644 --- a/tests/api_resources/test_completions.py +++ b/tests/api_resources/test_completions.py @@ -44,6 +44,7 @@ def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient stop="string", stream=False, stream_options={"foo": True}, + suffix="suffix", temperature=0, top_p=0, user="user", @@ -104,6 +105,7 @@ def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient seed=0, stop="string", stream_options={"foo": True}, + suffix="suffix", temperature=0, top_p=0, user="user", @@ -168,6 +170,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn stop="string", stream=False, stream_options={"foo": True}, + suffix="suffix", temperature=0, top_p=0, user="user", @@ -228,6 +231,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn seed=0, stop="string", stream_options={"foo": True}, + suffix="suffix", temperature=0, top_p=0, user="user", diff --git a/tests/api_resources/tool_runtime/test_rag_tool.py b/tests/api_resources/tool_runtime/test_rag_tool.py index 4169e085..16ea0bb5 100644 --- a/tests/api_resources/tool_runtime/test_rag_tool.py +++ b/tests/api_resources/tool_runtime/test_rag_tool.py @@ -94,6 +94,10 @@ def test_method_query_with_all_params(self, client: LlamaStackClient) -> None: "type": "default", }, "mode": "mode", + "ranker": { + "impact_factor": 0, + "type": "rrf", + }, }, ) assert_matches_type(QueryResult, rag_tool, path=["response"]) @@ -205,6 +209,10 @@ async def test_method_query_with_all_params(self, async_client: AsyncLlamaStackC "type": "default", }, "mode": "mode", + "ranker": { + "impact_factor": 0, + "type": "rrf", + }, }, ) assert_matches_type(QueryResult, rag_tool, path=["response"]) diff --git a/tests/api_resources/vector_stores/__init__.py b/tests/api_resources/vector_stores/__init__.py new file mode 100644 index 00000000..fd8019a9 --- /dev/null +++ 
b/tests/api_resources/vector_stores/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/vector_stores/test_files.py b/tests/api_resources/vector_stores/test_files.py new file mode 100644 index 00000000..c88e5c95 --- /dev/null +++ b/tests/api_resources/vector_stores/test_files.py @@ -0,0 +1,126 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from tests.utils import assert_matches_type +from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient +from llama_stack_client.types.vector_stores import VectorStoreFile + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestFiles: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: LlamaStackClient) -> None: + file = client.vector_stores.files.create( + vector_store_id="vector_store_id", + file_id="file_id", + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: LlamaStackClient) -> None: + file = client.vector_stores.files.create( + vector_store_id="vector_store_id", + file_id="file_id", + attributes={"foo": True}, + chunking_strategy={"type": "auto"}, + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: LlamaStackClient) -> None: + response = client.vector_stores.files.with_raw_response.create( + vector_store_id="vector_store_id", + file_id="file_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: LlamaStackClient) -> None: + with client.vector_stores.files.with_streaming_response.create( + vector_store_id="vector_store_id", + file_id="file_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_create(self, client: LlamaStackClient) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.files.with_raw_response.create( + vector_store_id="", + file_id="file_id", + ) + + +class TestAsyncFiles: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None: + file = await async_client.vector_stores.files.create( + vector_store_id="vector_store_id", + file_id="file_id", + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: + file = await async_client.vector_stores.files.create( + vector_store_id="vector_store_id", + file_id="file_id", + attributes={"foo": True}, + chunking_strategy={"type": "auto"}, + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + 
@parametrize + async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> None: + response = await async_client.vector_stores.files.with_raw_response.create( + vector_store_id="vector_store_id", + file_id="file_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = await response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncLlamaStackClient) -> None: + async with async_client.vector_stores.files.with_streaming_response.create( + vector_store_id="vector_store_id", + file_id="file_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_create(self, async_client: AsyncLlamaStackClient) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.create( + vector_store_id="", + file_id="file_id", + )
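
Usage note (not part of the patch): a minimal sketch of the new vector-store files surface introduced above. It assumes a running Llama Stack server at a local base URL, plus placeholder IDs ("vs_123", "file-abc123"); the call mirrors the new FilesResource.create signature and the chunking-strategy shape defined in file_create_params.py.

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server URL

# Attach an already-uploaded file to a vector store; IDs below are placeholders.
vector_store_file = client.vector_stores.files.create(
    vector_store_id="vs_123",
    file_id="file-abc123",
    attributes={"project": "docs"},  # optional key-value metadata usable for filtering
    chunking_strategy={
        "type": "static",
        "static": {
            "max_chunk_size_tokens": 512,
            "chunk_overlap_tokens": 64,
        },
    },
)
print(vector_store_file.status)  # "completed", "in_progress", "cancelled", or "failed"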
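
Usage note (not part of the patch): the new optional suffix parameter on completions.create can be exercised as below. This is a sketch only; it assumes the serving provider supports suffix-style (fill-in-the-middle) completions, and "meta-llama/Llama-3.1-8B" is a placeholder model ID.

completion = client.completions.create(
    model="meta-llama/Llama-3.1-8B",  # placeholder model ID
    prompt="def add(a, b):",
    suffix="\n\nprint(add(2, 3))",  # (Optional) text the completion should lead into
    max_tokens=64,
    temperature=0,
)
print(completion)  # response shape follows the OpenAI-compatible completions schema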
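
Usage note (not part of the patch): the widened QueryConfig (a "hybrid" search mode plus RRF/weighted rankers) can be passed to the RAG tool as sketched here. The vector DB ID and chunk template are placeholders; the ranker payload mirrors the shapes added in shared_params/query_config.py and the updated test_rag_tool.py.

result = client.tool_runtime.rag_tool.query(
    content="What is retrieval-augmented generation?",
    vector_db_ids=["my-docs"],  # placeholder vector DB ID
    query_config={
        "query_generator_config": {"type": "default", "separator": " "},
        "max_chunks": 5,
        "max_tokens_in_context": 1024,
        "chunk_template": "Result {index}\nContent: {chunk.content}\n",
        "mode": "hybrid",  # now one of "vector", "keyword", or "hybrid"
        "ranker": {"type": "rrf", "impact_factor": 60.0},  # RRF default from Cormack et al., 2009
    },
)

A weighted ranker would instead be passed as {"type": "weighted", "alpha": 0.5}, where alpha blends keyword (0) and vector (1) scores.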