diff --git a/src/llama_stack_client/__init__.py b/src/llama_stack_client/__init__.py index 7dc65e78..4fdd36f2 100644 --- a/src/llama_stack_client/__init__.py +++ b/src/llama_stack_client/__init__.py @@ -36,7 +36,7 @@ UnprocessableEntityError, APIResponseValidationError, ) -from ._base_client import DefaultHttpxClient, DefaultAsyncHttpxClient +from ._base_client import DefaultHttpxClient, DefaultAioHttpClient, DefaultAsyncHttpxClient from ._utils._logs import setup_logging as _setup_logging from .lib.agents.agent import Agent @@ -84,6 +84,7 @@ "DEFAULT_CONNECTION_LIMITS", "DefaultHttpxClient", "DefaultAsyncHttpxClient", + "DefaultAioHttpClient", ] if not _t.TYPE_CHECKING: diff --git a/src/llama_stack_client/_base_client.py b/src/llama_stack_client/_base_client.py index 4475c723..a0bbc468 100644 --- a/src/llama_stack_client/_base_client.py +++ b/src/llama_stack_client/_base_client.py @@ -1289,6 +1289,24 @@ def __init__(self, **kwargs: Any) -> None: super().__init__(**kwargs) +try: + import httpx_aiohttp +except ImportError: + + class _DefaultAioHttpClient(httpx.AsyncClient): + def __init__(self, **_kwargs: Any) -> None: + raise RuntimeError("To use the aiohttp client you must have installed the package with the `aiohttp` extra") +else: + + class _DefaultAioHttpClient(httpx_aiohttp.HttpxAiohttpClient): # type: ignore + def __init__(self, **kwargs: Any) -> None: + kwargs.setdefault("timeout", DEFAULT_TIMEOUT) + kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS) + kwargs.setdefault("follow_redirects", True) + + super().__init__(**kwargs) + + if TYPE_CHECKING: DefaultAsyncHttpxClient = httpx.AsyncClient """An alias to `httpx.AsyncClient` that provides the same defaults that this SDK @@ -1297,8 +1315,12 @@ def __init__(self, **kwargs: Any) -> None: This is useful because overriding the `http_client` with your own instance of `httpx.AsyncClient` will result in httpx's defaults being used, not ours. """ + + DefaultAioHttpClient = httpx.AsyncClient + """An alias to `httpx.AsyncClient` that changes the default HTTP transport to `aiohttp`.""" else: DefaultAsyncHttpxClient = _DefaultAsyncHttpxClient + DefaultAioHttpClient = _DefaultAioHttpClient class AsyncHttpxClientWrapper(DefaultAsyncHttpxClient): diff --git a/src/llama_stack_client/resources/vector_stores/vector_stores.py b/src/llama_stack_client/resources/vector_stores/vector_stores.py index 79ab9db3..573b2f6d 100644 --- a/src/llama_stack_client/resources/vector_stores/vector_stores.py +++ b/src/llama_stack_client/resources/vector_stores/vector_stores.py @@ -320,6 +320,7 @@ def search( max_num_results: int | NotGiven = NOT_GIVEN, ranking_options: vector_store_search_params.RankingOptions | NotGiven = NOT_GIVEN, rewrite_query: bool | NotGiven = NOT_GIVEN, + search_mode: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, @@ -343,6 +344,8 @@ def search( rewrite_query: Whether to rewrite the natural language query for vector search (default false) + search_mode: The search mode to use - "keyword", "vector", or "hybrid" (default "vector") + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -362,6 +365,7 @@ def search( "max_num_results": max_num_results, "ranking_options": ranking_options, "rewrite_query": rewrite_query, + "search_mode": search_mode, }, vector_store_search_params.VectorStoreSearchParams, ), @@ -653,6 +657,7 @@ async def search( max_num_results: int | NotGiven = NOT_GIVEN, ranking_options: vector_store_search_params.RankingOptions | NotGiven = NOT_GIVEN, rewrite_query: bool | NotGiven = NOT_GIVEN, + search_mode: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -676,6 +681,8 @@ async def search( rewrite_query: Whether to rewrite the natural language query for vector search (default false) + search_mode: The search mode to use - "keyword", "vector", or "hybrid" (default "vector") + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -695,6 +702,7 @@ async def search( "max_num_results": max_num_results, "ranking_options": ranking_options, "rewrite_query": rewrite_query, + "search_mode": search_mode, }, vector_store_search_params.VectorStoreSearchParams, ), diff --git a/src/llama_stack_client/types/query_chunks_response.py b/src/llama_stack_client/types/query_chunks_response.py index f668ecac..97c1927c 100644 --- a/src/llama_stack_client/types/query_chunks_response.py +++ b/src/llama_stack_client/types/query_chunks_response.py @@ -5,7 +5,45 @@ from .._models import BaseModel from .shared.interleaved_content import InterleavedContent -__all__ = ["QueryChunksResponse", "Chunk"] +__all__ = ["QueryChunksResponse", "Chunk", "ChunkChunkMetadata"] + + +class ChunkChunkMetadata(BaseModel): + chunk_embedding_dimension: Optional[int] = None + """The dimension of the embedding vector for the chunk.""" + + chunk_embedding_model: Optional[str] = None + """The embedding model used to create the chunk's embedding.""" + + chunk_id: Optional[str] = None + """The ID of the chunk. + + If not set, it will be generated based on the document ID and content. + """ + + chunk_tokenizer: Optional[str] = None + """The tokenizer used to create the chunk. 
Default is Tiktoken.""" + + chunk_window: Optional[str] = None + """The window of the chunk, which can be used to group related chunks together.""" + + content_token_count: Optional[int] = None + """The number of tokens in the content of the chunk.""" + + created_timestamp: Optional[int] = None + """An optional timestamp indicating when the chunk was created.""" + + document_id: Optional[str] = None + """The ID of the document this chunk belongs to.""" + + metadata_token_count: Optional[int] = None + """The number of tokens in the metadata of the chunk.""" + + source: Optional[str] = None + """The source of the content, such as a URL, file path, or other identifier.""" + + updated_timestamp: Optional[int] = None + """An optional timestamp indicating when the chunk was last updated.""" class Chunk(BaseModel): @@ -16,13 +54,25 @@ class Chunk(BaseModel): metadata: Dict[str, Union[bool, float, str, List[object], object, None]] """ - Metadata associated with the chunk, such as document ID, source, or other - relevant information. + Metadata associated with the chunk that will be used in the model context during + inference. + """ + + chunk_metadata: Optional[ChunkChunkMetadata] = None + """Metadata for the chunk that will NOT be used in the context during inference. + + The `chunk_metadata` is required backend functionality. """ embedding: Optional[List[float]] = None """Optional embedding for the chunk. If not provided, it will be computed later.""" + stored_chunk_id: Optional[str] = None + """The chunk ID that is stored in the vector database. + + Used for backend functionality. + """ + class QueryChunksResponse(BaseModel): chunks: List[Chunk] diff --git a/src/llama_stack_client/types/response_create_params.py b/src/llama_stack_client/types/response_create_params.py index 2b198449..471d8b21 100644 --- a/src/llama_stack_client/types/response_create_params.py +++ b/src/llama_stack_client/types/response_create_params.py @@ -17,6 +17,11 @@ "InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", "InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", "InputUnionMember1OpenAIResponseMessageContentUnionMember2", + "InputUnionMember1OpenAIResponseMessageContentUnionMember2Annotation", + "InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation", + "InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation", + "InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation", + "InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath", "Text", "TextFormat", "Tool", @@ -131,7 +136,69 @@ class InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInp ] +class InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation( + TypedDict, total=False +): + file_id: Required[str] + + filename: Required[str] + + index: Required[int] + + type: Required[Literal["file_citation"]] + + +class InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation( + TypedDict, total=False +): + end_index: Required[int] + + start_index: Required[int] + + title: Required[str] + + type: Required[Literal["url_citation"]] + + url: Required[str] + + +class InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation( + 
TypedDict, total=False +): + container_id: Required[str] + + end_index: Required[int] + + file_id: Required[str] + + filename: Required[str] + + start_index: Required[int] + + type: Required[Literal["container_file_citation"]] + + +class InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath( + TypedDict, total=False +): + file_id: Required[str] + + index: Required[int] + + type: Required[Literal["file_path"]] + + +InputUnionMember1OpenAIResponseMessageContentUnionMember2Annotation: TypeAlias = Union[ + InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation, + InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation, + InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation, + InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath, +] + + class InputUnionMember1OpenAIResponseMessageContentUnionMember2(TypedDict, total=False): + annotations: Required[Iterable[InputUnionMember1OpenAIResponseMessageContentUnionMember2Annotation]] + text: Required[str] type: Required[Literal["output_text"]] @@ -193,7 +260,7 @@ class Text(TypedDict, total=False): class ToolOpenAIResponseInputToolWebSearch(TypedDict, total=False): - type: Required[Literal["web_search", "web_search_preview_2025_03_11"]] + type: Required[Literal["web_search", "web_search_preview", "web_search_preview_2025_03_11"]] search_context_size: str diff --git a/src/llama_stack_client/types/response_list_response.py b/src/llama_stack_client/types/response_list_response.py index 85c640d3..d46213ef 100644 --- a/src/llama_stack_client/types/response_list_response.py +++ b/src/llama_stack_client/types/response_list_response.py @@ -21,12 +21,22 @@ "DataInputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", "DataInputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", "DataInputOpenAIResponseMessageContentUnionMember2", + "DataInputOpenAIResponseMessageContentUnionMember2Annotation", + "DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation", + "DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation", + "DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation", + "DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath", "DataOutput", "DataOutputOpenAIResponseMessage", "DataOutputOpenAIResponseMessageContentUnionMember1", "DataOutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", "DataOutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", "DataOutputOpenAIResponseMessageContentUnionMember2", + "DataOutputOpenAIResponseMessageContentUnionMember2Annotation", + "DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation", + "DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation", + "DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation", + "DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath", "DataOutputOpenAIResponseOutputMessageWebSearchToolCall", "DataOutputOpenAIResponseOutputMessageFileSearchToolCall", "DataOutputOpenAIResponseOutputMessageFunctionToolCall", @@ 
-108,7 +118,66 @@ class DataInputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessag ] +class DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation(BaseModel): + file_id: str + + filename: str + + index: int + + type: Literal["file_citation"] + + +class DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation(BaseModel): + end_index: int + + start_index: int + + title: str + + type: Literal["url_citation"] + + url: str + + +class DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation( + BaseModel +): + container_id: str + + end_index: int + + file_id: str + + filename: str + + start_index: int + + type: Literal["container_file_citation"] + + +class DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath(BaseModel): + file_id: str + + index: int + + type: Literal["file_path"] + + +DataInputOpenAIResponseMessageContentUnionMember2Annotation: TypeAlias = Annotated[ + Union[ + DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation, + DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation, + DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation, + DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath, + ], + PropertyInfo(discriminator="type"), +] + + class DataInputOpenAIResponseMessageContentUnionMember2(BaseModel): + annotations: List[DataInputOpenAIResponseMessageContentUnionMember2Annotation] + text: str type: Literal["output_text"] @@ -162,7 +231,66 @@ class DataOutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessa ] +class DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation(BaseModel): + file_id: str + + filename: str + + index: int + + type: Literal["file_citation"] + + +class DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation(BaseModel): + end_index: int + + start_index: int + + title: str + + type: Literal["url_citation"] + + url: str + + +class DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation( + BaseModel +): + container_id: str + + end_index: int + + file_id: str + + filename: str + + start_index: int + + type: Literal["container_file_citation"] + + +class DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath(BaseModel): + file_id: str + + index: int + + type: Literal["file_path"] + + +DataOutputOpenAIResponseMessageContentUnionMember2Annotation: TypeAlias = Annotated[ + Union[ + DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation, + DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation, + DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation, + DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath, + ], + PropertyInfo(discriminator="type"), +] + + class DataOutputOpenAIResponseMessageContentUnionMember2(BaseModel): + annotations: List[DataOutputOpenAIResponseMessageContentUnionMember2Annotation] + text: str type: Literal["output_text"] diff --git a/src/llama_stack_client/types/response_object.py b/src/llama_stack_client/types/response_object.py index a00115bb..e4b313d3 100644 
--- a/src/llama_stack_client/types/response_object.py +++ b/src/llama_stack_client/types/response_object.py @@ -16,6 +16,11 @@ "OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", "OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", "OutputOpenAIResponseMessageContentUnionMember2", + "OutputOpenAIResponseMessageContentUnionMember2Annotation", + "OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation", + "OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation", + "OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation", + "OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath", "OutputOpenAIResponseOutputMessageWebSearchToolCall", "OutputOpenAIResponseOutputMessageFileSearchToolCall", "OutputOpenAIResponseOutputMessageFunctionToolCall", @@ -51,7 +56,64 @@ class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCo ] +class OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation(BaseModel): + file_id: str + + filename: str + + index: int + + type: Literal["file_citation"] + + +class OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation(BaseModel): + end_index: int + + start_index: int + + title: str + + type: Literal["url_citation"] + + url: str + + +class OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation(BaseModel): + container_id: str + + end_index: int + + file_id: str + + filename: str + + start_index: int + + type: Literal["container_file_citation"] + + +class OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath(BaseModel): + file_id: str + + index: int + + type: Literal["file_path"] + + +OutputOpenAIResponseMessageContentUnionMember2Annotation: TypeAlias = Annotated[ + Union[ + OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation, + OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation, + OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation, + OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath, + ], + PropertyInfo(discriminator="type"), +] + + class OutputOpenAIResponseMessageContentUnionMember2(BaseModel): + annotations: List[OutputOpenAIResponseMessageContentUnionMember2Annotation] + text: str type: Literal["output_text"] @@ -189,6 +251,16 @@ class Error(BaseModel): class ResponseObject(BaseModel): + @property + def output_text(self) -> str: + texts: List[str] = [] + for output in self.output: + if output.type == "message": + for content in output.content: + if content.type == "output_text": + texts.append(content.text) + return "".join(texts) + id: str created_at: int diff --git a/src/llama_stack_client/types/response_object_stream.py b/src/llama_stack_client/types/response_object_stream.py index 193491c0..311ad6bd 100644 --- a/src/llama_stack_client/types/response_object_stream.py +++ b/src/llama_stack_client/types/response_object_stream.py @@ -17,6 +17,11 @@ "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", 
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2", + "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2Annotation", + "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation", + "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation", + "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation", + "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath", "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageWebSearchToolCall", "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageFileSearchToolCall", "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageFunctionToolCall", @@ -30,6 +35,11 @@ "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2", + "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2Annotation", + "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation", + "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation", + "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation", + "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath", "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageWebSearchToolCall", "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageFileSearchToolCall", "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageFunctionToolCall", @@ -88,7 +98,74 @@ class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessage ] +class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation( + BaseModel +): + file_id: str + + filename: str + + index: int + + type: Literal["file_citation"] + + +class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation( + BaseModel +): + end_index: int + + start_index: int + + title: str + + type: Literal["url_citation"] + + url: str + + +class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation( + BaseModel +): + container_id: str + + end_index: int + + file_id: str + + filename: str + + start_index: int + + type: Literal["container_file_citation"] + + +class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath( + BaseModel +): + 
file_id: str + + index: int + + type: Literal["file_path"] + + +OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2Annotation: TypeAlias = Annotated[ + Union[ + OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation, + OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation, + OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation, + OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath, + ], + PropertyInfo(discriminator="type"), +] + + class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2(BaseModel): + annotations: List[ + OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2Annotation + ] + text: str type: Literal["output_text"] @@ -235,7 +312,74 @@ class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageC ] +class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation( + BaseModel +): + file_id: str + + filename: str + + index: int + + type: Literal["file_citation"] + + +class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation( + BaseModel +): + end_index: int + + start_index: int + + title: str + + type: Literal["url_citation"] + + url: str + + +class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation( + BaseModel +): + container_id: str + + end_index: int + + file_id: str + + filename: str + + start_index: int + + type: Literal["container_file_citation"] + + +class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath( + BaseModel +): + file_id: str + + index: int + + type: Literal["file_path"] + + +OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2Annotation: TypeAlias = Annotated[ + Union[ + OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation, + OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation, + OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation, + OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath, + ], + PropertyInfo(discriminator="type"), +] + + class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2(BaseModel): + annotations: List[ + OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2Annotation + ] + text: str type: Literal["output_text"] diff --git a/src/llama_stack_client/types/responses/input_item_list_response.py b/src/llama_stack_client/types/responses/input_item_list_response.py index 5b63b51a..aadcd9f2 100644 --- 
a/src/llama_stack_client/types/responses/input_item_list_response.py +++ b/src/llama_stack_client/types/responses/input_item_list_response.py @@ -18,6 +18,11 @@ "DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", "DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", "DataOpenAIResponseMessageContentUnionMember2", + "DataOpenAIResponseMessageContentUnionMember2Annotation", + "DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation", + "DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation", + "DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation", + "DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath", ] @@ -90,7 +95,64 @@ class DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCont ] +class DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation(BaseModel): + file_id: str + + filename: str + + index: int + + type: Literal["file_citation"] + + +class DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation(BaseModel): + end_index: int + + start_index: int + + title: str + + type: Literal["url_citation"] + + url: str + + +class DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation(BaseModel): + container_id: str + + end_index: int + + file_id: str + + filename: str + + start_index: int + + type: Literal["container_file_citation"] + + +class DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath(BaseModel): + file_id: str + + index: int + + type: Literal["file_path"] + + +DataOpenAIResponseMessageContentUnionMember2Annotation: TypeAlias = Annotated[ + Union[ + DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation, + DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation, + DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation, + DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath, + ], + PropertyInfo(discriminator="type"), +] + + class DataOpenAIResponseMessageContentUnionMember2(BaseModel): + annotations: List[DataOpenAIResponseMessageContentUnionMember2Annotation] + text: str type: Literal["output_text"] diff --git a/src/llama_stack_client/types/vector_io_insert_params.py b/src/llama_stack_client/types/vector_io_insert_params.py index 2dded485..5b6580fe 100644 --- a/src/llama_stack_client/types/vector_io_insert_params.py +++ b/src/llama_stack_client/types/vector_io_insert_params.py @@ -7,7 +7,7 @@ from .shared_params.interleaved_content import InterleavedContent -__all__ = ["VectorIoInsertParams", "Chunk"] +__all__ = ["VectorIoInsertParams", "Chunk", "ChunkChunkMetadata"] class VectorIoInsertParams(TypedDict, total=False): @@ -28,6 +28,44 @@ class VectorIoInsertParams(TypedDict, total=False): """The time to live of the chunks.""" +class ChunkChunkMetadata(TypedDict, total=False): + chunk_embedding_dimension: int + """The dimension of the embedding vector for the chunk.""" + + chunk_embedding_model: str + """The embedding model used to create the chunk's embedding.""" + + chunk_id: str + """The ID of the chunk. + + If not set, it will be generated based on the document ID and content. + """ + + chunk_tokenizer: str + """The tokenizer used to create the chunk. 
Default is Tiktoken.""" + + chunk_window: str + """The window of the chunk, which can be used to group related chunks together.""" + + content_token_count: int + """The number of tokens in the content of the chunk.""" + + created_timestamp: int + """An optional timestamp indicating when the chunk was created.""" + + document_id: str + """The ID of the document this chunk belongs to.""" + + metadata_token_count: int + """The number of tokens in the metadata of the chunk.""" + + source: str + """The source of the content, such as a URL, file path, or other identifier.""" + + updated_timestamp: int + """An optional timestamp indicating when the chunk was last updated.""" + + class Chunk(TypedDict, total=False): content: Required[InterleavedContent] """ @@ -36,9 +74,21 @@ class Chunk(TypedDict, total=False): metadata: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] """ - Metadata associated with the chunk, such as document ID, source, or other - relevant information. + Metadata associated with the chunk that will be used in the model context during + inference. + """ + + chunk_metadata: ChunkChunkMetadata + """Metadata for the chunk that will NOT be used in the context during inference. + + The `chunk_metadata` is required backend functionality. """ embedding: Iterable[float] """Optional embedding for the chunk. If not provided, it will be computed later.""" + + stored_chunk_id: str + """The chunk ID that is stored in the vector database. + + Used for backend functionality. + """ diff --git a/src/llama_stack_client/types/vector_store_search_params.py b/src/llama_stack_client/types/vector_store_search_params.py index c7e86cd0..fdb02ff7 100644 --- a/src/llama_stack_client/types/vector_store_search_params.py +++ b/src/llama_stack_client/types/vector_store_search_params.py @@ -24,6 +24,9 @@ class VectorStoreSearchParams(TypedDict, total=False): rewrite_query: bool """Whether to rewrite the natural language query for vector search (default false)""" + search_mode: str + """The search mode to use - "keyword", "vector", or "hybrid" (default "vector")""" + class RankingOptions(TypedDict, total=False): ranker: str diff --git a/tests/api_resources/agents/test_session.py b/tests/api_resources/agents/test_session.py index 43b36a40..2c80df58 100644 --- a/tests/api_resources/agents/test_session.py +++ b/tests/api_resources/agents/test_session.py @@ -169,7 +169,9 @@ def test_path_params_delete(self, client: LlamaStackClient) -> None: class TestAsyncSession: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/agents/test_steps.py b/tests/api_resources/agents/test_steps.py index 2cee6f12..5555a9a4 100644 --- a/tests/api_resources/agents/test_steps.py +++ b/tests/api_resources/agents/test_steps.py @@ -93,7 +93,9 @@ def test_path_params_retrieve(self, client: LlamaStackClient) -> None: class TestAsyncSteps: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_retrieve(self, async_client: 
AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/agents/test_turn.py b/tests/api_resources/agents/test_turn.py index 3eacbba0..31eb53f9 100644 --- a/tests/api_resources/agents/test_turn.py +++ b/tests/api_resources/agents/test_turn.py @@ -522,7 +522,9 @@ def test_path_params_resume_overload_2(self, client: LlamaStackClient) -> None: class TestAsyncTurn: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py index ff450202..496ea061 100644 --- a/tests/api_resources/chat/test_completions.py +++ b/tests/api_resources/chat/test_completions.py @@ -266,7 +266,9 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None: class TestAsyncCompletions: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/eval/test_jobs.py b/tests/api_resources/eval/test_jobs.py index 5f289c74..17b02896 100644 --- a/tests/api_resources/eval/test_jobs.py +++ b/tests/api_resources/eval/test_jobs.py @@ -163,7 +163,9 @@ def test_path_params_status(self, client: LlamaStackClient) -> None: class TestAsyncJobs: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/post_training/test_job.py b/tests/api_resources/post_training/test_job.py index 6fca52db..158eafbc 100644 --- a/tests/api_resources/post_training/test_job.py +++ b/tests/api_resources/post_training/test_job.py @@ -141,7 +141,9 @@ def test_streaming_response_status(self, client: LlamaStackClient) -> None: class TestAsyncJob: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/responses/test_input_items.py b/tests/api_resources/responses/test_input_items.py index c1b25556..a0160f72 100644 --- a/tests/api_resources/responses/test_input_items.py +++ b/tests/api_resources/responses/test_input_items.py @@ -69,7 +69,9 @@ def test_path_params_list(self, client: LlamaStackClient) -> None: class TestAsyncInputItems: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def 
test_method_list(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_agents.py b/tests/api_resources/test_agents.py index 1c0478a6..c4aa5349 100644 --- a/tests/api_resources/test_agents.py +++ b/tests/api_resources/test_agents.py @@ -146,7 +146,9 @@ def test_path_params_delete(self, client: LlamaStackClient) -> None: class TestAsyncAgents: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_benchmarks.py b/tests/api_resources/test_benchmarks.py index 12cb3870..97d3d5c9 100644 --- a/tests/api_resources/test_benchmarks.py +++ b/tests/api_resources/test_benchmarks.py @@ -131,7 +131,9 @@ def test_streaming_response_register(self, client: LlamaStackClient) -> None: class TestAsyncBenchmarks: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_completions.py b/tests/api_resources/test_completions.py index 42dd8a95..355384b0 100644 --- a/tests/api_resources/test_completions.py +++ b/tests/api_resources/test_completions.py @@ -141,7 +141,9 @@ def test_streaming_response_create_overload_2(self, client: LlamaStackClient) -> class TestAsyncCompletions: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_datasets.py b/tests/api_resources/test_datasets.py index 010e10d0..9cd17f45 100644 --- a/tests/api_resources/test_datasets.py +++ b/tests/api_resources/test_datasets.py @@ -228,7 +228,9 @@ def test_path_params_unregister(self, client: LlamaStackClient) -> None: class TestAsyncDatasets: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_embeddings.py b/tests/api_resources/test_embeddings.py index 5d002024..5296e9c0 100644 --- a/tests/api_resources/test_embeddings.py +++ b/tests/api_resources/test_embeddings.py @@ -64,7 +64,9 @@ def test_streaming_response_create(self, client: LlamaStackClient) -> None: class TestAsyncEmbeddings: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None: diff 
--git a/tests/api_resources/test_eval.py b/tests/api_resources/test_eval.py index 8d04c104..878b3d28 100644 --- a/tests/api_resources/test_eval.py +++ b/tests/api_resources/test_eval.py @@ -566,7 +566,9 @@ def test_path_params_run_eval_alpha(self, client: LlamaStackClient) -> None: class TestAsyncEval: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_files.py b/tests/api_resources/test_files.py index 8dbcedec..7fc5e107 100644 --- a/tests/api_resources/test_files.py +++ b/tests/api_resources/test_files.py @@ -202,7 +202,9 @@ def test_path_params_content(self, client: LlamaStackClient) -> None: class TestAsyncFiles: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_inference.py b/tests/api_resources/test_inference.py index d876ae56..21967c9a 100644 --- a/tests/api_resources/test_inference.py +++ b/tests/api_resources/test_inference.py @@ -527,7 +527,9 @@ def test_streaming_response_embeddings(self, client: LlamaStackClient) -> None: class TestAsyncInference: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_batch_chat_completion(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_inspect.py b/tests/api_resources/test_inspect.py index e72ce766..a43abe6c 100644 --- a/tests/api_resources/test_inspect.py +++ b/tests/api_resources/test_inspect.py @@ -69,7 +69,9 @@ def test_streaming_response_version(self, client: LlamaStackClient) -> None: class TestAsyncInspect: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_health(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_models.py b/tests/api_resources/test_models.py index a2c8e68a..2e3f15be 100644 --- a/tests/api_resources/test_models.py +++ b/tests/api_resources/test_models.py @@ -162,7 +162,9 @@ def test_path_params_unregister(self, client: LlamaStackClient) -> None: class TestAsyncModels: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_post_training.py b/tests/api_resources/test_post_training.py index 1d0613da..5e7430fb 100644 --- 
a/tests/api_resources/test_post_training.py +++ b/tests/api_resources/test_post_training.py @@ -239,7 +239,9 @@ def test_streaming_response_supervised_fine_tune(self, client: LlamaStackClient) class TestAsyncPostTraining: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_preference_optimize(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_providers.py b/tests/api_resources/test_providers.py index 02f910b4..b23a84bd 100644 --- a/tests/api_resources/test_providers.py +++ b/tests/api_resources/test_providers.py @@ -82,7 +82,9 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None: class TestAsyncProviders: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_responses.py b/tests/api_resources/test_responses.py index e3343d3e..a3fa9fd1 100644 --- a/tests/api_resources/test_responses.py +++ b/tests/api_resources/test_responses.py @@ -220,7 +220,9 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None: class TestAsyncResponses: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_routes.py b/tests/api_resources/test_routes.py index d434d5b8..12b51f28 100644 --- a/tests/api_resources/test_routes.py +++ b/tests/api_resources/test_routes.py @@ -44,7 +44,9 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None: class TestAsyncRoutes: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_safety.py b/tests/api_resources/test_safety.py index e3dbe9b7..257dfd76 100644 --- a/tests/api_resources/test_safety.py +++ b/tests/api_resources/test_safety.py @@ -71,7 +71,9 @@ def test_streaming_response_run_shield(self, client: LlamaStackClient) -> None: class TestAsyncSafety: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_run_shield(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_scoring.py b/tests/api_resources/test_scoring.py index ca818363..ed46bd07 100644 --- a/tests/api_resources/test_scoring.py +++ b/tests/api_resources/test_scoring.py @@ 
-135,7 +135,9 @@ def test_streaming_response_score_batch(self, client: LlamaStackClient) -> None: class TestAsyncScoring: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_score(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_scoring_functions.py b/tests/api_resources/test_scoring_functions.py index d58d5c60..44556317 100644 --- a/tests/api_resources/test_scoring_functions.py +++ b/tests/api_resources/test_scoring_functions.py @@ -140,7 +140,9 @@ def test_streaming_response_register(self, client: LlamaStackClient) -> None: class TestAsyncScoringFunctions: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_shields.py b/tests/api_resources/test_shields.py index a351a6f0..037a66d3 100644 --- a/tests/api_resources/test_shields.py +++ b/tests/api_resources/test_shields.py @@ -123,7 +123,9 @@ def test_streaming_response_register(self, client: LlamaStackClient) -> None: class TestAsyncShields: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_synthetic_data_generation.py b/tests/api_resources/test_synthetic_data_generation.py index db409b53..c383770e 100644 --- a/tests/api_resources/test_synthetic_data_generation.py +++ b/tests/api_resources/test_synthetic_data_generation.py @@ -83,7 +83,9 @@ def test_streaming_response_generate(self, client: LlamaStackClient) -> None: class TestAsyncSyntheticDataGeneration: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_generate(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_telemetry.py b/tests/api_resources/test_telemetry.py index 4f3c81d4..14a8801c 100644 --- a/tests/api_resources/test_telemetry.py +++ b/tests/api_resources/test_telemetry.py @@ -417,7 +417,9 @@ def test_streaming_response_save_spans_to_dataset(self, client: LlamaStackClient class TestAsyncTelemetry: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_get_span(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_tool_runtime.py b/tests/api_resources/test_tool_runtime.py index b13e8c1f..fa79b1ba 100644 --- 
a/tests/api_resources/test_tool_runtime.py +++ b/tests/api_resources/test_tool_runtime.py @@ -89,7 +89,9 @@ def test_streaming_response_list_tools(self, client: LlamaStackClient) -> None: class TestAsyncToolRuntime: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_invoke_tool(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_toolgroups.py b/tests/api_resources/test_toolgroups.py index 42a38226..1b8e5bce 100644 --- a/tests/api_resources/test_toolgroups.py +++ b/tests/api_resources/test_toolgroups.py @@ -164,7 +164,9 @@ def test_path_params_unregister(self, client: LlamaStackClient) -> None: class TestAsyncToolgroups: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_tools.py b/tests/api_resources/test_tools.py index 2dd1ace1..3c1f0da4 100644 --- a/tests/api_resources/test_tools.py +++ b/tests/api_resources/test_tools.py @@ -89,7 +89,9 @@ def test_path_params_get(self, client: LlamaStackClient) -> None: class TestAsyncTools: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_vector_dbs.py b/tests/api_resources/test_vector_dbs.py index d185edf1..68d6be89 100644 --- a/tests/api_resources/test_vector_dbs.py +++ b/tests/api_resources/test_vector_dbs.py @@ -169,7 +169,9 @@ def test_path_params_unregister(self, client: LlamaStackClient) -> None: class TestAsyncVectorDBs: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_vector_io.py b/tests/api_resources/test_vector_io.py index dada826d..c62a58d3 100644 --- a/tests/api_resources/test_vector_io.py +++ b/tests/api_resources/test_vector_io.py @@ -37,7 +37,21 @@ def test_method_insert_with_all_params(self, client: LlamaStackClient) -> None: { "content": "string", "metadata": {"foo": True}, + "chunk_metadata": { + "chunk_embedding_dimension": 0, + "chunk_embedding_model": "chunk_embedding_model", + "chunk_id": "chunk_id", + "chunk_tokenizer": "chunk_tokenizer", + "chunk_window": "chunk_window", + "content_token_count": 0, + "created_timestamp": 0, + "document_id": "document_id", + "metadata_token_count": 0, + "source": "source", + "updated_timestamp": 0, + }, "embedding": [0], + "stored_chunk_id": "stored_chunk_id", } ], vector_db_id="vector_db_id", @@ -126,7 +140,9 @@ def test_streaming_response_query(self, client: LlamaStackClient) -> 
None: class TestAsyncVectorIo: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_insert(self, async_client: AsyncLlamaStackClient) -> None: @@ -148,7 +164,21 @@ async def test_method_insert_with_all_params(self, async_client: AsyncLlamaStack { "content": "string", "metadata": {"foo": True}, + "chunk_metadata": { + "chunk_embedding_dimension": 0, + "chunk_embedding_model": "chunk_embedding_model", + "chunk_id": "chunk_id", + "chunk_tokenizer": "chunk_tokenizer", + "chunk_window": "chunk_window", + "content_token_count": 0, + "created_timestamp": 0, + "document_id": "document_id", + "metadata_token_count": 0, + "source": "source", + "updated_timestamp": 0, + }, "embedding": [0], + "stored_chunk_id": "stored_chunk_id", } ], vector_db_id="vector_db_id", diff --git a/tests/api_resources/test_vector_stores.py b/tests/api_resources/test_vector_stores.py index bd63d5e7..84324ca4 100644 --- a/tests/api_resources/test_vector_stores.py +++ b/tests/api_resources/test_vector_stores.py @@ -247,6 +247,7 @@ def test_method_search_with_all_params(self, client: LlamaStackClient) -> None: "score_threshold": 0, }, rewrite_query=True, + search_mode="search_mode", ) assert_matches_type(VectorStoreSearchResponse, vector_store, path=["response"]) @@ -286,7 +287,9 @@ def test_path_params_search(self, client: LlamaStackClient) -> None: class TestAsyncVectorStores: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None: @@ -513,6 +516,7 @@ async def test_method_search_with_all_params(self, async_client: AsyncLlamaStack "score_threshold": 0, }, rewrite_query=True, + search_mode="search_mode", ) assert_matches_type(VectorStoreSearchResponse, vector_store, path=["response"]) diff --git a/tests/api_resources/tool_runtime/test_rag_tool.py b/tests/api_resources/tool_runtime/test_rag_tool.py index 16ea0bb5..17a64d8e 100644 --- a/tests/api_resources/tool_runtime/test_rag_tool.py +++ b/tests/api_resources/tool_runtime/test_rag_tool.py @@ -130,7 +130,9 @@ def test_streaming_response_query(self, client: LlamaStackClient) -> None: class TestAsyncRagTool: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_insert(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/vector_stores/test_files.py b/tests/api_resources/vector_stores/test_files.py index c88e5c95..f9728a36 100644 --- a/tests/api_resources/vector_stores/test_files.py +++ b/tests/api_resources/vector_stores/test_files.py @@ -71,7 +71,9 @@ def test_path_params_create(self, client: LlamaStackClient) -> None: class TestAsyncFiles: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], 
indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/conftest.py b/tests/conftest.py index ed5e8a48..ddadec32 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,10 +6,12 @@ import logging from typing import TYPE_CHECKING, Iterator, AsyncIterator +import httpx import pytest from pytest_asyncio import is_async_test -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient +from llama_stack_client import LlamaStackClient, DefaultAioHttpClient, AsyncLlamaStackClient +from llama_stack_client._utils import is_dict if TYPE_CHECKING: from _pytest.fixtures import FixtureRequest # pyright: ignore[reportPrivateImportUsage] @@ -27,6 +29,19 @@ def pytest_collection_modifyitems(items: list[pytest.Function]) -> None: for async_test in pytest_asyncio_tests: async_test.add_marker(session_scope_marker, append=False) + # We skip tests that use both the aiohttp client and respx_mock as respx_mock + # doesn't support custom transports. + for item in items: + if "async_client" not in item.fixturenames or "respx_mock" not in item.fixturenames: + continue + + if not hasattr(item, "callspec"): + continue + + async_client_param = item.callspec.params.get("async_client") + if is_dict(async_client_param) and async_client_param.get("http_client") == "aiohttp": + item.add_marker(pytest.mark.skip(reason="aiohttp client is not compatible with respx_mock")) + base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -43,9 +58,25 @@ def client(request: FixtureRequest) -> Iterator[LlamaStackClient]: @pytest.fixture(scope="session") async def async_client(request: FixtureRequest) -> AsyncIterator[AsyncLlamaStackClient]: - strict = getattr(request, "param", True) - if not isinstance(strict, bool): - raise TypeError(f"Unexpected fixture parameter type {type(strict)}, expected {bool}") - - async with AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=strict) as client: + param = getattr(request, "param", True) + + # defaults + strict = True + http_client: None | httpx.AsyncClient = None + + if isinstance(param, bool): + strict = param + elif is_dict(param): + strict = param.get("strict", True) + assert isinstance(strict, bool) + + http_client_type = param.get("http_client", "httpx") + if http_client_type == "aiohttp": + http_client = DefaultAioHttpClient() + else: + raise TypeError(f"Unexpected fixture parameter type {type(param)}, expected bool or dict") + + async with AsyncLlamaStackClient( + base_url=base_url, _strict_response_validation=strict, http_client=http_client + ) as client: yield client diff --git a/tests/test_client.py b/tests/test_client.py index 59472837..6a1a8f85 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -182,6 +182,7 @@ def test_copy_signature(self) -> None: copy_param = copy_signature.parameters.get(name) assert copy_param is not None, f"copy() signature is missing the {name} param" + @pytest.mark.skipif(sys.version_info >= (3, 10), reason="fails because of a memory leak that started from 3.12") def test_copy_build_request(self) -> None: options = FinalRequestOptions(method="get", url="/foo") @@ -993,6 +994,7 @@ def test_copy_signature(self) -> None: copy_param = copy_signature.parameters.get(name) assert copy_param is not None, f"copy() signature is missing the {name} param" + @pytest.mark.skipif(sys.version_info >= (3, 10), reason="fails because of a memory leak that started from 3.12") def 
test_copy_build_request(self) -> None: options = FinalRequestOptions(method="get", url="/foo")
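
Usage sketches for the main changes above (illustrative only, not part of the patch). First, the new `DefaultAioHttpClient` exported from `_base_client.py` swaps httpx's default transport for aiohttp via the optional `httpx-aiohttp` dependency; constructing it without the `aiohttp` extra installed raises the `RuntimeError` defined in the hunk. A minimal sketch — the base URL is an assumption, not something this diff specifies:

```python
import asyncio

from llama_stack_client import AsyncLlamaStackClient, DefaultAioHttpClient


async def main() -> None:
    # DefaultAioHttpClient raises RuntimeError at construction time if the
    # `aiohttp` extra (httpx-aiohttp) is not installed.
    async with AsyncLlamaStackClient(
        base_url="http://localhost:8321",  # assumed local server address
        http_client=DefaultAioHttpClient(),
    ) as client:
        models = await client.models.list()
        print(models)


asyncio.run(main())
```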
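
The `search_mode` parameter added to `vector_stores.search` selects between keyword, vector, and hybrid retrieval; per the docstring, the server defaults to "vector" when it is omitted. A sketch of a hybrid search call — the store ID and query are hypothetical, and `query` is assumed to be the method's existing required argument:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed address

results = client.vector_stores.search(
    vector_store_id="vs_123",  # hypothetical vector store ID
    query="What is the capital of France?",
    max_num_results=5,
    rewrite_query=False,
    search_mode="hybrid",  # new: "keyword", "vector", or "hybrid"
)
print(results)
```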
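
`Chunk` now distinguishes two kinds of metadata: the reworded `metadata` field, which is placed in the model context during inference, and the new backend-only `chunk_metadata` block (plus `stored_chunk_id`), which is never shown to the model. A sketch of `vector_io.insert` using both, with illustrative IDs and values following the shapes exercised in `test_vector_io.py`:

```python
client.vector_io.insert(
    vector_db_id="my_vector_db",  # hypothetical database ID
    chunks=[
        {
            "content": "Paris is the capital of France.",
            # Included in the model context during inference.
            "metadata": {"topic": "geography"},
            # Backend-only bookkeeping; excluded from the context.
            "chunk_metadata": {
                "document_id": "doc-001",
                "source": "https://example.com/facts.txt",
                "content_token_count": 8,
            },
        }
    ],
)
```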
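
`ResponseObject` gains an `output_text` convenience property that walks `self.output` and concatenates the text of every `output_text` content part across message outputs, returning `""` when there is none. Assuming a simple text response (the model ID here is an example):

```python
response = client.responses.create(
    model="meta-llama/Llama-3.3-70B-Instruct",  # example model ID
    input="Say hello in one word.",
)

# Equivalent to manually joining each message output's `output_text`
# parts, exactly as the property's loop does.
print(response.output_text)
```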
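
Each `output_text` content part now also carries `annotations`, a union discriminated on `type` covering file citations, URL citations, container-file citations, and file paths. A sketch of narrowing on the discriminator; it assumes message content arrives in the structured list-of-parts form rather than as a plain string:

```python
for output in response.output:
    if output.type != "message":
        continue
    content = output.content
    if isinstance(content, str):  # plain-text content carries no annotations
        continue
    for part in content:
        if part.type != "output_text":
            continue
        for note in part.annotations:
            # The union is discriminated on `type`, so checking the literal
            # value narrows to the matching annotation model.
            if note.type == "url_citation":
                print(f"[{note.start_index}:{note.end_index}] {note.title} -> {note.url}")
            elif note.type == "file_citation":
                print(f"cited {note.filename} ({note.file_id})")
```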
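
On the test side, the `async_client` fixture now accepts either a bool (the old strict flag) or a dict, and `pytest_collection_modifyitems` skips any aiohttp-parametrized test that also uses `respx_mock`, since respx cannot intercept the custom transport. A hypothetical new suite would opt in the same way the updated modules do; note that `strict` defaults to `True` for dict params:

```python
import pytest


# Hypothetical test class following the pattern applied across tests/ above.
class TestAsyncWidgets:
    parametrize = pytest.mark.parametrize(
        "async_client",
        # {"http_client": "aiohttp", "strict": False} would run aiohttp loose.
        [False, True, {"http_client": "aiohttp"}],
        indirect=True,
        ids=["loose", "strict", "aiohttp"],
    )

    @parametrize
    async def test_method_list(self, async_client) -> None:
        ...
```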