Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/llama_stack_client/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
UnprocessableEntityError,
APIResponseValidationError,
)
from ._base_client import DefaultHttpxClient, DefaultAsyncHttpxClient
from ._base_client import DefaultHttpxClient, DefaultAioHttpClient, DefaultAsyncHttpxClient
from ._utils._logs import setup_logging as _setup_logging

from .lib.agents.agent import Agent
Expand Down Expand Up @@ -84,6 +84,7 @@
"DEFAULT_CONNECTION_LIMITS",
"DefaultHttpxClient",
"DefaultAsyncHttpxClient",
"DefaultAioHttpClient",
]

if not _t.TYPE_CHECKING:
Expand Down
22 changes: 22 additions & 0 deletions src/llama_stack_client/_base_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1289,6 +1289,24 @@ def __init__(self, **kwargs: Any) -> None:
super().__init__(**kwargs)


try:
    import httpx_aiohttp
except ImportError:

    class _DefaultAioHttpClient(httpx.AsyncClient):
        """Stub used when the optional ``aiohttp`` extra is not installed.

        Any attempt to construct this client fails loudly instead of silently
        falling back to the default httpx transport.
        """

        def __init__(self, **_kwargs: Any) -> None:
            raise RuntimeError("To use the aiohttp client you must have installed the package with the `aiohttp` extra")
else:

    class _DefaultAioHttpClient(httpx_aiohttp.HttpxAiohttpClient):  # type: ignore
        """aiohttp-backed async client with this SDK's defaults.

        Applies the same defaults as the plain httpx async client (timeout,
        connection limits, redirect following) — only when the caller did not
        supply their own values (``setdefault``).
        """

        def __init__(self, **kwargs: Any) -> None:
            kwargs.setdefault("timeout", DEFAULT_TIMEOUT)
            kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS)
            kwargs.setdefault("follow_redirects", True)

            super().__init__(**kwargs)


if TYPE_CHECKING:
    DefaultAsyncHttpxClient = httpx.AsyncClient
    """An alias to `httpx.AsyncClient` that provides the same defaults that this SDK
    uses internally.

    This is useful because overriding the `http_client` with your own instance of
    `httpx.AsyncClient` will result in httpx's defaults being used, not ours.
    """

    DefaultAioHttpClient = httpx.AsyncClient
    """An alias to `httpx.AsyncClient` that changes the default HTTP transport to `aiohttp`."""
else:
    # At runtime the real subclasses (defined above) are exported; the
    # TYPE_CHECKING aliases exist only so type checkers see `httpx.AsyncClient`.
    DefaultAsyncHttpxClient = _DefaultAsyncHttpxClient
    DefaultAioHttpClient = _DefaultAioHttpClient


class AsyncHttpxClientWrapper(DefaultAsyncHttpxClient):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@ def search(
max_num_results: int | NotGiven = NOT_GIVEN,
ranking_options: vector_store_search_params.RankingOptions | NotGiven = NOT_GIVEN,
rewrite_query: bool | NotGiven = NOT_GIVEN,
search_mode: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
Expand All @@ -343,6 +344,8 @@ def search(

rewrite_query: Whether to rewrite the natural language query for vector search (default false)

search_mode: The search mode to use - "keyword", "vector", or "hybrid" (default "vector")

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
Expand All @@ -362,6 +365,7 @@ def search(
"max_num_results": max_num_results,
"ranking_options": ranking_options,
"rewrite_query": rewrite_query,
"search_mode": search_mode,
},
vector_store_search_params.VectorStoreSearchParams,
),
Expand Down Expand Up @@ -653,6 +657,7 @@ async def search(
max_num_results: int | NotGiven = NOT_GIVEN,
ranking_options: vector_store_search_params.RankingOptions | NotGiven = NOT_GIVEN,
rewrite_query: bool | NotGiven = NOT_GIVEN,
search_mode: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
Expand All @@ -676,6 +681,8 @@ async def search(

rewrite_query: Whether to rewrite the natural language query for vector search (default false)

search_mode: The search mode to use - "keyword", "vector", or "hybrid" (default "vector")

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request
Expand All @@ -695,6 +702,7 @@ async def search(
"max_num_results": max_num_results,
"ranking_options": ranking_options,
"rewrite_query": rewrite_query,
"search_mode": search_mode,
},
vector_store_search_params.VectorStoreSearchParams,
),
Expand Down
56 changes: 53 additions & 3 deletions src/llama_stack_client/types/query_chunks_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,45 @@
from .._models import BaseModel
from .shared.interleaved_content import InterleavedContent

__all__ = ["QueryChunksResponse", "Chunk"]
__all__ = ["QueryChunksResponse", "Chunk", "ChunkChunkMetadata"]


class ChunkChunkMetadata(BaseModel):
    """Metadata describing how a chunk was created and stored.

    All fields are optional. Per the `Chunk.chunk_metadata` docstring, this
    data is used by the backend and is NOT placed in the model context during
    inference.
    """

    chunk_embedding_dimension: Optional[int] = None
    """The dimension of the embedding vector for the chunk."""

    chunk_embedding_model: Optional[str] = None
    """The embedding model used to create the chunk's embedding."""

    chunk_id: Optional[str] = None
    """The ID of the chunk.

    If not set, it will be generated based on the document ID and content.
    """

    chunk_tokenizer: Optional[str] = None
    """The tokenizer used to create the chunk. Default is Tiktoken."""

    chunk_window: Optional[str] = None
    """The window of the chunk, which can be used to group related chunks together."""

    content_token_count: Optional[int] = None
    """The number of tokens in the content of the chunk."""

    created_timestamp: Optional[int] = None
    """An optional timestamp indicating when the chunk was created."""

    document_id: Optional[str] = None
    """The ID of the document this chunk belongs to."""

    metadata_token_count: Optional[int] = None
    """The number of tokens in the metadata of the chunk."""

    source: Optional[str] = None
    """The source of the content, such as a URL, file path, or other identifier."""

    updated_timestamp: Optional[int] = None
    """An optional timestamp indicating when the chunk was last updated."""


class Chunk(BaseModel):
Expand All @@ -16,13 +54,25 @@ class Chunk(BaseModel):

metadata: Dict[str, Union[bool, float, str, List[object], object, None]]
"""
Metadata associated with the chunk, such as document ID, source, or other
relevant information.
Metadata associated with the chunk that will be used in the model context during
inference.
"""

chunk_metadata: Optional[ChunkChunkMetadata] = None
"""Metadata for the chunk that will NOT be used in the context during inference.

The `chunk_metadata` is required backend functionality.
"""

embedding: Optional[List[float]] = None
"""Optional embedding for the chunk. If not provided, it will be computed later."""

stored_chunk_id: Optional[str] = None
"""The chunk ID that is stored in the vector database.

Used for backend functionality.
"""


class QueryChunksResponse(BaseModel):
chunks: List[Chunk]
Expand Down
69 changes: 68 additions & 1 deletion src/llama_stack_client/types/response_create_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
"InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText",
"InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage",
"InputUnionMember1OpenAIResponseMessageContentUnionMember2",
"InputUnionMember1OpenAIResponseMessageContentUnionMember2Annotation",
"InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation",
"InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation",
"InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation",
"InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath",
"Text",
"TextFormat",
"Tool",
Expand Down Expand Up @@ -131,7 +136,69 @@ class InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInp
]


class InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation(
TypedDict, total=False
):
file_id: Required[str]

filename: Required[str]

index: Required[int]

type: Required[Literal["file_citation"]]


class InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation(
TypedDict, total=False
):
end_index: Required[int]

start_index: Required[int]

title: Required[str]

type: Required[Literal["url_citation"]]

url: Required[str]


class InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation(
TypedDict, total=False
):
container_id: Required[str]

end_index: Required[int]

file_id: Required[str]

filename: Required[str]

start_index: Required[int]

type: Required[Literal["container_file_citation"]]


class InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath(
TypedDict, total=False
):
file_id: Required[str]

index: Required[int]

type: Required[Literal["file_path"]]


# Union of all annotation payloads that may appear on an `output_text`
# content part: file citation, URL citation, container-file citation,
# or file path. Discriminated at runtime by the `type` key.
InputUnionMember1OpenAIResponseMessageContentUnionMember2Annotation: TypeAlias = Union[
    InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation,
    InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation,
    InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation,
    InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath,
]


# `output_text` content part: the generated text plus its annotations.
# Functional TypedDict form: `total=False` with every key explicitly
# `Required[...]`, matching the sibling declarations.
InputUnionMember1OpenAIResponseMessageContentUnionMember2 = TypedDict(
    "InputUnionMember1OpenAIResponseMessageContentUnionMember2",
    {
        "annotations": Required[Iterable[InputUnionMember1OpenAIResponseMessageContentUnionMember2Annotation]],
        "text": Required[str],
        "type": Required[Literal["output_text"]],
    },
    total=False,
)
Expand Down Expand Up @@ -193,7 +260,7 @@ class Text(TypedDict, total=False):


class ToolOpenAIResponseInputToolWebSearch(TypedDict, total=False):
type: Required[Literal["web_search", "web_search_preview_2025_03_11"]]
type: Required[Literal["web_search", "web_search_preview", "web_search_preview_2025_03_11"]]

search_context_size: str

Expand Down
Loading
Loading