From b914e86a278d5cdac6a6c081861a53669da3c8be Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Fri, 27 Jun 2025 21:31:46 +0530
Subject: [PATCH] Sync updates from stainless: 0cdc40575bdf6f1771a1ad595a9fa654a0b2160a

---
 src/llama_stack_client/__init__.py            |   3 +-
 src/llama_stack_client/_base_client.py        |  22 +++
 src/llama_stack_client/_files.py              |   2 +-
 .../resources/agents/agents.py                |   8 +-
 .../resources/agents/session.py               |   8 +-
 .../resources/agents/steps.py                 |   8 +-
 .../resources/agents/turn.py                  |   8 +-
 .../resources/benchmarks.py                   |   8 +-
 src/llama_stack_client/resources/chat/chat.py |   8 +-
 .../resources/chat/completions.py             |   8 +-
 .../resources/completions.py                  |   8 +-
 src/llama_stack_client/resources/datasets.py  |   8 +-
 .../resources/embeddings.py                   |   8 +-
 src/llama_stack_client/resources/eval/eval.py |   8 +-
 src/llama_stack_client/resources/eval/jobs.py |   8 +-
 src/llama_stack_client/resources/files.py     |   8 +-
 src/llama_stack_client/resources/inference.py |   8 +-
 src/llama_stack_client/resources/inspect.py   |   8 +-
 src/llama_stack_client/resources/models.py    |   8 +-
 .../resources/post_training/job.py            |   8 +-
 .../resources/post_training/post_training.py  |   8 +-
 src/llama_stack_client/resources/providers.py |   8 +-
 .../resources/responses/input_items.py        |   8 +-
 .../resources/responses/responses.py          |   8 +-
 src/llama_stack_client/resources/routes.py    |   8 +-
 src/llama_stack_client/resources/safety.py    |   8 +-
 src/llama_stack_client/resources/scoring.py   |   8 +-
 .../resources/scoring_functions.py            |   8 +-
 src/llama_stack_client/resources/shields.py   |   8 +-
 .../resources/synthetic_data_generation.py    |   8 +-
 src/llama_stack_client/resources/telemetry.py |   8 +-
 .../resources/tool_runtime/rag_tool.py        |   8 +-
 .../resources/tool_runtime/tool_runtime.py    |   8 +-
 .../resources/toolgroups.py                   |   8 +-
 src/llama_stack_client/resources/tools.py     |   8 +-
 .../resources/vector_dbs.py                   |   8 +-
 src/llama_stack_client/resources/vector_io.py |   8 +-
 .../resources/vector_stores/files.py          |   8 +-
 .../resources/vector_stores/vector_stores.py  |  16 +-
 .../types/query_chunks_response.py            |  56 ++++++-
 .../types/response_create_params.py           |  69 ++++++++-
 .../types/response_list_response.py           | 128 ++++++++++++++++
 .../types/response_object.py                  |  72 +++++++++
 .../types/response_object_stream.py           | 144 ++++++++++++++++++
 .../responses/input_item_list_response.py     |  62 ++++++++
 .../types/vector_io_insert_params.py          |  56 ++++++-
 .../types/vector_store_search_params.py       |   3 +
 tests/api_resources/agents/test_session.py    |   4 +-
 tests/api_resources/agents/test_steps.py      |   4 +-
 tests/api_resources/agents/test_turn.py       |   4 +-
 tests/api_resources/chat/test_completions.py  |   4 +-
 tests/api_resources/eval/test_jobs.py         |   4 +-
 tests/api_resources/post_training/test_job.py |   4 +-
 .../responses/test_input_items.py             |   4 +-
 tests/api_resources/test_agents.py            |   4 +-
 tests/api_resources/test_benchmarks.py        |   4 +-
 tests/api_resources/test_completions.py       |   4 +-
 tests/api_resources/test_datasets.py          |   4 +-
 tests/api_resources/test_embeddings.py        |   4 +-
 tests/api_resources/test_eval.py              |   4 +-
 tests/api_resources/test_files.py             |   4 +-
 tests/api_resources/test_inference.py         |   4 +-
 tests/api_resources/test_inspect.py           |   4 +-
 tests/api_resources/test_models.py            |   4 +-
 tests/api_resources/test_post_training.py     |   4 +-
 tests/api_resources/test_providers.py         |   4 +-
 tests/api_resources/test_responses.py         |   4 +-
 tests/api_resources/test_routes.py            |   4 +-
 tests/api_resources/test_safety.py            |   4 +-
 tests/api_resources/test_scoring.py           |   4 +-
 tests/api_resources/test_scoring_functions.py |   4 +-
 tests/api_resources/test_shields.py           |   4 +-
 .../test_synthetic_data_generation.py         |   4 +-
 tests/api_resources/test_telemetry.py         |   4 +-
 tests/api_resources/test_tool_runtime.py      |   4 +-
 tests/api_resources/test_toolgroups.py        |   4 +-
 tests/api_resources/test_tools.py             |   4 +-
 tests/api_resources/test_vector_dbs.py        |   4 +-
 tests/api_resources/test_vector_io.py         |  32 +++-
 tests/api_resources/test_vector_stores.py     |   6 +-
 .../tool_runtime/test_rag_tool.py             |   4 +-
 .../api_resources/vector_stores/test_files.py |   4 +-
 tests/conftest.py                             |  43 +++++-
 tests/test_client.py                          |   2 +
 84 files changed, 934 insertions(+), 194 deletions(-)

diff --git a/src/llama_stack_client/__init__.py b/src/llama_stack_client/__init__.py
index 7dc65e78..4fdd36f2 100644
--- a/src/llama_stack_client/__init__.py
+++ b/src/llama_stack_client/__init__.py
@@ -36,7 +36,7 @@
     UnprocessableEntityError,
     APIResponseValidationError,
 )
-from ._base_client import DefaultHttpxClient, DefaultAsyncHttpxClient
+from ._base_client import DefaultHttpxClient, DefaultAioHttpClient, DefaultAsyncHttpxClient
 from ._utils._logs import setup_logging as _setup_logging
 
 from .lib.agents.agent import Agent
@@ -84,6 +84,7 @@
     "DEFAULT_CONNECTION_LIMITS",
     "DefaultHttpxClient",
     "DefaultAsyncHttpxClient",
+    "DefaultAioHttpClient",
 ]
 
 if not _t.TYPE_CHECKING:
diff --git a/src/llama_stack_client/_base_client.py b/src/llama_stack_client/_base_client.py
index 4475c723..a0bbc468 100644
--- a/src/llama_stack_client/_base_client.py
+++ b/src/llama_stack_client/_base_client.py
@@ -1289,6 +1289,24 @@ def __init__(self, **kwargs: Any) -> None:
         super().__init__(**kwargs)
 
 
+try:
+    import httpx_aiohttp
+except ImportError:
+
+    class _DefaultAioHttpClient(httpx.AsyncClient):
+        def __init__(self, **_kwargs: Any) -> None:
+            raise RuntimeError("To use the aiohttp client you must have installed the package with the `aiohttp` extra")
+else:
+
+    class _DefaultAioHttpClient(httpx_aiohttp.HttpxAiohttpClient):  # type: ignore
+        def __init__(self, **kwargs: Any) -> None:
+            kwargs.setdefault("timeout", DEFAULT_TIMEOUT)
+            kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS)
+            kwargs.setdefault("follow_redirects", True)
+
+            super().__init__(**kwargs)
+
+
 if TYPE_CHECKING:
     DefaultAsyncHttpxClient = httpx.AsyncClient
     """An alias to `httpx.AsyncClient` that provides the same defaults that this SDK
@@ -1297,8 +1315,12 @@ def __init__(self, **kwargs: Any) -> None:
     This is useful because overriding the `http_client` with your own instance of
     `httpx.AsyncClient` will result in httpx's defaults being used, not ours.
     """
+
+    DefaultAioHttpClient = httpx.AsyncClient
+    """An alias to `httpx.AsyncClient` that changes the default HTTP transport to `aiohttp`."""
 else:
     DefaultAsyncHttpxClient = _DefaultAsyncHttpxClient
+    DefaultAioHttpClient = _DefaultAioHttpClient
 
 
 class AsyncHttpxClientWrapper(DefaultAsyncHttpxClient):
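Reviewer note: the new `DefaultAioHttpClient` above is opt-in. A minimal usage sketch follows, assuming the package is installed with the `aiohttp` extra (e.g. `pip install "llama-stack-client[aiohttp]"`, the usual Stainless convention) and a Llama Stack server at an assumed local address; this is illustrative and not part of the patch.

```python
# Illustrative only -- not part of this diff. Assumes the `aiohttp` extra is
# installed; without it, DefaultAioHttpClient() raises the RuntimeError above.
import asyncio

from llama_stack_client import AsyncLlamaStackClient, DefaultAioHttpClient


async def main() -> None:
    # Swap httpx's default async transport for aiohttp while keeping the
    # SDK's default timeout, connection limits, and redirect handling.
    async with AsyncLlamaStackClient(
        base_url="http://localhost:8321",  # assumed local server address
        http_client=DefaultAioHttpClient(),
    ) as client:
        models = await client.models.list()
        print(models)


asyncio.run(main())
```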
diff --git a/src/llama_stack_client/_files.py b/src/llama_stack_client/_files.py
index cd5d4f51..45f57c0a 100644
--- a/src/llama_stack_client/_files.py
+++ b/src/llama_stack_client/_files.py
@@ -34,7 +34,7 @@ def assert_is_file_content(obj: object, *, key: str | None = None) -> None:
     if not is_file_content(obj):
         prefix = f"Expected entry at `{key}`" if key is not None else f"Expected file input `{obj!r}`"
         raise RuntimeError(
-            f"{prefix} to be bytes, an io.IOBase instance, PathLike or a tuple but received {type(obj)} instead. See https://github.com/stainless-sdks/llama-stack-python/tree/main#file-uploads"
+            f"{prefix} to be bytes, an io.IOBase instance, PathLike or a tuple but received {type(obj)} instead. See https://github.com/llamastack/llama-stack-client-python/tree/main#file-uploads"
         ) from None
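The error message above enumerates the accepted file-content shapes. A short sketch of each, using `client.files.create` as an assumed endpoint (the `purpose` value is also illustrative):

```python
# Sketch of the file-content shapes assert_is_file_content accepts; the
# files.create endpoint and `purpose` value are assumed for illustration.
from pathlib import Path

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed address

client.files.create(file=Path("report.pdf"), purpose="assistants")  # os.PathLike
client.files.create(file=b"raw bytes", purpose="assistants")  # bytes
with open("report.pdf", "rb") as fh:  # io.IOBase, wrapped in a (filename, contents) tuple
    client.files.create(file=("report.pdf", fh), purpose="assistants")
```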
diff --git a/src/llama_stack_client/resources/agents/agents.py b/src/llama_stack_client/resources/agents/agents.py
index 9c06cbd1..5b34cea8 100644
--- a/src/llama_stack_client/resources/agents/agents.py
+++ b/src/llama_stack_client/resources/agents/agents.py
@@ -65,7 +65,7 @@ def with_raw_response(self) -> AgentsResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AgentsResourceWithRawResponse(self)
 
@@ -74,7 +74,7 @@ def with_streaming_response(self) -> AgentsResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AgentsResourceWithStreamingResponse(self)
 
@@ -166,7 +166,7 @@ def with_raw_response(self) -> AsyncAgentsResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncAgentsResourceWithRawResponse(self)
 
@@ -175,7 +175,7 @@ def with_streaming_response(self) -> AsyncAgentsResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AsyncAgentsResourceWithStreamingResponse(self)
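Every resource file in this patch repeats the same two docstrings, so one concrete sketch of what they describe may help (resource and endpoint chosen for illustration; the accessor pattern comes from the generated SDK):

```python
# Minimal sketch of the .with_raw_response / .with_streaming_response
# accessors these docstrings document; models.list is just a convenient call.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed address

# Raw variant: the body is still read, but headers and status stay
# inspectable, and the parsed model is recovered with .parse().
raw = client.models.with_raw_response.list()
print(raw.headers)
models = raw.parse()

# Streaming variant: the body is not read eagerly; use a context manager so
# the connection is released deterministically.
with client.models.with_streaming_response.list() as response:
    print(response.headers)
    for line in response.iter_lines():
        print(line)
```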
diff --git a/src/llama_stack_client/resources/agents/session.py b/src/llama_stack_client/resources/agents/session.py
index ebdde5c6..ccefeb0b 100644
--- a/src/llama_stack_client/resources/agents/session.py
+++ b/src/llama_stack_client/resources/agents/session.py
@@ -31,7 +31,7 @@ def with_raw_response(self) -> SessionResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return SessionResourceWithRawResponse(self)
 
@@ -40,7 +40,7 @@ def with_streaming_response(self) -> SessionResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return SessionResourceWithStreamingResponse(self)
 
@@ -169,7 +169,7 @@ def with_raw_response(self) -> AsyncSessionResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncSessionResourceWithRawResponse(self)
 
@@ -178,7 +178,7 @@ def with_streaming_response(self) -> AsyncSessionResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AsyncSessionResourceWithStreamingResponse(self)
diff --git a/src/llama_stack_client/resources/agents/steps.py b/src/llama_stack_client/resources/agents/steps.py
index 1dc60b92..78f9a88e 100644
--- a/src/llama_stack_client/resources/agents/steps.py
+++ b/src/llama_stack_client/resources/agents/steps.py
@@ -26,7 +26,7 @@ def with_raw_response(self) -> StepsResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return StepsResourceWithRawResponse(self)
 
@@ -35,7 +35,7 @@ def with_streaming_response(self) -> StepsResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return StepsResourceWithStreamingResponse(self)
 
@@ -89,7 +89,7 @@ def with_raw_response(self) -> AsyncStepsResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncStepsResourceWithRawResponse(self)
 
@@ -98,7 +98,7 @@ def with_streaming_response(self) -> AsyncStepsResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AsyncStepsResourceWithStreamingResponse(self)
diff --git a/src/llama_stack_client/resources/agents/turn.py b/src/llama_stack_client/resources/agents/turn.py
index 8c48869e..b98b593b 100644
--- a/src/llama_stack_client/resources/agents/turn.py
+++ b/src/llama_stack_client/resources/agents/turn.py
@@ -34,7 +34,7 @@ def with_raw_response(self) -> TurnResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return TurnResourceWithRawResponse(self)
 
@@ -43,7 +43,7 @@ def with_streaming_response(self) -> TurnResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return TurnResourceWithStreamingResponse(self)
 
@@ -428,7 +428,7 @@ def with_raw_response(self) -> AsyncTurnResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncTurnResourceWithRawResponse(self)
 
@@ -437,7 +437,7 @@ def with_streaming_response(self) -> AsyncTurnResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AsyncTurnResourceWithStreamingResponse(self)
diff --git a/src/llama_stack_client/resources/benchmarks.py b/src/llama_stack_client/resources/benchmarks.py
index f22865cb..7b92833b 100644
--- a/src/llama_stack_client/resources/benchmarks.py
+++ b/src/llama_stack_client/resources/benchmarks.py
@@ -32,7 +32,7 @@ def with_raw_response(self) -> BenchmarksResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return BenchmarksResourceWithRawResponse(self)
 
@@ -41,7 +41,7 @@ def with_streaming_response(self) -> BenchmarksResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return BenchmarksResourceWithStreamingResponse(self)
 
@@ -169,7 +169,7 @@ def with_raw_response(self) -> AsyncBenchmarksResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncBenchmarksResourceWithRawResponse(self)
 
@@ -178,7 +178,7 @@ def with_streaming_response(self) -> AsyncBenchmarksResourceWithStreamingRespons
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AsyncBenchmarksResourceWithStreamingResponse(self)
diff --git a/src/llama_stack_client/resources/chat/chat.py b/src/llama_stack_client/resources/chat/chat.py
index 681051f3..3e3715c1 100644
--- a/src/llama_stack_client/resources/chat/chat.py
+++ b/src/llama_stack_client/resources/chat/chat.py
@@ -27,7 +27,7 @@ def with_raw_response(self) -> ChatResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return ChatResourceWithRawResponse(self)
 
@@ -36,7 +36,7 @@ def with_streaming_response(self) -> ChatResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return ChatResourceWithStreamingResponse(self)
 
@@ -52,7 +52,7 @@ def with_raw_response(self) -> AsyncChatResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncChatResourceWithRawResponse(self)
 
@@ -61,7 +61,7 @@ def with_streaming_response(self) -> AsyncChatResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AsyncChatResourceWithStreamingResponse(self)
diff --git a/src/llama_stack_client/resources/chat/completions.py b/src/llama_stack_client/resources/chat/completions.py
index 1ab543c5..1355f97a 100644
--- a/src/llama_stack_client/resources/chat/completions.py
+++ b/src/llama_stack_client/resources/chat/completions.py
@@ -35,7 +35,7 @@ def with_raw_response(self) -> CompletionsResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return CompletionsResourceWithRawResponse(self)
 
@@ -44,7 +44,7 @@ def with_streaming_response(self) -> CompletionsResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return CompletionsResourceWithStreamingResponse(self)
 
@@ -515,7 +515,7 @@ def with_raw_response(self) -> AsyncCompletionsResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncCompletionsResourceWithRawResponse(self)
 
@@ -524,7 +524,7 @@ def with_streaming_response(self) -> AsyncCompletionsResourceWithStreamingRespon
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AsyncCompletionsResourceWithStreamingResponse(self)
diff --git a/src/llama_stack_client/resources/completions.py b/src/llama_stack_client/resources/completions.py
index c4acf525..23554ccb 100644
--- a/src/llama_stack_client/resources/completions.py
+++ b/src/llama_stack_client/resources/completions.py
@@ -32,7 +32,7 @@ def with_raw_response(self) -> CompletionsResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return CompletionsResourceWithRawResponse(self)
 
@@ -41,7 +41,7 @@ def with_streaming_response(self) -> CompletionsResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return CompletionsResourceWithStreamingResponse(self)
 
@@ -370,7 +370,7 @@ def with_raw_response(self) -> AsyncCompletionsResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncCompletionsResourceWithRawResponse(self)
 
@@ -379,7 +379,7 @@ def with_streaming_response(self) -> AsyncCompletionsResourceWithStreamingRespon
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AsyncCompletionsResourceWithStreamingResponse(self)
diff --git a/src/llama_stack_client/resources/datasets.py b/src/llama_stack_client/resources/datasets.py
index 447e2305..45dcaeba 100644
--- a/src/llama_stack_client/resources/datasets.py
+++ b/src/llama_stack_client/resources/datasets.py
@@ -35,7 +35,7 @@ def with_raw_response(self) -> DatasetsResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return DatasetsResourceWithRawResponse(self)
 
@@ -44,7 +44,7 @@ def with_streaming_response(self) -> DatasetsResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return DatasetsResourceWithStreamingResponse(self)
 
@@ -273,7 +273,7 @@ def with_raw_response(self) -> AsyncDatasetsResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncDatasetsResourceWithRawResponse(self)
 
@@ -282,7 +282,7 @@ def with_streaming_response(self) -> AsyncDatasetsResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AsyncDatasetsResourceWithStreamingResponse(self)
diff --git a/src/llama_stack_client/resources/embeddings.py b/src/llama_stack_client/resources/embeddings.py
index 862653b3..144ebbf2 100644
--- a/src/llama_stack_client/resources/embeddings.py
+++ b/src/llama_stack_client/resources/embeddings.py
@@ -30,7 +30,7 @@ def with_raw_response(self) -> EmbeddingsResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return EmbeddingsResourceWithRawResponse(self)
 
@@ -39,7 +39,7 @@ def with_streaming_response(self) -> EmbeddingsResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return EmbeddingsResourceWithStreamingResponse(self)
 
@@ -112,7 +112,7 @@ def with_raw_response(self) -> AsyncEmbeddingsResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncEmbeddingsResourceWithRawResponse(self)
 
@@ -121,7 +121,7 @@ def with_streaming_response(self) -> AsyncEmbeddingsResourceWithStreamingRespons
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AsyncEmbeddingsResourceWithStreamingResponse(self)
diff --git a/src/llama_stack_client/resources/eval/eval.py b/src/llama_stack_client/resources/eval/eval.py
index 23d1500c..006f1717 100644
--- a/src/llama_stack_client/resources/eval/eval.py
+++ b/src/llama_stack_client/resources/eval/eval.py
@@ -49,7 +49,7 @@ def with_raw_response(self) -> EvalResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return EvalResourceWithRawResponse(self)
 
@@ -58,7 +58,7 @@ def with_streaming_response(self) -> EvalResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return EvalResourceWithStreamingResponse(self)
 
@@ -250,7 +250,7 @@ def with_raw_response(self) -> AsyncEvalResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncEvalResourceWithRawResponse(self)
 
@@ -259,7 +259,7 @@ def with_streaming_response(self) -> AsyncEvalResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AsyncEvalResourceWithStreamingResponse(self)
diff --git a/src/llama_stack_client/resources/eval/jobs.py b/src/llama_stack_client/resources/eval/jobs.py
index d46b63f9..16fa337f 100644
--- a/src/llama_stack_client/resources/eval/jobs.py
+++ b/src/llama_stack_client/resources/eval/jobs.py
@@ -27,7 +27,7 @@ def with_raw_response(self) -> JobsResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return JobsResourceWithRawResponse(self)
 
@@ -36,7 +36,7 @@ def with_streaming_response(self) -> JobsResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return JobsResourceWithStreamingResponse(self)
 
@@ -157,7 +157,7 @@ def with_raw_response(self) -> AsyncJobsResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncJobsResourceWithRawResponse(self)
 
@@ -166,7 +166,7 @@ def with_streaming_response(self) -> AsyncJobsResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AsyncJobsResourceWithStreamingResponse(self)
diff --git a/src/llama_stack_client/resources/files.py b/src/llama_stack_client/resources/files.py
index d795a42f..3eac6486 100644
--- a/src/llama_stack_client/resources/files.py
+++ b/src/llama_stack_client/resources/files.py
@@ -33,7 +33,7 @@ def with_raw_response(self) -> FilesResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return FilesResourceWithRawResponse(self)
 
@@ -42,7 +42,7 @@ def with_streaming_response(self) -> FilesResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return FilesResourceWithStreamingResponse(self)
 
@@ -264,7 +264,7 @@ def with_raw_response(self) -> AsyncFilesResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncFilesResourceWithRawResponse(self)
 
@@ -273,7 +273,7 @@ def with_streaming_response(self) -> AsyncFilesResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AsyncFilesResourceWithStreamingResponse(self)
diff --git a/src/llama_stack_client/resources/inference.py b/src/llama_stack_client/resources/inference.py
index a8bebdad..84a8dd96 100644
--- a/src/llama_stack_client/resources/inference.py
+++ b/src/llama_stack_client/resources/inference.py
@@ -48,7 +48,7 @@ def with_raw_response(self) -> InferenceResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return InferenceResourceWithRawResponse(self)
 
@@ -57,7 +57,7 @@ def with_streaming_response(self) -> InferenceResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return InferenceResourceWithStreamingResponse(self)
 
@@ -686,7 +686,7 @@ def with_raw_response(self) -> AsyncInferenceResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncInferenceResourceWithRawResponse(self)
 
@@ -695,7 +695,7 @@ def with_streaming_response(self) -> AsyncInferenceResourceWithStreamingResponse
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AsyncInferenceResourceWithStreamingResponse(self)
diff --git a/src/llama_stack_client/resources/inspect.py b/src/llama_stack_client/resources/inspect.py
index dd9044e3..eb028c16 100644
--- a/src/llama_stack_client/resources/inspect.py
+++ b/src/llama_stack_client/resources/inspect.py
@@ -27,7 +27,7 @@ def with_raw_response(self) -> InspectResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return InspectResourceWithRawResponse(self)
 
@@ -36,7 +36,7 @@ def with_streaming_response(self) -> InspectResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return InspectResourceWithStreamingResponse(self)
 
@@ -86,7 +86,7 @@ def with_raw_response(self) -> AsyncInspectResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncInspectResourceWithRawResponse(self)
 
@@ -95,7 +95,7 @@ def with_streaming_response(self) -> AsyncInspectResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AsyncInspectResourceWithStreamingResponse(self)
diff --git a/src/llama_stack_client/resources/models.py b/src/llama_stack_client/resources/models.py
index b3fcaa69..4efb632a 100644
--- a/src/llama_stack_client/resources/models.py
+++ b/src/llama_stack_client/resources/models.py
@@ -33,7 +33,7 @@ def with_raw_response(self) -> ModelsResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return ModelsResourceWithRawResponse(self)
 
@@ -42,7 +42,7 @@ def with_streaming_response(self) -> ModelsResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return ModelsResourceWithStreamingResponse(self)
 
@@ -199,7 +199,7 @@ def with_raw_response(self) -> AsyncModelsResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncModelsResourceWithRawResponse(self)
 
@@ -208,7 +208,7 @@ def with_streaming_response(self) -> AsyncModelsResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AsyncModelsResourceWithStreamingResponse(self)
diff --git a/src/llama_stack_client/resources/post_training/job.py b/src/llama_stack_client/resources/post_training/job.py
index 51a6ccda..2252b19e 100644
--- a/src/llama_stack_client/resources/post_training/job.py
+++ b/src/llama_stack_client/resources/post_training/job.py
@@ -33,7 +33,7 @@ def with_raw_response(self) -> JobResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return JobResourceWithRawResponse(self)
 
@@ -42,7 +42,7 @@ def with_streaming_response(self) -> JobResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return JobResourceWithStreamingResponse(self)
 
@@ -186,7 +186,7 @@ def with_raw_response(self) -> AsyncJobResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncJobResourceWithRawResponse(self)
 
@@ -195,7 +195,7 @@ def with_streaming_response(self) -> AsyncJobResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AsyncJobResourceWithStreamingResponse(self)
diff --git a/src/llama_stack_client/resources/post_training/post_training.py b/src/llama_stack_client/resources/post_training/post_training.py
index 6149edc3..ff1fab45 100644
--- a/src/llama_stack_client/resources/post_training/post_training.py
+++ b/src/llama_stack_client/resources/post_training/post_training.py
@@ -46,7 +46,7 @@ def with_raw_response(self) -> PostTrainingResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return PostTrainingResourceWithRawResponse(self)
 
@@ -55,7 +55,7 @@ def with_streaming_response(self) -> PostTrainingResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return PostTrainingResourceWithStreamingResponse(self)
 
@@ -193,7 +193,7 @@ def with_raw_response(self) -> AsyncPostTrainingResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncPostTrainingResourceWithRawResponse(self)
 
@@ -202,7 +202,7 @@ def with_streaming_response(self) -> AsyncPostTrainingResourceWithStreamingRespo
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AsyncPostTrainingResourceWithStreamingResponse(self)
diff --git a/src/llama_stack_client/resources/providers.py b/src/llama_stack_client/resources/providers.py
index a1c092eb..a50f7d67 100644
--- a/src/llama_stack_client/resources/providers.py
+++ b/src/llama_stack_client/resources/providers.py
@@ -30,7 +30,7 @@ def with_raw_response(self) -> ProvidersResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return ProvidersResourceWithRawResponse(self)
 
@@ -39,7 +39,7 @@ def with_streaming_response(self) -> ProvidersResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return ProvidersResourceWithStreamingResponse(self)
 
@@ -107,7 +107,7 @@ def with_raw_response(self) -> AsyncProvidersResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncProvidersResourceWithRawResponse(self)
 
@@ -116,7 +116,7 @@ def with_streaming_response(self) -> AsyncProvidersResourceWithStreamingResponse
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AsyncProvidersResourceWithStreamingResponse(self)
diff --git a/src/llama_stack_client/resources/responses/input_items.py b/src/llama_stack_client/resources/responses/input_items.py
index 5379ad4d..08139af7 100644
--- a/src/llama_stack_client/resources/responses/input_items.py
+++ b/src/llama_stack_client/resources/responses/input_items.py
@@ -31,7 +31,7 @@ def with_raw_response(self) -> InputItemsResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return InputItemsResourceWithRawResponse(self)
 
@@ -40,7 +40,7 @@ def with_streaming_response(self) -> InputItemsResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return InputItemsResourceWithStreamingResponse(self)
 
@@ -114,7 +114,7 @@ def with_raw_response(self) -> AsyncInputItemsResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncInputItemsResourceWithRawResponse(self)
 
@@ -123,7 +123,7 @@ def with_streaming_response(self) -> AsyncInputItemsResourceWithStreamingRespons
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AsyncInputItemsResourceWithStreamingResponse(self)
diff --git a/src/llama_stack_client/resources/responses/responses.py b/src/llama_stack_client/resources/responses/responses.py
index ba147335..fa05f7ed 100644
--- a/src/llama_stack_client/resources/responses/responses.py
+++ b/src/llama_stack_client/resources/responses/responses.py
@@ -46,7 +46,7 @@ def with_raw_response(self) -> ResponsesResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return ResponsesResourceWithRawResponse(self)
 
@@ -55,7 +55,7 @@ def with_streaming_response(self) -> ResponsesResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return ResponsesResourceWithStreamingResponse(self)
 
@@ -335,7 +335,7 @@ def with_raw_response(self) -> AsyncResponsesResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncResponsesResourceWithRawResponse(self)
 
@@ -344,7 +344,7 @@ def with_streaming_response(self) -> AsyncResponsesResourceWithStreamingResponse
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AsyncResponsesResourceWithStreamingResponse(self)
diff --git a/src/llama_stack_client/resources/routes.py b/src/llama_stack_client/resources/routes.py
index d7c7cfff..a95b5e06 100644
--- a/src/llama_stack_client/resources/routes.py
+++ b/src/llama_stack_client/resources/routes.py
@@ -29,7 +29,7 @@ def with_raw_response(self) -> RoutesResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return RoutesResourceWithRawResponse(self)
 
@@ -38,7 +38,7 @@ def with_streaming_response(self) -> RoutesResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return RoutesResourceWithStreamingResponse(self)
 
@@ -73,7 +73,7 @@ def with_raw_response(self) -> AsyncRoutesResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncRoutesResourceWithRawResponse(self)
 
@@ -82,7 +82,7 @@ def with_streaming_response(self) -> AsyncRoutesResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AsyncRoutesResourceWithStreamingResponse(self)
diff --git a/src/llama_stack_client/resources/safety.py b/src/llama_stack_client/resources/safety.py
index abd2e775..813a1f67 100644
--- a/src/llama_stack_client/resources/safety.py
+++ b/src/llama_stack_client/resources/safety.py
@@ -31,7 +31,7 @@ def with_raw_response(self) -> SafetyResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return SafetyResourceWithRawResponse(self)
 
@@ -40,7 +40,7 @@ def with_streaming_response(self) -> SafetyResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return SafetyResourceWithStreamingResponse(self)
 
@@ -99,7 +99,7 @@ def with_raw_response(self) -> AsyncSafetyResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncSafetyResourceWithRawResponse(self)
 
@@ -108,7 +108,7 @@ def with_streaming_response(self) -> AsyncSafetyResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return AsyncSafetyResourceWithStreamingResponse(self)
diff --git a/src/llama_stack_client/resources/scoring.py b/src/llama_stack_client/resources/scoring.py
index dd650a82..3e64f8eb 100644
--- a/src/llama_stack_client/resources/scoring.py
+++ b/src/llama_stack_client/resources/scoring.py
@@ -32,7 +32,7 @@ def with_raw_response(self) -> ScoringResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return ScoringResourceWithRawResponse(self)
 
@@ -41,7 +41,7 @@ def with_streaming_response(self) -> ScoringResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
         """
         return ScoringResourceWithStreamingResponse(self)
 
@@ -143,7 +143,7 @@ def with_raw_response(self) -> AsyncScoringResourceWithRawResponse:
         This property can be used as a prefix for any HTTP method call to return
         the raw response object instead of the parsed content.
 
-        For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
+        For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
         """
         return AsyncScoringResourceWithRawResponse(self)
 
@@ -152,7 +152,7 @@ def with_streaming_response(self) -> AsyncScoringResourceWithStreamingResponse:
         """
         An alternative to `.with_raw_response` that doesn't eagerly read the response body.
- For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return AsyncScoringResourceWithStreamingResponse(self) diff --git a/src/llama_stack_client/resources/scoring_functions.py b/src/llama_stack_client/resources/scoring_functions.py index 07d9586f..e1a77477 100644 --- a/src/llama_stack_client/resources/scoring_functions.py +++ b/src/llama_stack_client/resources/scoring_functions.py @@ -34,7 +34,7 @@ def with_raw_response(self) -> ScoringFunctionsResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return ScoringFunctionsResourceWithRawResponse(self) @@ -43,7 +43,7 @@ def with_streaming_response(self) -> ScoringFunctionsResourceWithStreamingRespon """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return ScoringFunctionsResourceWithStreamingResponse(self) @@ -170,7 +170,7 @@ def with_raw_response(self) -> AsyncScoringFunctionsResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return AsyncScoringFunctionsResourceWithRawResponse(self) @@ -179,7 +179,7 @@ def with_streaming_response(self) -> AsyncScoringFunctionsResourceWithStreamingR """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return AsyncScoringFunctionsResourceWithStreamingResponse(self) diff --git a/src/llama_stack_client/resources/shields.py b/src/llama_stack_client/resources/shields.py index 5982298a..cf0c7678 100644 --- a/src/llama_stack_client/resources/shields.py +++ b/src/llama_stack_client/resources/shields.py @@ -32,7 +32,7 @@ def with_raw_response(self) -> ShieldsResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return ShieldsResourceWithRawResponse(self) @@ -41,7 +41,7 @@ def with_streaming_response(self) -> ShieldsResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
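[Editor's note, not part of the patch: the `with_streaming_response` variant described above defers reading the body and is used as a context manager. A minimal sketch, assuming the standard streaming-response interface of this SDK family:]

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient()

    # The streaming variant does not eagerly read the body; consume it
    # inside the context manager, e.g. line by line.
    with client.scoring_functions.with_streaming_response.list() as response:
        print(response.status_code)
        for line in response.iter_lines():
            print(line)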
- For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return ShieldsResourceWithStreamingResponse(self) @@ -160,7 +160,7 @@ def with_raw_response(self) -> AsyncShieldsResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return AsyncShieldsResourceWithRawResponse(self) @@ -169,7 +169,7 @@ def with_streaming_response(self) -> AsyncShieldsResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return AsyncShieldsResourceWithStreamingResponse(self) diff --git a/src/llama_stack_client/resources/synthetic_data_generation.py b/src/llama_stack_client/resources/synthetic_data_generation.py index 59df1b39..6e4e5a08 100644 --- a/src/llama_stack_client/resources/synthetic_data_generation.py +++ b/src/llama_stack_client/resources/synthetic_data_generation.py @@ -32,7 +32,7 @@ def with_raw_response(self) -> SyntheticDataGenerationResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return SyntheticDataGenerationResourceWithRawResponse(self) @@ -41,7 +41,7 @@ def with_streaming_response(self) -> SyntheticDataGenerationResourceWithStreamin """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return SyntheticDataGenerationResourceWithStreamingResponse(self) @@ -94,7 +94,7 @@ def with_raw_response(self) -> AsyncSyntheticDataGenerationResourceWithRawRespon This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return AsyncSyntheticDataGenerationResourceWithRawResponse(self) @@ -103,7 +103,7 @@ def with_streaming_response(self) -> AsyncSyntheticDataGenerationResourceWithStr """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
- For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return AsyncSyntheticDataGenerationResourceWithStreamingResponse(self) diff --git a/src/llama_stack_client/resources/telemetry.py b/src/llama_stack_client/resources/telemetry.py index 96af4528..1cec537e 100644 --- a/src/llama_stack_client/resources/telemetry.py +++ b/src/llama_stack_client/resources/telemetry.py @@ -43,7 +43,7 @@ def with_raw_response(self) -> TelemetryResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return TelemetryResourceWithRawResponse(self) @@ -52,7 +52,7 @@ def with_streaming_response(self) -> TelemetryResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return TelemetryResourceWithStreamingResponse(self) @@ -385,7 +385,7 @@ def with_raw_response(self) -> AsyncTelemetryResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return AsyncTelemetryResourceWithRawResponse(self) @@ -394,7 +394,7 @@ def with_streaming_response(self) -> AsyncTelemetryResourceWithStreamingResponse """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return AsyncTelemetryResourceWithStreamingResponse(self) diff --git a/src/llama_stack_client/resources/tool_runtime/rag_tool.py b/src/llama_stack_client/resources/tool_runtime/rag_tool.py index 0ed84418..65ef0463 100644 --- a/src/llama_stack_client/resources/tool_runtime/rag_tool.py +++ b/src/llama_stack_client/resources/tool_runtime/rag_tool.py @@ -33,7 +33,7 @@ def with_raw_response(self) -> RagToolResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return RagToolResourceWithRawResponse(self) @@ -42,7 +42,7 @@ def with_streaming_response(self) -> RagToolResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
- For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return RagToolResourceWithStreamingResponse(self) @@ -141,7 +141,7 @@ def with_raw_response(self) -> AsyncRagToolResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return AsyncRagToolResourceWithRawResponse(self) @@ -150,7 +150,7 @@ def with_streaming_response(self) -> AsyncRagToolResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return AsyncRagToolResourceWithStreamingResponse(self) diff --git a/src/llama_stack_client/resources/tool_runtime/tool_runtime.py b/src/llama_stack_client/resources/tool_runtime/tool_runtime.py index 1df58598..ecb17c38 100644 --- a/src/llama_stack_client/resources/tool_runtime/tool_runtime.py +++ b/src/llama_stack_client/resources/tool_runtime/tool_runtime.py @@ -44,7 +44,7 @@ def with_raw_response(self) -> ToolRuntimeResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return ToolRuntimeResourceWithRawResponse(self) @@ -53,7 +53,7 @@ def with_streaming_response(self) -> ToolRuntimeResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return ToolRuntimeResourceWithStreamingResponse(self) @@ -159,7 +159,7 @@ def with_raw_response(self) -> AsyncToolRuntimeResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return AsyncToolRuntimeResourceWithRawResponse(self) @@ -168,7 +168,7 @@ def with_streaming_response(self) -> AsyncToolRuntimeResourceWithStreamingRespon """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
- For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return AsyncToolRuntimeResourceWithStreamingResponse(self) diff --git a/src/llama_stack_client/resources/toolgroups.py b/src/llama_stack_client/resources/toolgroups.py index a618657d..3f0ba200 100644 --- a/src/llama_stack_client/resources/toolgroups.py +++ b/src/llama_stack_client/resources/toolgroups.py @@ -32,7 +32,7 @@ def with_raw_response(self) -> ToolgroupsResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return ToolgroupsResourceWithRawResponse(self) @@ -41,7 +41,7 @@ def with_streaming_response(self) -> ToolgroupsResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return ToolgroupsResourceWithStreamingResponse(self) @@ -195,7 +195,7 @@ def with_raw_response(self) -> AsyncToolgroupsResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return AsyncToolgroupsResourceWithRawResponse(self) @@ -204,7 +204,7 @@ def with_streaming_response(self) -> AsyncToolgroupsResourceWithStreamingRespons """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return AsyncToolgroupsResourceWithStreamingResponse(self) diff --git a/src/llama_stack_client/resources/tools.py b/src/llama_stack_client/resources/tools.py index c9fd3808..7954f776 100644 --- a/src/llama_stack_client/resources/tools.py +++ b/src/llama_stack_client/resources/tools.py @@ -32,7 +32,7 @@ def with_raw_response(self) -> ToolsResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return ToolsResourceWithRawResponse(self) @@ -41,7 +41,7 @@ def with_streaming_response(self) -> ToolsResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
- For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return ToolsResourceWithStreamingResponse(self) @@ -124,7 +124,7 @@ def with_raw_response(self) -> AsyncToolsResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return AsyncToolsResourceWithRawResponse(self) @@ -133,7 +133,7 @@ def with_streaming_response(self) -> AsyncToolsResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return AsyncToolsResourceWithStreamingResponse(self) diff --git a/src/llama_stack_client/resources/vector_dbs.py b/src/llama_stack_client/resources/vector_dbs.py index c1ad232d..3838c38e 100644 --- a/src/llama_stack_client/resources/vector_dbs.py +++ b/src/llama_stack_client/resources/vector_dbs.py @@ -33,7 +33,7 @@ def with_raw_response(self) -> VectorDBsResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return VectorDBsResourceWithRawResponse(self) @@ -42,7 +42,7 @@ def with_streaming_response(self) -> VectorDBsResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return VectorDBsResourceWithStreamingResponse(self) @@ -199,7 +199,7 @@ def with_raw_response(self) -> AsyncVectorDBsResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return AsyncVectorDBsResourceWithRawResponse(self) @@ -208,7 +208,7 @@ def with_streaming_response(self) -> AsyncVectorDBsResourceWithStreamingResponse """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
- For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return AsyncVectorDBsResourceWithStreamingResponse(self) diff --git a/src/llama_stack_client/resources/vector_io.py b/src/llama_stack_client/resources/vector_io.py index 3d985e71..3e361435 100644 --- a/src/llama_stack_client/resources/vector_io.py +++ b/src/llama_stack_client/resources/vector_io.py @@ -31,7 +31,7 @@ def with_raw_response(self) -> VectorIoResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return VectorIoResourceWithRawResponse(self) @@ -40,7 +40,7 @@ def with_streaming_response(self) -> VectorIoResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return VectorIoResourceWithStreamingResponse(self) @@ -152,7 +152,7 @@ def with_raw_response(self) -> AsyncVectorIoResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return AsyncVectorIoResourceWithRawResponse(self) @@ -161,7 +161,7 @@ def with_streaming_response(self) -> AsyncVectorIoResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return AsyncVectorIoResourceWithStreamingResponse(self) diff --git a/src/llama_stack_client/resources/vector_stores/files.py b/src/llama_stack_client/resources/vector_stores/files.py index 2d6af9cf..1ef48084 100644 --- a/src/llama_stack_client/resources/vector_stores/files.py +++ b/src/llama_stack_client/resources/vector_stores/files.py @@ -30,7 +30,7 @@ def with_raw_response(self) -> FilesResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return FilesResourceWithRawResponse(self) @@ -39,7 +39,7 @@ def with_streaming_response(self) -> FilesResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
- For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return FilesResourceWithStreamingResponse(self) @@ -101,7 +101,7 @@ def with_raw_response(self) -> AsyncFilesResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return AsyncFilesResourceWithRawResponse(self) @@ -110,7 +110,7 @@ def with_streaming_response(self) -> AsyncFilesResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return AsyncFilesResourceWithStreamingResponse(self) diff --git a/src/llama_stack_client/resources/vector_stores/vector_stores.py b/src/llama_stack_client/resources/vector_stores/vector_stores.py index 79ab9db3..7985cee9 100644 --- a/src/llama_stack_client/resources/vector_stores/vector_stores.py +++ b/src/llama_stack_client/resources/vector_stores/vector_stores.py @@ -50,7 +50,7 @@ def with_raw_response(self) -> VectorStoresResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return VectorStoresResourceWithRawResponse(self) @@ -59,7 +59,7 @@ def with_streaming_response(self) -> VectorStoresResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return VectorStoresResourceWithStreamingResponse(self) @@ -320,6 +320,7 @@ def search( max_num_results: int | NotGiven = NOT_GIVEN, ranking_options: vector_store_search_params.RankingOptions | NotGiven = NOT_GIVEN, rewrite_query: bool | NotGiven = NOT_GIVEN, + search_mode: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
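[Editor's note, not part of the patch: the hunk above threads a new `search_mode` argument through `vector_stores.search`, both in the signature and in the request body. A hedged usage sketch — the store ID and query are placeholders, and the keyword style of `vector_store_id` is an assumption about the generated signature:]

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient()

    # Per the docstring added below, search_mode selects "keyword",
    # "vector", or "hybrid" retrieval and defaults to "vector" if omitted.
    results = client.vector_stores.search(
        vector_store_id="vs_123",          # placeholder ID
        query="How do I rotate API keys?",
        search_mode="hybrid",
        max_num_results=5,
    )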
extra_headers: Headers | None = None, @@ -343,6 +344,8 @@ def search( rewrite_query: Whether to rewrite the natural language query for vector search (default false) + search_mode: The search mode to use - "keyword", "vector", or "hybrid" (default "vector") + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -362,6 +365,7 @@ def search( "max_num_results": max_num_results, "ranking_options": ranking_options, "rewrite_query": rewrite_query, + "search_mode": search_mode, }, vector_store_search_params.VectorStoreSearchParams, ), @@ -383,7 +387,7 @@ def with_raw_response(self) -> AsyncVectorStoresResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return AsyncVectorStoresResourceWithRawResponse(self) @@ -392,7 +396,7 @@ def with_streaming_response(self) -> AsyncVectorStoresResourceWithStreamingRespo """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response """ return AsyncVectorStoresResourceWithStreamingResponse(self) @@ -653,6 +657,7 @@ async def search( max_num_results: int | NotGiven = NOT_GIVEN, ranking_options: vector_store_search_params.RankingOptions | NotGiven = NOT_GIVEN, rewrite_query: bool | NotGiven = NOT_GIVEN, + search_mode: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -676,6 +681,8 @@ async def search( rewrite_query: Whether to rewrite the natural language query for vector search (default false) + search_mode: The search mode to use - "keyword", "vector", or "hybrid" (default "vector") + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -695,6 +702,7 @@ async def search( "max_num_results": max_num_results, "ranking_options": ranking_options, "rewrite_query": rewrite_query, + "search_mode": search_mode, }, vector_store_search_params.VectorStoreSearchParams, ), diff --git a/src/llama_stack_client/types/query_chunks_response.py b/src/llama_stack_client/types/query_chunks_response.py index f668ecac..97c1927c 100644 --- a/src/llama_stack_client/types/query_chunks_response.py +++ b/src/llama_stack_client/types/query_chunks_response.py @@ -5,7 +5,45 @@ from .._models import BaseModel from .shared.interleaved_content import InterleavedContent -__all__ = ["QueryChunksResponse", "Chunk"] +__all__ = ["QueryChunksResponse", "Chunk", "ChunkChunkMetadata"] + + +class ChunkChunkMetadata(BaseModel): + chunk_embedding_dimension: Optional[int] = None + """The dimension of the embedding vector for the chunk.""" + + chunk_embedding_model: Optional[str] = None + """The embedding model used to create the chunk's embedding.""" + + chunk_id: Optional[str] = None + """The ID of the chunk. + + If not set, it will be generated based on the document ID and content. 
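[Editor's note, not part of the patch: the `ChunkChunkMetadata` type introduced here splits chunk information in two — `Chunk.metadata` feeds the model context at inference time, while `chunk_metadata` is backend-only bookkeeping. An illustrative read-side sketch, assuming a populated vector DB and the `vector_io.query` call from this SDK:]

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient()

    resp = client.vector_io.query(
        vector_db_id="my-db",              # placeholder ID
        query="retrieval strategies",
    )
    for chunk in resp.chunks:
        # `metadata` is user-facing and enters the model context;
        # `chunk_metadata` is backend bookkeeping and may be None.
        print(chunk.metadata.get("document_id"))
        if chunk.chunk_metadata is not None:
            print(chunk.chunk_metadata.content_token_count)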
+ """ + + chunk_tokenizer: Optional[str] = None + """The tokenizer used to create the chunk. Default is Tiktoken.""" + + chunk_window: Optional[str] = None + """The window of the chunk, which can be used to group related chunks together.""" + + content_token_count: Optional[int] = None + """The number of tokens in the content of the chunk.""" + + created_timestamp: Optional[int] = None + """An optional timestamp indicating when the chunk was created.""" + + document_id: Optional[str] = None + """The ID of the document this chunk belongs to.""" + + metadata_token_count: Optional[int] = None + """The number of tokens in the metadata of the chunk.""" + + source: Optional[str] = None + """The source of the content, such as a URL, file path, or other identifier.""" + + updated_timestamp: Optional[int] = None + """An optional timestamp indicating when the chunk was last updated.""" class Chunk(BaseModel): @@ -16,13 +54,25 @@ class Chunk(BaseModel): metadata: Dict[str, Union[bool, float, str, List[object], object, None]] """ - Metadata associated with the chunk, such as document ID, source, or other - relevant information. + Metadata associated with the chunk that will be used in the model context during + inference. + """ + + chunk_metadata: Optional[ChunkChunkMetadata] = None + """Metadata for the chunk that will NOT be used in the context during inference. + + The `chunk_metadata` is required backend functionality. """ embedding: Optional[List[float]] = None """Optional embedding for the chunk. If not provided, it will be computed later.""" + stored_chunk_id: Optional[str] = None + """The chunk ID that is stored in the vector database. + + Used for backend functionality. + """ + class QueryChunksResponse(BaseModel): chunks: List[Chunk] diff --git a/src/llama_stack_client/types/response_create_params.py b/src/llama_stack_client/types/response_create_params.py index 2b198449..471d8b21 100644 --- a/src/llama_stack_client/types/response_create_params.py +++ b/src/llama_stack_client/types/response_create_params.py @@ -17,6 +17,11 @@ "InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", "InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", "InputUnionMember1OpenAIResponseMessageContentUnionMember2", + "InputUnionMember1OpenAIResponseMessageContentUnionMember2Annotation", + "InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation", + "InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation", + "InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation", + "InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath", "Text", "TextFormat", "Tool", @@ -131,7 +136,69 @@ class InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInp ] +class InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation( + TypedDict, total=False +): + file_id: Required[str] + + filename: Required[str] + + index: Required[int] + + type: Required[Literal["file_citation"]] + + +class InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation( + TypedDict, total=False +): + end_index: Required[int] + + start_index: Required[int] + + title: Required[str] + + type: Required[Literal["url_citation"]] + + url: Required[str] + + +class 
InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation( + TypedDict, total=False +): + container_id: Required[str] + + end_index: Required[int] + + file_id: Required[str] + + filename: Required[str] + + start_index: Required[int] + + type: Required[Literal["container_file_citation"]] + + +class InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath( + TypedDict, total=False +): + file_id: Required[str] + + index: Required[int] + + type: Required[Literal["file_path"]] + + +InputUnionMember1OpenAIResponseMessageContentUnionMember2Annotation: TypeAlias = Union[ + InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation, + InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation, + InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation, + InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath, +] + + class InputUnionMember1OpenAIResponseMessageContentUnionMember2(TypedDict, total=False): + annotations: Required[Iterable[InputUnionMember1OpenAIResponseMessageContentUnionMember2Annotation]] + text: Required[str] type: Required[Literal["output_text"]] @@ -193,7 +260,7 @@ class Text(TypedDict, total=False): class ToolOpenAIResponseInputToolWebSearch(TypedDict, total=False): - type: Required[Literal["web_search", "web_search_preview_2025_03_11"]] + type: Required[Literal["web_search", "web_search_preview", "web_search_preview_2025_03_11"]] search_context_size: str diff --git a/src/llama_stack_client/types/response_list_response.py b/src/llama_stack_client/types/response_list_response.py index 85c640d3..d46213ef 100644 --- a/src/llama_stack_client/types/response_list_response.py +++ b/src/llama_stack_client/types/response_list_response.py @@ -21,12 +21,22 @@ "DataInputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", "DataInputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", "DataInputOpenAIResponseMessageContentUnionMember2", + "DataInputOpenAIResponseMessageContentUnionMember2Annotation", + "DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation", + "DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation", + "DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation", + "DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath", "DataOutput", "DataOutputOpenAIResponseMessage", "DataOutputOpenAIResponseMessageContentUnionMember1", "DataOutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", "DataOutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", "DataOutputOpenAIResponseMessageContentUnionMember2", + "DataOutputOpenAIResponseMessageContentUnionMember2Annotation", + "DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation", + "DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation", + "DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation", + "DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath", "DataOutputOpenAIResponseOutputMessageWebSearchToolCall", 
"DataOutputOpenAIResponseOutputMessageFileSearchToolCall", "DataOutputOpenAIResponseOutputMessageFunctionToolCall", @@ -108,7 +118,66 @@ class DataInputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessag ] +class DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation(BaseModel): + file_id: str + + filename: str + + index: int + + type: Literal["file_citation"] + + +class DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation(BaseModel): + end_index: int + + start_index: int + + title: str + + type: Literal["url_citation"] + + url: str + + +class DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation( + BaseModel +): + container_id: str + + end_index: int + + file_id: str + + filename: str + + start_index: int + + type: Literal["container_file_citation"] + + +class DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath(BaseModel): + file_id: str + + index: int + + type: Literal["file_path"] + + +DataInputOpenAIResponseMessageContentUnionMember2Annotation: TypeAlias = Annotated[ + Union[ + DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation, + DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation, + DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation, + DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath, + ], + PropertyInfo(discriminator="type"), +] + + class DataInputOpenAIResponseMessageContentUnionMember2(BaseModel): + annotations: List[DataInputOpenAIResponseMessageContentUnionMember2Annotation] + text: str type: Literal["output_text"] @@ -162,7 +231,66 @@ class DataOutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessa ] +class DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation(BaseModel): + file_id: str + + filename: str + + index: int + + type: Literal["file_citation"] + + +class DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation(BaseModel): + end_index: int + + start_index: int + + title: str + + type: Literal["url_citation"] + + url: str + + +class DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation( + BaseModel +): + container_id: str + + end_index: int + + file_id: str + + filename: str + + start_index: int + + type: Literal["container_file_citation"] + + +class DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath(BaseModel): + file_id: str + + index: int + + type: Literal["file_path"] + + +DataOutputOpenAIResponseMessageContentUnionMember2Annotation: TypeAlias = Annotated[ + Union[ + DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation, + DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation, + DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation, + DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath, + ], + PropertyInfo(discriminator="type"), +] + + class DataOutputOpenAIResponseMessageContentUnionMember2(BaseModel): + annotations: List[DataOutputOpenAIResponseMessageContentUnionMember2Annotation] + text: str type: Literal["output_text"] diff --git 
a/src/llama_stack_client/types/response_object.py b/src/llama_stack_client/types/response_object.py index a00115bb..e4b313d3 100644 --- a/src/llama_stack_client/types/response_object.py +++ b/src/llama_stack_client/types/response_object.py @@ -16,6 +16,11 @@ "OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", "OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", "OutputOpenAIResponseMessageContentUnionMember2", + "OutputOpenAIResponseMessageContentUnionMember2Annotation", + "OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation", + "OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation", + "OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation", + "OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath", "OutputOpenAIResponseOutputMessageWebSearchToolCall", "OutputOpenAIResponseOutputMessageFileSearchToolCall", "OutputOpenAIResponseOutputMessageFunctionToolCall", @@ -51,7 +56,64 @@ class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCo ] +class OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation(BaseModel): + file_id: str + + filename: str + + index: int + + type: Literal["file_citation"] + + +class OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation(BaseModel): + end_index: int + + start_index: int + + title: str + + type: Literal["url_citation"] + + url: str + + +class OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation(BaseModel): + container_id: str + + end_index: int + + file_id: str + + filename: str + + start_index: int + + type: Literal["container_file_citation"] + + +class OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath(BaseModel): + file_id: str + + index: int + + type: Literal["file_path"] + + +OutputOpenAIResponseMessageContentUnionMember2Annotation: TypeAlias = Annotated[ + Union[ + OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation, + OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation, + OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation, + OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath, + ], + PropertyInfo(discriminator="type"), +] + + class OutputOpenAIResponseMessageContentUnionMember2(BaseModel): + annotations: List[OutputOpenAIResponseMessageContentUnionMember2Annotation] + text: str type: Literal["output_text"] @@ -189,6 +251,16 @@ class Error(BaseModel): class ResponseObject(BaseModel): + @property + def output_text(self) -> str: + texts: List[str] = [] + for output in self.output: + if output.type == "message": + for content in output.content: + if content.type == "output_text": + texts.append(content.text) + return "".join(texts) + id: str created_at: int diff --git a/src/llama_stack_client/types/response_object_stream.py b/src/llama_stack_client/types/response_object_stream.py index 193491c0..311ad6bd 100644 --- a/src/llama_stack_client/types/response_object_stream.py +++ b/src/llama_stack_client/types/response_object_stream.py @@ -17,6 +17,11 @@ "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", 
"OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2", + "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2Annotation", + "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation", + "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation", + "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation", + "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath", "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageWebSearchToolCall", "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageFileSearchToolCall", "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageFunctionToolCall", @@ -30,6 +35,11 @@ "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2", + "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2Annotation", + "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation", + "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation", + "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation", + "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath", "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageWebSearchToolCall", "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageFileSearchToolCall", "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageFunctionToolCall", @@ -88,7 +98,74 @@ class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessage ] +class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation( + BaseModel +): + file_id: str + + filename: str + + index: int + + type: Literal["file_citation"] + + +class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation( + BaseModel +): + end_index: int + + start_index: int + + title: str + + type: Literal["url_citation"] + + url: str + + +class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation( + BaseModel +): + container_id: str + + end_index: int + + file_id: str + + filename: str + + start_index: int + + type: Literal["container_file_citation"] + + +class 
OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath( + BaseModel +): + file_id: str + + index: int + + type: Literal["file_path"] + + +OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2Annotation: TypeAlias = Annotated[ + Union[ + OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation, + OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation, + OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation, + OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath, + ], + PropertyInfo(discriminator="type"), +] + + class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2(BaseModel): + annotations: List[ + OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2Annotation + ] + text: str type: Literal["output_text"] @@ -235,7 +312,74 @@ class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageC ] +class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation( + BaseModel +): + file_id: str + + filename: str + + index: int + + type: Literal["file_citation"] + + +class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation( + BaseModel +): + end_index: int + + start_index: int + + title: str + + type: Literal["url_citation"] + + url: str + + +class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation( + BaseModel +): + container_id: str + + end_index: int + + file_id: str + + filename: str + + start_index: int + + type: Literal["container_file_citation"] + + +class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath( + BaseModel +): + file_id: str + + index: int + + type: Literal["file_path"] + + +OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2Annotation: TypeAlias = Annotated[ + Union[ + OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation, + OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation, + OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation, + OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath, + ], + PropertyInfo(discriminator="type"), +] + + class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2(BaseModel): + annotations: List[ + OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2Annotation + ] + text: str type: Literal["output_text"] diff --git a/src/llama_stack_client/types/responses/input_item_list_response.py 
b/src/llama_stack_client/types/responses/input_item_list_response.py index 5b63b51a..aadcd9f2 100644 --- a/src/llama_stack_client/types/responses/input_item_list_response.py +++ b/src/llama_stack_client/types/responses/input_item_list_response.py @@ -18,6 +18,11 @@ "DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", "DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", "DataOpenAIResponseMessageContentUnionMember2", + "DataOpenAIResponseMessageContentUnionMember2Annotation", + "DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation", + "DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation", + "DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation", + "DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath", ] @@ -90,7 +95,64 @@ class DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageCont ] +class DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation(BaseModel): + file_id: str + + filename: str + + index: int + + type: Literal["file_citation"] + + +class DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation(BaseModel): + end_index: int + + start_index: int + + title: str + + type: Literal["url_citation"] + + url: str + + +class DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation(BaseModel): + container_id: str + + end_index: int + + file_id: str + + filename: str + + start_index: int + + type: Literal["container_file_citation"] + + +class DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath(BaseModel): + file_id: str + + index: int + + type: Literal["file_path"] + + +DataOpenAIResponseMessageContentUnionMember2Annotation: TypeAlias = Annotated[ + Union[ + DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation, + DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation, + DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation, + DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath, + ], + PropertyInfo(discriminator="type"), +] + + class DataOpenAIResponseMessageContentUnionMember2(BaseModel): + annotations: List[DataOpenAIResponseMessageContentUnionMember2Annotation] + text: str type: Literal["output_text"] diff --git a/src/llama_stack_client/types/vector_io_insert_params.py b/src/llama_stack_client/types/vector_io_insert_params.py index 2dded485..5b6580fe 100644 --- a/src/llama_stack_client/types/vector_io_insert_params.py +++ b/src/llama_stack_client/types/vector_io_insert_params.py @@ -7,7 +7,7 @@ from .shared_params.interleaved_content import InterleavedContent -__all__ = ["VectorIoInsertParams", "Chunk"] +__all__ = ["VectorIoInsertParams", "Chunk", "ChunkChunkMetadata"] class VectorIoInsertParams(TypedDict, total=False): @@ -28,6 +28,44 @@ class VectorIoInsertParams(TypedDict, total=False): """The time to live of the chunks.""" +class ChunkChunkMetadata(TypedDict, total=False): + chunk_embedding_dimension: int + """The dimension of the embedding vector for the chunk.""" + + chunk_embedding_model: str + """The embedding model used to create the chunk's embedding.""" + + chunk_id: str + """The ID of the chunk. 
+ + If not set, it will be generated based on the document ID and content. + """ + + chunk_tokenizer: str + """The tokenizer used to create the chunk. Default is Tiktoken.""" + + chunk_window: str + """The window of the chunk, which can be used to group related chunks together.""" + + content_token_count: int + """The number of tokens in the content of the chunk.""" + + created_timestamp: int + """An optional timestamp indicating when the chunk was created.""" + + document_id: str + """The ID of the document this chunk belongs to.""" + + metadata_token_count: int + """The number of tokens in the metadata of the chunk.""" + + source: str + """The source of the content, such as a URL, file path, or other identifier.""" + + updated_timestamp: int + """An optional timestamp indicating when the chunk was last updated.""" + + class Chunk(TypedDict, total=False): content: Required[InterleavedContent] """ @@ -36,9 +74,21 @@ class Chunk(TypedDict, total=False): metadata: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] """ - Metadata associated with the chunk, such as document ID, source, or other - relevant information. + Metadata associated with the chunk that will be used in the model context during + inference. + """ + + chunk_metadata: ChunkChunkMetadata + """Metadata for the chunk that will NOT be used in the context during inference. + + The `chunk_metadata` is required backend functionality. """ embedding: Iterable[float] """Optional embedding for the chunk. If not provided, it will be computed later.""" + + stored_chunk_id: str + """The chunk ID that is stored in the vector database. + + Used for backend functionality. + """ diff --git a/src/llama_stack_client/types/vector_store_search_params.py b/src/llama_stack_client/types/vector_store_search_params.py index c7e86cd0..fdb02ff7 100644 --- a/src/llama_stack_client/types/vector_store_search_params.py +++ b/src/llama_stack_client/types/vector_store_search_params.py @@ -24,6 +24,9 @@ class VectorStoreSearchParams(TypedDict, total=False): rewrite_query: bool """Whether to rewrite the natural language query for vector search (default false)""" + search_mode: str + """The search mode to use - "keyword", "vector", or "hybrid" (default "vector")""" + class RankingOptions(TypedDict, total=False): ranker: str diff --git a/tests/api_resources/agents/test_session.py b/tests/api_resources/agents/test_session.py index 43b36a40..2c80df58 100644 --- a/tests/api_resources/agents/test_session.py +++ b/tests/api_resources/agents/test_session.py @@ -169,7 +169,9 @@ def test_path_params_delete(self, client: LlamaStackClient) -> None: class TestAsyncSession: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/agents/test_steps.py b/tests/api_resources/agents/test_steps.py index 2cee6f12..5555a9a4 100644 --- a/tests/api_resources/agents/test_steps.py +++ b/tests/api_resources/agents/test_steps.py @@ -93,7 +93,9 @@ def test_path_params_retrieve(self, client: LlamaStackClient) -> None: class TestAsyncSteps: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, 
{"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/agents/test_turn.py b/tests/api_resources/agents/test_turn.py index 3eacbba0..31eb53f9 100644 --- a/tests/api_resources/agents/test_turn.py +++ b/tests/api_resources/agents/test_turn.py @@ -522,7 +522,9 @@ def test_path_params_resume_overload_2(self, client: LlamaStackClient) -> None: class TestAsyncTurn: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py index ff450202..496ea061 100644 --- a/tests/api_resources/chat/test_completions.py +++ b/tests/api_resources/chat/test_completions.py @@ -266,7 +266,9 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None: class TestAsyncCompletions: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/eval/test_jobs.py b/tests/api_resources/eval/test_jobs.py index 5f289c74..17b02896 100644 --- a/tests/api_resources/eval/test_jobs.py +++ b/tests/api_resources/eval/test_jobs.py @@ -163,7 +163,9 @@ def test_path_params_status(self, client: LlamaStackClient) -> None: class TestAsyncJobs: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/post_training/test_job.py b/tests/api_resources/post_training/test_job.py index 6fca52db..158eafbc 100644 --- a/tests/api_resources/post_training/test_job.py +++ b/tests/api_resources/post_training/test_job.py @@ -141,7 +141,9 @@ def test_streaming_response_status(self, client: LlamaStackClient) -> None: class TestAsyncJob: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/responses/test_input_items.py b/tests/api_resources/responses/test_input_items.py index c1b25556..a0160f72 100644 --- a/tests/api_resources/responses/test_input_items.py +++ b/tests/api_resources/responses/test_input_items.py @@ -69,7 +69,9 @@ def test_path_params_list(self, client: LlamaStackClient) -> None: class TestAsyncInputItems: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + 
"async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_agents.py b/tests/api_resources/test_agents.py index 1c0478a6..c4aa5349 100644 --- a/tests/api_resources/test_agents.py +++ b/tests/api_resources/test_agents.py @@ -146,7 +146,9 @@ def test_path_params_delete(self, client: LlamaStackClient) -> None: class TestAsyncAgents: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_benchmarks.py b/tests/api_resources/test_benchmarks.py index 12cb3870..97d3d5c9 100644 --- a/tests/api_resources/test_benchmarks.py +++ b/tests/api_resources/test_benchmarks.py @@ -131,7 +131,9 @@ def test_streaming_response_register(self, client: LlamaStackClient) -> None: class TestAsyncBenchmarks: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_completions.py b/tests/api_resources/test_completions.py index 42dd8a95..355384b0 100644 --- a/tests/api_resources/test_completions.py +++ b/tests/api_resources/test_completions.py @@ -141,7 +141,9 @@ def test_streaming_response_create_overload_2(self, client: LlamaStackClient) -> class TestAsyncCompletions: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_datasets.py b/tests/api_resources/test_datasets.py index 010e10d0..9cd17f45 100644 --- a/tests/api_resources/test_datasets.py +++ b/tests/api_resources/test_datasets.py @@ -228,7 +228,9 @@ def test_path_params_unregister(self, client: LlamaStackClient) -> None: class TestAsyncDatasets: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_embeddings.py b/tests/api_resources/test_embeddings.py index 5d002024..5296e9c0 100644 --- a/tests/api_resources/test_embeddings.py +++ b/tests/api_resources/test_embeddings.py @@ -64,7 +64,9 @@ def test_streaming_response_create(self, client: LlamaStackClient) -> None: class TestAsyncEmbeddings: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, 
ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_eval.py b/tests/api_resources/test_eval.py index 8d04c104..878b3d28 100644 --- a/tests/api_resources/test_eval.py +++ b/tests/api_resources/test_eval.py @@ -566,7 +566,9 @@ def test_path_params_run_eval_alpha(self, client: LlamaStackClient) -> None: class TestAsyncEval: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_files.py b/tests/api_resources/test_files.py index 8dbcedec..7fc5e107 100644 --- a/tests/api_resources/test_files.py +++ b/tests/api_resources/test_files.py @@ -202,7 +202,9 @@ def test_path_params_content(self, client: LlamaStackClient) -> None: class TestAsyncFiles: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_inference.py b/tests/api_resources/test_inference.py index d876ae56..21967c9a 100644 --- a/tests/api_resources/test_inference.py +++ b/tests/api_resources/test_inference.py @@ -527,7 +527,9 @@ def test_streaming_response_embeddings(self, client: LlamaStackClient) -> None: class TestAsyncInference: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_batch_chat_completion(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_inspect.py b/tests/api_resources/test_inspect.py index e72ce766..a43abe6c 100644 --- a/tests/api_resources/test_inspect.py +++ b/tests/api_resources/test_inspect.py @@ -69,7 +69,9 @@ def test_streaming_response_version(self, client: LlamaStackClient) -> None: class TestAsyncInspect: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_health(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_models.py b/tests/api_resources/test_models.py index a2c8e68a..2e3f15be 100644 --- a/tests/api_resources/test_models.py +++ b/tests/api_resources/test_models.py @@ -162,7 +162,9 @@ def test_path_params_unregister(self, client: LlamaStackClient) -> None: class TestAsyncModels: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: 
diff --git a/tests/api_resources/test_post_training.py b/tests/api_resources/test_post_training.py index 1d0613da..5e7430fb 100644 --- a/tests/api_resources/test_post_training.py +++ b/tests/api_resources/test_post_training.py @@ -239,7 +239,9 @@ def test_streaming_response_supervised_fine_tune(self, client: LlamaStackClient) class TestAsyncPostTraining: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_preference_optimize(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_providers.py b/tests/api_resources/test_providers.py index 02f910b4..b23a84bd 100644 --- a/tests/api_resources/test_providers.py +++ b/tests/api_resources/test_providers.py @@ -82,7 +82,9 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None: class TestAsyncProviders: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_responses.py b/tests/api_resources/test_responses.py index e3343d3e..a3fa9fd1 100644 --- a/tests/api_resources/test_responses.py +++ b/tests/api_resources/test_responses.py @@ -220,7 +220,9 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None: class TestAsyncResponses: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_routes.py b/tests/api_resources/test_routes.py index d434d5b8..12b51f28 100644 --- a/tests/api_resources/test_routes.py +++ b/tests/api_resources/test_routes.py @@ -44,7 +44,9 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None: class TestAsyncRoutes: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_safety.py b/tests/api_resources/test_safety.py index e3dbe9b7..257dfd76 100644 --- a/tests/api_resources/test_safety.py +++ b/tests/api_resources/test_safety.py @@ -71,7 +71,9 @@ def test_streaming_response_run_shield(self, client: LlamaStackClient) -> None: class TestAsyncSafety: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_run_shield(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_scoring.py 
b/tests/api_resources/test_scoring.py index ca818363..ed46bd07 100644 --- a/tests/api_resources/test_scoring.py +++ b/tests/api_resources/test_scoring.py @@ -135,7 +135,9 @@ def test_streaming_response_score_batch(self, client: LlamaStackClient) -> None: class TestAsyncScoring: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_score(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_scoring_functions.py b/tests/api_resources/test_scoring_functions.py index d58d5c60..44556317 100644 --- a/tests/api_resources/test_scoring_functions.py +++ b/tests/api_resources/test_scoring_functions.py @@ -140,7 +140,9 @@ def test_streaming_response_register(self, client: LlamaStackClient) -> None: class TestAsyncScoringFunctions: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_shields.py b/tests/api_resources/test_shields.py index a351a6f0..037a66d3 100644 --- a/tests/api_resources/test_shields.py +++ b/tests/api_resources/test_shields.py @@ -123,7 +123,9 @@ def test_streaming_response_register(self, client: LlamaStackClient) -> None: class TestAsyncShields: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_synthetic_data_generation.py b/tests/api_resources/test_synthetic_data_generation.py index db409b53..c383770e 100644 --- a/tests/api_resources/test_synthetic_data_generation.py +++ b/tests/api_resources/test_synthetic_data_generation.py @@ -83,7 +83,9 @@ def test_streaming_response_generate(self, client: LlamaStackClient) -> None: class TestAsyncSyntheticDataGeneration: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_generate(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_telemetry.py b/tests/api_resources/test_telemetry.py index 4f3c81d4..14a8801c 100644 --- a/tests/api_resources/test_telemetry.py +++ b/tests/api_resources/test_telemetry.py @@ -417,7 +417,9 @@ def test_streaming_response_save_spans_to_dataset(self, client: LlamaStackClient class TestAsyncTelemetry: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_get_span(self, async_client: AsyncLlamaStackClient) -> None: diff --git 
a/tests/api_resources/test_tool_runtime.py b/tests/api_resources/test_tool_runtime.py index b13e8c1f..fa79b1ba 100644 --- a/tests/api_resources/test_tool_runtime.py +++ b/tests/api_resources/test_tool_runtime.py @@ -89,7 +89,9 @@ def test_streaming_response_list_tools(self, client: LlamaStackClient) -> None: class TestAsyncToolRuntime: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_invoke_tool(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_toolgroups.py b/tests/api_resources/test_toolgroups.py index 42a38226..1b8e5bce 100644 --- a/tests/api_resources/test_toolgroups.py +++ b/tests/api_resources/test_toolgroups.py @@ -164,7 +164,9 @@ def test_path_params_unregister(self, client: LlamaStackClient) -> None: class TestAsyncToolgroups: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_tools.py b/tests/api_resources/test_tools.py index 2dd1ace1..3c1f0da4 100644 --- a/tests/api_resources/test_tools.py +++ b/tests/api_resources/test_tools.py @@ -89,7 +89,9 @@ def test_path_params_get(self, client: LlamaStackClient) -> None: class TestAsyncTools: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_vector_dbs.py b/tests/api_resources/test_vector_dbs.py index d185edf1..68d6be89 100644 --- a/tests/api_resources/test_vector_dbs.py +++ b/tests/api_resources/test_vector_dbs.py @@ -169,7 +169,9 @@ def test_path_params_unregister(self, client: LlamaStackClient) -> None: class TestAsyncVectorDBs: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/test_vector_io.py b/tests/api_resources/test_vector_io.py index dada826d..c62a58d3 100644 --- a/tests/api_resources/test_vector_io.py +++ b/tests/api_resources/test_vector_io.py @@ -37,7 +37,21 @@ def test_method_insert_with_all_params(self, client: LlamaStackClient) -> None: { "content": "string", "metadata": {"foo": True}, + "chunk_metadata": { + "chunk_embedding_dimension": 0, + "chunk_embedding_model": "chunk_embedding_model", + "chunk_id": "chunk_id", + "chunk_tokenizer": "chunk_tokenizer", + "chunk_window": "chunk_window", + "content_token_count": 0, + "created_timestamp": 0, + "document_id": "document_id", + "metadata_token_count": 0, + "source": "source", + "updated_timestamp": 0, + }, "embedding": [0], + "stored_chunk_id": "stored_chunk_id", } ], 
vector_db_id="vector_db_id", @@ -126,7 +140,9 @@ def test_streaming_response_query(self, client: LlamaStackClient) -> None: class TestAsyncVectorIo: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_insert(self, async_client: AsyncLlamaStackClient) -> None: @@ -148,7 +164,21 @@ async def test_method_insert_with_all_params(self, async_client: AsyncLlamaStack { "content": "string", "metadata": {"foo": True}, + "chunk_metadata": { + "chunk_embedding_dimension": 0, + "chunk_embedding_model": "chunk_embedding_model", + "chunk_id": "chunk_id", + "chunk_tokenizer": "chunk_tokenizer", + "chunk_window": "chunk_window", + "content_token_count": 0, + "created_timestamp": 0, + "document_id": "document_id", + "metadata_token_count": 0, + "source": "source", + "updated_timestamp": 0, + }, "embedding": [0], + "stored_chunk_id": "stored_chunk_id", } ], vector_db_id="vector_db_id", diff --git a/tests/api_resources/test_vector_stores.py b/tests/api_resources/test_vector_stores.py index bd63d5e7..84324ca4 100644 --- a/tests/api_resources/test_vector_stores.py +++ b/tests/api_resources/test_vector_stores.py @@ -247,6 +247,7 @@ def test_method_search_with_all_params(self, client: LlamaStackClient) -> None: "score_threshold": 0, }, rewrite_query=True, + search_mode="search_mode", ) assert_matches_type(VectorStoreSearchResponse, vector_store, path=["response"]) @@ -286,7 +287,9 @@ def test_path_params_search(self, client: LlamaStackClient) -> None: class TestAsyncVectorStores: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None: @@ -513,6 +516,7 @@ async def test_method_search_with_all_params(self, async_client: AsyncLlamaStack "score_threshold": 0, }, rewrite_query=True, + search_mode="search_mode", ) assert_matches_type(VectorStoreSearchResponse, vector_store, path=["response"]) diff --git a/tests/api_resources/tool_runtime/test_rag_tool.py b/tests/api_resources/tool_runtime/test_rag_tool.py index 16ea0bb5..17a64d8e 100644 --- a/tests/api_resources/tool_runtime/test_rag_tool.py +++ b/tests/api_resources/tool_runtime/test_rag_tool.py @@ -130,7 +130,9 @@ def test_streaming_response_query(self, client: LlamaStackClient) -> None: class TestAsyncRagTool: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_insert(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/api_resources/vector_stores/test_files.py b/tests/api_resources/vector_stores/test_files.py index c88e5c95..f9728a36 100644 --- a/tests/api_resources/vector_stores/test_files.py +++ b/tests/api_resources/vector_stores/test_files.py @@ -71,7 +71,9 @@ def test_path_params_create(self, client: LlamaStackClient) -> None: class TestAsyncFiles: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", 
"strict"]) + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) @parametrize async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None: diff --git a/tests/conftest.py b/tests/conftest.py index ed5e8a48..ddadec32 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,10 +6,12 @@ import logging from typing import TYPE_CHECKING, Iterator, AsyncIterator +import httpx import pytest from pytest_asyncio import is_async_test -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient +from llama_stack_client import LlamaStackClient, DefaultAioHttpClient, AsyncLlamaStackClient +from llama_stack_client._utils import is_dict if TYPE_CHECKING: from _pytest.fixtures import FixtureRequest # pyright: ignore[reportPrivateImportUsage] @@ -27,6 +29,19 @@ def pytest_collection_modifyitems(items: list[pytest.Function]) -> None: for async_test in pytest_asyncio_tests: async_test.add_marker(session_scope_marker, append=False) + # We skip tests that use both the aiohttp client and respx_mock as respx_mock + # doesn't support custom transports. + for item in items: + if "async_client" not in item.fixturenames or "respx_mock" not in item.fixturenames: + continue + + if not hasattr(item, "callspec"): + continue + + async_client_param = item.callspec.params.get("async_client") + if is_dict(async_client_param) and async_client_param.get("http_client") == "aiohttp": + item.add_marker(pytest.mark.skip(reason="aiohttp client is not compatible with respx_mock")) + base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -43,9 +58,25 @@ def client(request: FixtureRequest) -> Iterator[LlamaStackClient]: @pytest.fixture(scope="session") async def async_client(request: FixtureRequest) -> AsyncIterator[AsyncLlamaStackClient]: - strict = getattr(request, "param", True) - if not isinstance(strict, bool): - raise TypeError(f"Unexpected fixture parameter type {type(strict)}, expected {bool}") - - async with AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=strict) as client: + param = getattr(request, "param", True) + + # defaults + strict = True + http_client: None | httpx.AsyncClient = None + + if isinstance(param, bool): + strict = param + elif is_dict(param): + strict = param.get("strict", True) + assert isinstance(strict, bool) + + http_client_type = param.get("http_client", "httpx") + if http_client_type == "aiohttp": + http_client = DefaultAioHttpClient() + else: + raise TypeError(f"Unexpected fixture parameter type {type(param)}, expected bool or dict") + + async with AsyncLlamaStackClient( + base_url=base_url, _strict_response_validation=strict, http_client=http_client + ) as client: yield client diff --git a/tests/test_client.py b/tests/test_client.py index 59472837..6a1a8f85 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -182,6 +182,7 @@ def test_copy_signature(self) -> None: copy_param = copy_signature.parameters.get(name) assert copy_param is not None, f"copy() signature is missing the {name} param" + @pytest.mark.skipif(sys.version_info >= (3, 10), reason="fails because of a memory leak that started from 3.12") def test_copy_build_request(self) -> None: options = FinalRequestOptions(method="get", url="/foo") @@ -993,6 +994,7 @@ def test_copy_signature(self) -> None: copy_param = copy_signature.parameters.get(name) assert copy_param is not None, f"copy() signature is missing the {name} param" + 
@pytest.mark.skipif(sys.version_info >= (3, 10), reason="fails because of a memory leak that started from 3.12") def test_copy_build_request(self) -> None: options = FinalRequestOptions(method="get", url="/foo")
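The new conftest.py wiring above doubles as a usage recipe: passing DefaultAioHttpClient() as http_client swaps the underlying transport while every resource method keeps the same signature. A minimal sketch, assuming the optional aiohttp extra for llama_stack_client is installed and a Llama Stack server is listening on the test suite's default address; the chunk values are placeholders copied from the vector_io test earlier in this patch:

import asyncio

from llama_stack_client import AsyncLlamaStackClient, DefaultAioHttpClient


async def main() -> None:
    # Swap the default httpx transport for aiohttp; resource methods are unchanged.
    async with AsyncLlamaStackClient(
        base_url="http://127.0.0.1:4010",  # the test suite's default address
        http_client=DefaultAioHttpClient(),
    ) as client:
        # Exercise the chunk_metadata / stored_chunk_id fields added to
        # vector_io_insert_params.py in this patch.
        await client.vector_io.insert(
            chunks=[
                {
                    "content": "string",
                    "metadata": {"foo": True},
                    "chunk_metadata": {
                        "document_id": "document_id",
                        "source": "source",
                        "created_timestamp": 0,
                    },
                    "stored_chunk_id": "stored_chunk_id",
                }
            ],
            vector_db_id="vector_db_id",
        )


asyncio.run(main())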