6 changes: 6 additions & 0 deletions src/llama_stack_client/_base_client.py
@@ -960,6 +960,9 @@ def request(
if self.custom_auth is not None:
kwargs["auth"] = self.custom_auth

if options.follow_redirects is not None:
kwargs["follow_redirects"] = options.follow_redirects

log.debug("Sending HTTP Request: %s %s", request.method, request.url)

response = None
@@ -1460,6 +1463,9 @@ async def request(
if self.custom_auth is not None:
kwargs["auth"] = self.custom_auth

if options.follow_redirects is not None:
kwargs["follow_redirects"] = options.follow_redirects

log.debug("Sending HTTP Request: %s %s", request.method, request.url)

response = None
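Note on the change above: the flag is forwarded to httpx only when it was explicitly set, so httpx's client-level default still applies when callers pass nothing. A minimal, standalone sketch of that pattern using plain httpx (not the SDK's internals; the URL is illustrative):

```python
import httpx

def send(client: httpx.Client, request: httpx.Request, follow_redirects: bool | None = None) -> httpx.Response:
    kwargs: dict[str, bool] = {}
    # Only forward the flag when the caller set it; None means
    # "defer to the client's configured default".
    if follow_redirects is not None:
        kwargs["follow_redirects"] = follow_redirects
    return client.send(request, **kwargs)

with httpx.Client() as client:
    req = client.build_request("GET", "https://example.com")
    resp = send(client, req, follow_redirects=False)  # 3xx responses returned as-is
```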
29 changes: 28 additions & 1 deletion src/llama_stack_client/_client.py
@@ -23,6 +23,7 @@
from ._utils import is_given, get_async_library
from ._version import __version__
from .resources import (
files,
tools,
models,
routes,
@@ -33,13 +34,14 @@
datasets,
inference,
providers,
responses,
telemetry,
vector_io,
benchmarks,
embeddings,
toolgroups,
vector_dbs,
completions,
vector_stores,
scoring_functions,
synthetic_data_generation,
)
@@ -53,6 +55,7 @@
from .resources.chat import chat
from .resources.eval import eval
from .resources.agents import agents
from .resources.responses import responses
from .resources.tool_runtime import tool_runtime
from .resources.post_training import post_training

@@ -78,10 +81,12 @@ class LlamaStackClient(SyncAPIClient):
eval: eval.EvalResource
inspect: inspect.InspectResource
inference: inference.InferenceResource
embeddings: embeddings.EmbeddingsResource
chat: chat.ChatResource
completions: completions.CompletionsResource
vector_io: vector_io.VectorIoResource
vector_dbs: vector_dbs.VectorDBsResource
vector_stores: vector_stores.VectorStoresResource
models: models.ModelsResource
post_training: post_training.PostTrainingResource
providers: providers.ProvidersResource
@@ -93,6 +98,7 @@ class LlamaStackClient(SyncAPIClient):
scoring: scoring.ScoringResource
scoring_functions: scoring_functions.ScoringFunctionsResource
benchmarks: benchmarks.BenchmarksResource
files: files.FilesResource
with_raw_response: LlamaStackClientWithRawResponse
with_streaming_response: LlamaStackClientWithStreamedResponse

@@ -161,10 +167,12 @@ def __init__(
self.eval = eval.EvalResource(self)
self.inspect = inspect.InspectResource(self)
self.inference = inference.InferenceResource(self)
self.embeddings = embeddings.EmbeddingsResource(self)
self.chat = chat.ChatResource(self)
self.completions = completions.CompletionsResource(self)
self.vector_io = vector_io.VectorIoResource(self)
self.vector_dbs = vector_dbs.VectorDBsResource(self)
self.vector_stores = vector_stores.VectorStoresResource(self)
self.models = models.ModelsResource(self)
self.post_training = post_training.PostTrainingResource(self)
self.providers = providers.ProvidersResource(self)
@@ -176,6 +184,7 @@ def __init__(
self.scoring = scoring.ScoringResource(self)
self.scoring_functions = scoring_functions.ScoringFunctionsResource(self)
self.benchmarks = benchmarks.BenchmarksResource(self)
self.files = files.FilesResource(self)
self.with_raw_response = LlamaStackClientWithRawResponse(self)
self.with_streaming_response = LlamaStackClientWithStreamedResponse(self)

@@ -296,10 +305,12 @@ class AsyncLlamaStackClient(AsyncAPIClient):
eval: eval.AsyncEvalResource
inspect: inspect.AsyncInspectResource
inference: inference.AsyncInferenceResource
embeddings: embeddings.AsyncEmbeddingsResource
chat: chat.AsyncChatResource
completions: completions.AsyncCompletionsResource
vector_io: vector_io.AsyncVectorIoResource
vector_dbs: vector_dbs.AsyncVectorDBsResource
vector_stores: vector_stores.AsyncVectorStoresResource
models: models.AsyncModelsResource
post_training: post_training.AsyncPostTrainingResource
providers: providers.AsyncProvidersResource
@@ -311,6 +322,7 @@ class AsyncLlamaStackClient(AsyncAPIClient):
scoring: scoring.AsyncScoringResource
scoring_functions: scoring_functions.AsyncScoringFunctionsResource
benchmarks: benchmarks.AsyncBenchmarksResource
files: files.AsyncFilesResource
with_raw_response: AsyncLlamaStackClientWithRawResponse
with_streaming_response: AsyncLlamaStackClientWithStreamedResponse

@@ -379,10 +391,12 @@ def __init__(
self.eval = eval.AsyncEvalResource(self)
self.inspect = inspect.AsyncInspectResource(self)
self.inference = inference.AsyncInferenceResource(self)
self.embeddings = embeddings.AsyncEmbeddingsResource(self)
self.chat = chat.AsyncChatResource(self)
self.completions = completions.AsyncCompletionsResource(self)
self.vector_io = vector_io.AsyncVectorIoResource(self)
self.vector_dbs = vector_dbs.AsyncVectorDBsResource(self)
self.vector_stores = vector_stores.AsyncVectorStoresResource(self)
self.models = models.AsyncModelsResource(self)
self.post_training = post_training.AsyncPostTrainingResource(self)
self.providers = providers.AsyncProvidersResource(self)
@@ -394,6 +408,7 @@ def __init__(
self.scoring = scoring.AsyncScoringResource(self)
self.scoring_functions = scoring_functions.AsyncScoringFunctionsResource(self)
self.benchmarks = benchmarks.AsyncBenchmarksResource(self)
self.files = files.AsyncFilesResource(self)
self.with_raw_response = AsyncLlamaStackClientWithRawResponse(self)
self.with_streaming_response = AsyncLlamaStackClientWithStreamedResponse(self)

@@ -515,10 +530,12 @@ def __init__(self, client: LlamaStackClient) -> None:
self.eval = eval.EvalResourceWithRawResponse(client.eval)
self.inspect = inspect.InspectResourceWithRawResponse(client.inspect)
self.inference = inference.InferenceResourceWithRawResponse(client.inference)
self.embeddings = embeddings.EmbeddingsResourceWithRawResponse(client.embeddings)
self.chat = chat.ChatResourceWithRawResponse(client.chat)
self.completions = completions.CompletionsResourceWithRawResponse(client.completions)
self.vector_io = vector_io.VectorIoResourceWithRawResponse(client.vector_io)
self.vector_dbs = vector_dbs.VectorDBsResourceWithRawResponse(client.vector_dbs)
self.vector_stores = vector_stores.VectorStoresResourceWithRawResponse(client.vector_stores)
self.models = models.ModelsResourceWithRawResponse(client.models)
self.post_training = post_training.PostTrainingResourceWithRawResponse(client.post_training)
self.providers = providers.ProvidersResourceWithRawResponse(client.providers)
@@ -532,6 +549,7 @@ def __init__(self, client: LlamaStackClient) -> None:
self.scoring = scoring.ScoringResourceWithRawResponse(client.scoring)
self.scoring_functions = scoring_functions.ScoringFunctionsResourceWithRawResponse(client.scoring_functions)
self.benchmarks = benchmarks.BenchmarksResourceWithRawResponse(client.benchmarks)
self.files = files.FilesResourceWithRawResponse(client.files)


class AsyncLlamaStackClientWithRawResponse:
@@ -545,10 +563,12 @@ def __init__(self, client: AsyncLlamaStackClient) -> None:
self.eval = eval.AsyncEvalResourceWithRawResponse(client.eval)
self.inspect = inspect.AsyncInspectResourceWithRawResponse(client.inspect)
self.inference = inference.AsyncInferenceResourceWithRawResponse(client.inference)
self.embeddings = embeddings.AsyncEmbeddingsResourceWithRawResponse(client.embeddings)
self.chat = chat.AsyncChatResourceWithRawResponse(client.chat)
self.completions = completions.AsyncCompletionsResourceWithRawResponse(client.completions)
self.vector_io = vector_io.AsyncVectorIoResourceWithRawResponse(client.vector_io)
self.vector_dbs = vector_dbs.AsyncVectorDBsResourceWithRawResponse(client.vector_dbs)
self.vector_stores = vector_stores.AsyncVectorStoresResourceWithRawResponse(client.vector_stores)
self.models = models.AsyncModelsResourceWithRawResponse(client.models)
self.post_training = post_training.AsyncPostTrainingResourceWithRawResponse(client.post_training)
self.providers = providers.AsyncProvidersResourceWithRawResponse(client.providers)
@@ -564,6 +584,7 @@ def __init__(self, client: AsyncLlamaStackClient) -> None:
client.scoring_functions
)
self.benchmarks = benchmarks.AsyncBenchmarksResourceWithRawResponse(client.benchmarks)
self.files = files.AsyncFilesResourceWithRawResponse(client.files)


class LlamaStackClientWithStreamedResponse:
@@ -577,10 +598,12 @@ def __init__(self, client: LlamaStackClient) -> None:
self.eval = eval.EvalResourceWithStreamingResponse(client.eval)
self.inspect = inspect.InspectResourceWithStreamingResponse(client.inspect)
self.inference = inference.InferenceResourceWithStreamingResponse(client.inference)
self.embeddings = embeddings.EmbeddingsResourceWithStreamingResponse(client.embeddings)
self.chat = chat.ChatResourceWithStreamingResponse(client.chat)
self.completions = completions.CompletionsResourceWithStreamingResponse(client.completions)
self.vector_io = vector_io.VectorIoResourceWithStreamingResponse(client.vector_io)
self.vector_dbs = vector_dbs.VectorDBsResourceWithStreamingResponse(client.vector_dbs)
self.vector_stores = vector_stores.VectorStoresResourceWithStreamingResponse(client.vector_stores)
self.models = models.ModelsResourceWithStreamingResponse(client.models)
self.post_training = post_training.PostTrainingResourceWithStreamingResponse(client.post_training)
self.providers = providers.ProvidersResourceWithStreamingResponse(client.providers)
@@ -596,6 +619,7 @@ def __init__(self, client: LlamaStackClient) -> None:
client.scoring_functions
)
self.benchmarks = benchmarks.BenchmarksResourceWithStreamingResponse(client.benchmarks)
self.files = files.FilesResourceWithStreamingResponse(client.files)


class AsyncLlamaStackClientWithStreamedResponse:
@@ -609,10 +633,12 @@ def __init__(self, client: AsyncLlamaStackClient) -> None:
self.eval = eval.AsyncEvalResourceWithStreamingResponse(client.eval)
self.inspect = inspect.AsyncInspectResourceWithStreamingResponse(client.inspect)
self.inference = inference.AsyncInferenceResourceWithStreamingResponse(client.inference)
self.embeddings = embeddings.AsyncEmbeddingsResourceWithStreamingResponse(client.embeddings)
self.chat = chat.AsyncChatResourceWithStreamingResponse(client.chat)
self.completions = completions.AsyncCompletionsResourceWithStreamingResponse(client.completions)
self.vector_io = vector_io.AsyncVectorIoResourceWithStreamingResponse(client.vector_io)
self.vector_dbs = vector_dbs.AsyncVectorDBsResourceWithStreamingResponse(client.vector_dbs)
self.vector_stores = vector_stores.AsyncVectorStoresResourceWithStreamingResponse(client.vector_stores)
self.models = models.AsyncModelsResourceWithStreamingResponse(client.models)
self.post_training = post_training.AsyncPostTrainingResourceWithStreamingResponse(client.post_training)
self.providers = providers.AsyncProvidersResourceWithStreamingResponse(client.providers)
@@ -630,6 +656,7 @@ def __init__(self, client: AsyncLlamaStackClient) -> None:
client.scoring_functions
)
self.benchmarks = benchmarks.AsyncBenchmarksResourceWithStreamingResponse(client.benchmarks)
self.files = files.AsyncFilesResourceWithStreamingResponse(client.files)


Client = LlamaStackClient
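Taken together, the client now exposes the new resources on both the sync and async classes and mirrors them on the raw/streaming wrappers. A quick sketch of the surface this diff confirms (attribute wiring only; resource methods are outside this diff, and the no-argument constructor assumes the client's default/env-based configuration):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient()

# Resources wired up in this change:
client.files          # FilesResource
client.embeddings     # EmbeddingsResource
client.vector_stores  # VectorStoresResource

# The wrapper clients mirror the same attributes:
client.with_raw_response.files
client.with_streaming_response.vector_stores
```

`completions` also appears in the import changes, and `responses` is imported explicitly via `from .resources.responses import responses`.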
2 changes: 1 addition & 1 deletion src/llama_stack_client/_files.py
@@ -34,7 +34,7 @@ def assert_is_file_content(obj: object, *, key: str | None = None) -> None:
if not is_file_content(obj):
prefix = f"Expected entry at `{key}`" if key is not None else f"Expected file input `{obj!r}`"
raise RuntimeError(
f"{prefix} to be bytes, an io.IOBase instance, PathLike or a tuple but received {type(obj)} instead."
f"{prefix} to be bytes, an io.IOBase instance, PathLike or a tuple but received {type(obj)} instead. See https://github.com/stainless-sdks/llama-stack-python/tree/main#file-uploads"
) from None


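For reference, the input shapes that error message names, sketched below; the tuple form follows httpx's `(filename, contents)` upload convention, and the exact accepted variants are defined by `is_file_content` (not shown in this diff):

```python
import io
from pathlib import Path

valid_file_inputs = [
    b"raw bytes",                   # bytes
    io.BytesIO(b"buffered bytes"),  # an io.IOBase instance
    Path("report.pdf"),             # os.PathLike
    ("report.pdf", b"raw bytes"),   # a (filename, contents) tuple
]
```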
2 changes: 2 additions & 0 deletions src/llama_stack_client/_models.py
@@ -737,6 +737,7 @@ class FinalRequestOptionsInput(TypedDict, total=False):
idempotency_key: str
json_data: Body
extra_json: AnyMapping
follow_redirects: bool


@final
@@ -750,6 +751,7 @@ class FinalRequestOptions(pydantic.BaseModel):
files: Union[HttpxRequestFiles, None] = None
idempotency_key: Union[str, None] = None
post_parser: Union[Callable[[Any], Any], NotGiven] = NotGiven()
follow_redirects: Union[bool, None] = None

# It should be noted that we cannot use `json` here as that would override
# a BaseModel method in an incompatible fashion.
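Defaulting to `None` rather than `False` is what lets `_base_client` distinguish "the caller never set the flag" from "the caller disabled redirects". A sketch, assuming `method` and `url` are the model's required fields as in other Stainless-generated clients (they are not visible in this hunk):

```python
from llama_stack_client._models import FinalRequestOptions

opts = FinalRequestOptions(method="get", url="/v1/models")
assert opts.follow_redirects is None  # unset: httpx's default applies

opts = FinalRequestOptions(method="get", url="/v1/models", follow_redirects=False)
assert opts.follow_redirects is False  # explicitly disabled
```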
2 changes: 2 additions & 0 deletions src/llama_stack_client/_types.py
@@ -100,6 +100,7 @@ class RequestOptions(TypedDict, total=False):
params: Query
extra_json: AnyMapping
idempotency_key: str
follow_redirects: bool


# Sentinel class used until PEP 0661 is accepted
@@ -215,3 +216,4 @@ class _GenericAlias(Protocol):

class HttpxSendArgs(TypedDict, total=False):
auth: httpx.Auth
follow_redirects: bool
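Because `RequestOptions` is a `TypedDict` with `total=False`, the new key is optional and type-checks alongside the existing ones; `HttpxSendArgs` gains the matching key so the value can be passed straight through to `httpx.Client.send`:

```python
from llama_stack_client._types import RequestOptions

options: RequestOptions = {
    "idempotency_key": "key-123",
    "follow_redirects": False,  # new in this change
}
```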
42 changes: 42 additions & 0 deletions src/llama_stack_client/resources/__init__.py
@@ -16,6 +16,14 @@
EvalResourceWithStreamingResponse,
AsyncEvalResourceWithStreamingResponse,
)
from .files import (
FilesResource,
AsyncFilesResource,
FilesResourceWithRawResponse,
AsyncFilesResourceWithRawResponse,
FilesResourceWithStreamingResponse,
AsyncFilesResourceWithStreamingResponse,
)
from .tools import (
ToolsResource,
AsyncToolsResource,
@@ -136,6 +144,14 @@
BenchmarksResourceWithStreamingResponse,
AsyncBenchmarksResourceWithStreamingResponse,
)
from .embeddings import (
EmbeddingsResource,
AsyncEmbeddingsResource,
EmbeddingsResourceWithRawResponse,
AsyncEmbeddingsResourceWithRawResponse,
EmbeddingsResourceWithStreamingResponse,
AsyncEmbeddingsResourceWithStreamingResponse,
)
from .toolgroups import (
ToolgroupsResource,
AsyncToolgroupsResource,
@@ -176,6 +192,14 @@
PostTrainingResourceWithStreamingResponse,
AsyncPostTrainingResourceWithStreamingResponse,
)
from .vector_stores import (
VectorStoresResource,
AsyncVectorStoresResource,
VectorStoresResourceWithRawResponse,
AsyncVectorStoresResourceWithRawResponse,
VectorStoresResourceWithStreamingResponse,
AsyncVectorStoresResourceWithStreamingResponse,
)
from .scoring_functions import (
ScoringFunctionsResource,
AsyncScoringFunctionsResource,
@@ -248,6 +272,12 @@
"AsyncInferenceResourceWithRawResponse",
"InferenceResourceWithStreamingResponse",
"AsyncInferenceResourceWithStreamingResponse",
"EmbeddingsResource",
"AsyncEmbeddingsResource",
"EmbeddingsResourceWithRawResponse",
"AsyncEmbeddingsResourceWithRawResponse",
"EmbeddingsResourceWithStreamingResponse",
"AsyncEmbeddingsResourceWithStreamingResponse",
"ChatResource",
"AsyncChatResource",
"ChatResourceWithRawResponse",
@@ -272,6 +302,12 @@
"AsyncVectorDBsResourceWithRawResponse",
"VectorDBsResourceWithStreamingResponse",
"AsyncVectorDBsResourceWithStreamingResponse",
"VectorStoresResource",
"AsyncVectorStoresResource",
"VectorStoresResourceWithRawResponse",
"AsyncVectorStoresResourceWithRawResponse",
"VectorStoresResourceWithStreamingResponse",
"AsyncVectorStoresResourceWithStreamingResponse",
"ModelsResource",
"AsyncModelsResource",
"ModelsResourceWithRawResponse",
@@ -338,4 +374,10 @@
"AsyncBenchmarksResourceWithRawResponse",
"BenchmarksResourceWithStreamingResponse",
"AsyncBenchmarksResourceWithStreamingResponse",
"FilesResource",
"AsyncFilesResource",
"FilesResourceWithRawResponse",
"AsyncFilesResourceWithRawResponse",
"FilesResourceWithStreamingResponse",
"AsyncFilesResourceWithStreamingResponse",
]
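With the `__all__` additions, the new resource classes are importable directly from the `resources` package (names as confirmed by the diff):

```python
from llama_stack_client.resources import (
    EmbeddingsResource,
    FilesResource,
    VectorStoresResource,
)
```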