Merged

28 commits
95cb22a
chore: update uv.lock
stainless-app[bot] Dec 17, 2025
9dc1025
fix: ensure streams are always closed
stainless-app[bot] Nov 28, 2025
a900167
chore(deps): mypy 1.18.1 has a regression, pin to 1.17
stainless-app[bot] Nov 28, 2025
288fa13
codegen metadata
stainless-app[bot] Dec 1, 2025
a83a98e
feat: Add metadata field to request and response
stainless-app[bot] Dec 1, 2025
d5cef5a
feat(api): deprecate `toolgroup` and `tool_runtime` apis
stainless-app[bot] Dec 1, 2025
2a1523b
chore: bump required `uv` version
stainless-app[bot] Dec 2, 2025
de67c1e
chore: update lockfile
stainless-app[bot] Dec 3, 2025
2ca4485
feat: Implement FastAPI router system
stainless-app[bot] Dec 3, 2025
dd94553
fix(client): fix issue with duplicate definitions in Go
stainless-app[bot] Dec 3, 2025
36336dd
chore(internal): avoid using unstable Python versions in tests
stainless-app[bot] Dec 22, 2025
93be516
fix(types): allow pyright to infer TypedDict types within SequenceNotStr
stainless-app[bot] Dec 9, 2025
18c54f3
chore: add missing docstrings
stainless-app[bot] Dec 22, 2025
c784ae1
feat: convert Benchmarks API to use FastAPI router
stainless-app[bot] Dec 10, 2025
05f867b
feat(api): add readonly connectors API
stainless-app[bot] Dec 11, 2025
05a0330
feat: Implement include parameter specifically for adding logprobs in…
stainless-app[bot] Dec 11, 2025
aaa0ebe
codegen metadata
stainless-app[bot] Dec 14, 2025
32c453f
feat: add support for tool_choice to responses api
stainless-app[bot] Dec 15, 2025
2f53a3f
feat: convert Datasets API to use FastAPI router
stainless-app[bot] Dec 15, 2025
9cae3df
chore(internal): add missing files argument to base client
stainless-app[bot] Dec 16, 2025
597c7c6
chore(internal): version bump
stainless-app[bot] Dec 16, 2025
f8c84cd
feat: migrate Providers API to FastAPI router pattern
stainless-app[bot] Dec 17, 2025
7774bec
feat: migrate Inspect API to FastAPI router
stainless-app[bot] Dec 17, 2025
77df10d
fix: use async_to_httpx_files in patch method
stainless-app[bot] Dec 18, 2025
89e14ad
chore(internal): add `--fix` argument to lint script
stainless-app[bot] Dec 19, 2025
d24dff6
docs: add more examples
stainless-app[bot] Dec 20, 2025
3279ec9
feat: introduce /admin API for stack administration and operations
stainless-app[bot] Dec 22, 2025
63c9ff8
release: 0.4.0-alpha.14
stainless-app[bot] Dec 22, 2025
6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
@@ -25,7 +25,7 @@ jobs:
       - name: Install uv
         uses: astral-sh/setup-uv@v5
         with:
-          version: '0.8.11'
+          version: '0.9.13'

       - name: Install dependencies
         run: uv sync --all-extras
@@ -47,7 +47,7 @@ jobs:
       - name: Install uv
         uses: astral-sh/setup-uv@v5
         with:
-          version: '0.8.11'
+          version: '0.9.13'

       - name: Install dependencies
         run: uv sync --all-extras
@@ -81,7 +81,7 @@ jobs:
       - name: Install uv
         uses: astral-sh/setup-uv@v5
         with:
-          version: '0.8.11'
+          version: '0.9.13'

       - name: Bootstrap
         run: ./scripts/bootstrap
2 changes: 1 addition & 1 deletion .github/workflows/publish-pypi.yml
@@ -19,7 +19,7 @@ jobs:
       - name: Install uv
         uses: astral-sh/setup-uv@v5
         with:
-          version: '0.8.11'
+          version: '0.9.13'

       - name: Publish to PyPI
         run: |
2 changes: 1 addition & 1 deletion .release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "0.4.0-alpha.13"
+  ".": "0.4.0-alpha.14"
 }
8 changes: 4 additions & 4 deletions .stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 103
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-73fc7f59a69be032d1b18e2fff3ed7509e175703332723b27aac50e2514ca854.yml
-openapi_spec_hash: a22051c017a4822ef689585896659675
-config_hash: 39578cfdeb4a10121f2cb3fa3e4d5e20
+configured_endpoints: 108
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-5b5c7384ecc87cd37cdcfef47628d88ae84c366452114e8891d5d9cf4fde810c.yml
+openapi_spec_hash: 9774340e22da2895556cf845f044a69a
+config_hash: aa28e451064c13a38ddc44df99ebf52a
44 changes: 44 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,49 @@
# Changelog

## 0.4.0-alpha.14 (2025-12-22)

Full Changelog: [v0.4.0-alpha.13...v0.4.0-alpha.14](https://github.com/llamastack/llama-stack-client-python/compare/v0.4.0-alpha.13...v0.4.0-alpha.14)

### Features

* Add metadata field to request and response ([a83a98e](https://github.com/llamastack/llama-stack-client-python/commit/a83a98e73429e9c12fc679e429cd350498276f2f))
* add support for tool_choice to responses api ([32c453f](https://github.com/llamastack/llama-stack-client-python/commit/32c453f2c2ba39e65830d36f92b11d7ea6bf98f5))
* **api:** add readonly connectors API ([05f867b](https://github.com/llamastack/llama-stack-client-python/commit/05f867b82f03b4ece33c52b68a06b5e68c345ccc))
* **api:** deprecate `toolgroup` and `tool_runtime` apis ([d5cef5a](https://github.com/llamastack/llama-stack-client-python/commit/d5cef5ae52db0f4d4ccffb36d4c69e39182af91b))
* convert Benchmarks API to use FastAPI router ([c784ae1](https://github.com/llamastack/llama-stack-client-python/commit/c784ae1fd823e70fef91793125fadd44d33ba0ce))
* convert Datasets API to use FastAPI router ([2f53a3f](https://github.com/llamastack/llama-stack-client-python/commit/2f53a3f60fb7da474e2e8d3839d6c58d6a6d2c89))
* Implement FastAPI router system ([2ca4485](https://github.com/llamastack/llama-stack-client-python/commit/2ca44854928f4360911439690461fa0cfa2db8f9))
* Implement include parameter specifically for adding logprobs in the output message ([05a0330](https://github.com/llamastack/llama-stack-client-python/commit/05a0330bb08ca21980d79ffdd66cdd2e3af21d51))
* introduce /admin API for stack administration and operations ([3279ec9](https://github.com/llamastack/llama-stack-client-python/commit/3279ec9799e21595b7410f0cc784a4b0c854ade5))
* migrate Inspect API to FastAPI router ([7774bec](https://github.com/llamastack/llama-stack-client-python/commit/7774bec492608a78eedb804fd6ab7997fdabdc60))
* migrate Providers API to FastAPI router pattern ([f8c84cd](https://github.com/llamastack/llama-stack-client-python/commit/f8c84cd0fb2ba351366c3a68c9836dd8112b09d0))


### Bug Fixes

* **client:** fix issue with duplicate definitions in Go ([dd94553](https://github.com/llamastack/llama-stack-client-python/commit/dd9455390a84c918ef517a2d1488598c44a9fb40))
* ensure streams are always closed ([9dc1025](https://github.com/llamastack/llama-stack-client-python/commit/9dc1025c4679f82a9528e859ce2332b9bc8dfb9d))
* **types:** allow pyright to infer TypedDict types within SequenceNotStr ([93be516](https://github.com/llamastack/llama-stack-client-python/commit/93be5160a0dd1260b728959c29a931b5d7f42fdb))
* use async_to_httpx_files in patch method ([77df10d](https://github.com/llamastack/llama-stack-client-python/commit/77df10d1c6635984f5f9b48e4479872c88fbb91b))


### Chores

* add missing docstrings ([18c54f3](https://github.com/llamastack/llama-stack-client-python/commit/18c54f3ca59edc71633a937914762129856a8950))
* bump required `uv` version ([2a1523b](https://github.com/llamastack/llama-stack-client-python/commit/2a1523b450a8e66ce2e16f20a5b73f821144748a))
* **deps:** mypy 1.18.1 has a regression, pin to 1.17 ([a900167](https://github.com/llamastack/llama-stack-client-python/commit/a90016741193156e589525c874dff32459be2a6d))
* **internal:** add `--fix` argument to lint script ([89e14ad](https://github.com/llamastack/llama-stack-client-python/commit/89e14ad88db7e5ac1dc088c6affec85e749ff4e8))
* **internal:** add missing files argument to base client ([9cae3df](https://github.com/llamastack/llama-stack-client-python/commit/9cae3df5c115b176be5515fb47ed3d2d69cd0bb8))
* **internal:** avoid using unstable Python versions in tests ([36336dd](https://github.com/llamastack/llama-stack-client-python/commit/36336dd67f3dcaa355b3b4bb99d4e1f6d73b9779))
* **internal:** version bump ([597c7c6](https://github.com/llamastack/llama-stack-client-python/commit/597c7c6156b28bcb7f3c59d742532c1204dd2647))
* update lockfile ([de67c1e](https://github.com/llamastack/llama-stack-client-python/commit/de67c1ebd3abbdc111dfbd979e1ecb655cff25e7))
* update uv.lock ([95cb22a](https://github.com/llamastack/llama-stack-client-python/commit/95cb22ab14711e3c4220dcf9a83bc87ea2436efe))


### Documentation

* add more examples ([d24dff6](https://github.com/llamastack/llama-stack-client-python/commit/d24dff6000019c0091f13fecf63ac90e02122cae))

## 0.4.0-alpha.13 (2025-12-15)

Full Changelog: [v0.4.0-alpha.12...v0.4.0-alpha.13](https://github.com/llamastack/llama-stack-client-python/compare/v0.4.0-alpha.12...v0.4.0-alpha.13)
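Two of the headline entries above land on the responses API: the new `metadata` field and `tool_choice` support. A hypothetical sketch of how they might combine (parameter names and accepted values are assumptions to be checked against the generated SDK, not code taken from this PR):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient()

# Hypothetical sketch exercising the two features named above;
# `tool_choice="auto"` and the `metadata` dict are assumed shapes.
response = client.responses.create(
    model="model",
    input="What is the capital of France?",
    tool_choice="auto",  # feat: tool_choice on the responses API
    metadata={"trace_id": "abc-123"},  # feat: metadata on request and response
)
print(response.metadata)  # metadata is echoed back on the response
```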
83 changes: 51 additions & 32 deletions README.md
@@ -127,58 +127,77 @@ async def main() -> None:
 asyncio.run(main())
 ```

-## Streaming responses
+## Using types
+
+Nested request parameters are [TypedDicts](https://docs.python.org/3/library/typing.html#typing.TypedDict). Responses are [Pydantic models](https://docs.pydantic.dev) which also provide helper methods for things like:

-We provide support for streaming responses using Server Side Events (SSE).
+- Serializing back into JSON, `model.to_json()`
+- Converting to a dictionary, `model.to_dict()`
+
+Typed requests and responses provide autocomplete and documentation within your editor. If you would like to see type errors in VS Code to help catch bugs earlier, set `python.analysis.typeCheckingMode` to `basic`.
+
+## Pagination
+
+List methods in the Llama Stack Client API are paginated.
+
+This library provides auto-paginating iterators with each list response, so you do not have to request successive pages manually:

 ```python
 from llama_stack_client import LlamaStackClient

 client = LlamaStackClient()

-stream = client.chat.completions.create(
-    messages=[
-        {
-            "content": "string",
-            "role": "user",
-        }
-    ],
-    model="model",
-    stream=True,
-)
-for completion in stream:
-    print(completion.id)
+all_responses = []
+# Automatically fetches more pages as needed.
+for response in client.responses.list():
+    # Do something with response here
+    all_responses.append(response)
+print(all_responses)
 ```

-The async client uses the exact same interface.
+Or, asynchronously:

 ```python
+import asyncio
 from llama_stack_client import AsyncLlamaStackClient

 client = AsyncLlamaStackClient()

-stream = await client.chat.completions.create(
-    messages=[
-        {
-            "content": "string",
-            "role": "user",
-        }
-    ],
-    model="model",
-    stream=True,
-)
-async for completion in stream:
-    print(completion.id)
-```

-## Using types
+async def main() -> None:
+    all_responses = []
+    # Iterate through items across all pages, issuing requests as needed.
+    async for response in client.responses.list():
+        all_responses.append(response)
+    print(all_responses)

-Nested request parameters are [TypedDicts](https://docs.python.org/3/library/typing.html#typing.TypedDict). Responses are [Pydantic models](https://docs.pydantic.dev) which also provide helper methods for things like:
+
+asyncio.run(main())
+```

-- Serializing back into JSON, `model.to_json()`
-- Converting to a dictionary, `model.to_dict()`
+Alternatively, you can use the `.has_next_page()`, `.next_page_info()`, or `.get_next_page()` methods for more granular control working with pages:
+
+```python
+first_page = await client.responses.list()
+if first_page.has_next_page():
+    print(f"will fetch next page using these details: {first_page.next_page_info()}")
+    next_page = await first_page.get_next_page()
+    print(f"number of items we just fetched: {len(next_page.data)}")
+
+# Remove `await` for non-async usage.
+```

-Typed requests and responses provide autocomplete and documentation within your editor. If you would like to see type errors in VS Code to help catch bugs earlier, set `python.analysis.typeCheckingMode` to `basic`.
+Or just work directly with the returned data:
+
+```python
+first_page = await client.responses.list()
+
+print(f"next page cursor: {first_page.last_id}")  # => "next page cursor: ..."
+for response in first_page.data:
+    print(response.id)
+
+# Remove `await` for non-async usage.
+```

 ## Nested params
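The new pagination snippets above each show a single page step; under the same documented surface (`has_next_page()`, `get_next_page()`, `.data`), a full manual-pagination loop would be a small sketch along these lines:

```python
import asyncio

from llama_stack_client import AsyncLlamaStackClient

client = AsyncLlamaStackClient()


async def main() -> None:
    # Walk every page by hand using the methods documented above;
    # drop `await` (and use LlamaStackClient) for synchronous code.
    page = await client.responses.list()
    while True:
        for response in page.data:
            print(response.id)
        if not page.has_next_page():
            break
        page = await page.get_next_page()


asyncio.run(main())
```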
32 changes: 21 additions & 11 deletions api.md
@@ -2,13 +2,19 @@

 ```python
 from llama_stack_client.types import (
+    HealthInfo,
     InterleavedContent,
     InterleavedContentItem,
+    ListProvidersResponse,
+    ListRoutesResponse,
     ParamType,
+    ProviderInfo,
+    RouteInfo,
     SafetyViolation,
     SamplingParams,
     ScoringResult,
     SystemMessage,
+    VersionInfo,
 )
 ```

@@ -145,16 +151,10 @@ Methods:

 # Inspect

-Types:
-
-```python
-from llama_stack_client.types import HealthInfo, ProviderInfo, RouteInfo, VersionInfo
-```
-
 Methods:

-- <code title="get /v1/health">client.inspect.<a href="./src/llama_stack_client/resources/inspect.py">health</a>() -> <a href="./src/llama_stack_client/types/health_info.py">HealthInfo</a></code>
-- <code title="get /v1/version">client.inspect.<a href="./src/llama_stack_client/resources/inspect.py">version</a>() -> <a href="./src/llama_stack_client/types/version_info.py">VersionInfo</a></code>
+- <code title="get /v1/health">client.inspect.<a href="./src/llama_stack_client/resources/inspect.py">health</a>() -> <a href="./src/llama_stack_client/types/shared/health_info.py">HealthInfo</a></code>
+- <code title="get /v1/version">client.inspect.<a href="./src/llama_stack_client/resources/inspect.py">version</a>() -> <a href="./src/llama_stack_client/types/shared/version_info.py">VersionInfo</a></code>

 # Embeddings

@@ -312,20 +312,20 @@ Methods:
 Types:

 ```python
-from llama_stack_client.types import ListProvidersResponse, ProviderListResponse
+from llama_stack_client.types import ProviderListResponse
 ```

 Methods:

-- <code title="get /v1/providers/{provider_id}">client.providers.<a href="./src/llama_stack_client/resources/providers.py">retrieve</a>(provider_id) -> <a href="./src/llama_stack_client/types/provider_info.py">ProviderInfo</a></code>
+- <code title="get /v1/providers/{provider_id}">client.providers.<a href="./src/llama_stack_client/resources/providers.py">retrieve</a>(provider_id) -> <a href="./src/llama_stack_client/types/shared/provider_info.py">ProviderInfo</a></code>
 - <code title="get /v1/providers">client.providers.<a href="./src/llama_stack_client/resources/providers.py">list</a>() -> <a href="./src/llama_stack_client/types/provider_list_response.py">ProviderListResponse</a></code>

 # Routes

 Types:

 ```python
-from llama_stack_client.types import ListRoutesResponse, RouteListResponse
+from llama_stack_client.types import RouteListResponse
 ```

 Methods:

@@ -528,6 +528,16 @@ Methods:
 - <code title="delete /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}">client.alpha.eval.jobs.<a href="./src/llama_stack_client/resources/alpha/eval/jobs.py">cancel</a>(job_id, \*, benchmark_id) -> None</code>
 - <code title="get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}">client.alpha.eval.jobs.<a href="./src/llama_stack_client/resources/alpha/eval/jobs.py">status</a>(job_id, \*, benchmark_id) -> <a href="./src/llama_stack_client/types/alpha/job.py">Job</a></code>

+## Admin
+
+Methods:
+
+- <code title="get /v1alpha/admin/health">client.alpha.admin.<a href="./src/llama_stack_client/resources/alpha/admin.py">health</a>() -> <a href="./src/llama_stack_client/types/shared/health_info.py">HealthInfo</a></code>
+- <code title="get /v1alpha/admin/providers/{provider_id}">client.alpha.admin.<a href="./src/llama_stack_client/resources/alpha/admin.py">inspect_provider</a>(provider_id) -> <a href="./src/llama_stack_client/types/shared/provider_info.py">ProviderInfo</a></code>
+- <code title="get /v1alpha/admin/providers">client.alpha.admin.<a href="./src/llama_stack_client/resources/alpha/admin.py">list_providers</a>() -> <a href="./src/llama_stack_client/types/provider_list_response.py">ProviderListResponse</a></code>
+- <code title="get /v1alpha/admin/inspect/routes">client.alpha.admin.<a href="./src/llama_stack_client/resources/alpha/admin.py">list_routes</a>(\*\*<a href="src/llama_stack_client/types/alpha/admin_list_routes_params.py">params</a>) -> <a href="./src/llama_stack_client/types/route_list_response.py">RouteListResponse</a></code>
+- <code title="get /v1alpha/admin/version">client.alpha.admin.<a href="./src/llama_stack_client/resources/alpha/admin.py">version</a>() -> <a href="./src/llama_stack_client/types/shared/version_info.py">VersionInfo</a></code>
+
 # Beta

 ## Datasets
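Going by the method listing above, the new read-only admin surface can be exercised with a few calls. A minimal sketch, assuming a running stack at the default base URL; the `"provider_id"` argument is a placeholder:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient()

# Minimal sketch of the /v1alpha/admin surface listed above.
print(client.alpha.admin.health())   # -> HealthInfo
print(client.alpha.admin.version())  # -> VersionInfo

providers = client.alpha.admin.list_providers()  # -> ProviderListResponse
print(providers)

# Inspect one provider; "provider_id" is a placeholder value.
print(client.alpha.admin.inspect_provider("provider_id"))
```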
18 changes: 10 additions & 8 deletions pyproject.toml
@@ -1,17 +1,18 @@
 [project]
 name = "llama_stack_client"
-version = "0.4.0-alpha.13"
+version = "0.4.0-alpha.14"
 description = "The official Python library for the llama-stack-client API"
 dynamic = ["readme"]
 license = "MIT"
 authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
+
 dependencies = [
-  "httpx>=0.23.0, <1",
-  "pydantic>=1.9.0, <3",
+    "httpx>=0.23.0, <1",
+    "pydantic>=1.9.0, <3",
     "typing-extensions>=4.7, <5",
-  "anyio>=3.5.0, <5",
-  "distro>=1.7.0, <2",
-  "sniffio",
+    "anyio>=3.5.0, <5",
+    "distro>=1.7.0, <2",
+    "sniffio",
     "tqdm",
     "rich",
     "click",
@@ -22,6 +23,7 @@ dependencies = [
     "fire",
     "requests",
 ]
+
 requires-python = ">= 3.12"
 classifiers = [
     "Typing :: Typed",
@@ -46,7 +48,7 @@ aiohttp = ["aiohttp", "httpx_aiohttp>=0.1.9"]

 [tool.uv]
 managed = true
-required-version = ">=0.5.0"
+required-version = ">=0.9"
 conflicts = [
   [
     { group = "pydantic-v1" },
@@ -58,7 +60,7 @@ conflicts = [
 # version pins are in uv.lock
 dev = [
   "pyright==1.1.399",
-  "mypy",
+  "mypy==1.17",
   "respx",
   "pytest>=7.1.1",
   "pytest-asyncio",