llamastack · yanxi0830 · Feb 14, 2025
diff --git a/src/llama_stack_client/_client.py b/src/llama_stack_client/_client.py
@@ -39,6 +39,7 @@
     providers,
     telemetry,
     vector_io,
+    benchmarks,
     eval_tasks,
     toolgroups,
     vector_dbs,
@@ -94,6 +95,7 @@ class LlamaStackClient(SyncAPIClient):
     scoring: scoring.ScoringResource
     scoring_functions: scoring_functions.ScoringFunctionsResource
     eval_tasks: eval_tasks.EvalTasksResource
+    benchmarks: benchmarks.BenchmarksResource
     with_raw_response: LlamaStackClientWithRawResponse
     with_streaming_response: LlamaStackClientWithStreamedResponse
 
@@ -176,6 +178,7 @@ def __init__(
         self.scoring = scoring.ScoringResource(self)
         self.scoring_functions = scoring_functions.ScoringFunctionsResource(self)
         self.eval_tasks = eval_tasks.EvalTasksResource(self)
+        self.benchmarks = benchmarks.BenchmarksResource(self)
         self.with_raw_response = LlamaStackClientWithRawResponse(self)
         self.with_streaming_response = LlamaStackClientWithStreamedResponse(self)
 
@@ -310,6 +313,7 @@ class AsyncLlamaStackClient(AsyncAPIClient):
     scoring: scoring.AsyncScoringResource
     scoring_functions: scoring_functions.AsyncScoringFunctionsResource
     eval_tasks: eval_tasks.AsyncEvalTasksResource
+    benchmarks: benchmarks.AsyncBenchmarksResource
     with_raw_response: AsyncLlamaStackClientWithRawResponse
     with_streaming_response: AsyncLlamaStackClientWithStreamedResponse
 
@@ -392,6 +396,7 @@ def __init__(
         self.scoring = scoring.AsyncScoringResource(self)
         self.scoring_functions = scoring_functions.AsyncScoringFunctionsResource(self)
         self.eval_tasks = eval_tasks.AsyncEvalTasksResource(self)
+        self.benchmarks = benchmarks.AsyncBenchmarksResource(self)
         self.with_raw_response = AsyncLlamaStackClientWithRawResponse(self)
         self.with_streaming_response = AsyncLlamaStackClientWithStreamedResponse(self)
 
@@ -529,6 +534,7 @@ def __init__(self, client: LlamaStackClient) -> None:
         self.scoring = scoring.ScoringResourceWithRawResponse(client.scoring)
         self.scoring_functions = scoring_functions.ScoringFunctionsResourceWithRawResponse(client.scoring_functions)
         self.eval_tasks = eval_tasks.EvalTasksResourceWithRawResponse(client.eval_tasks)
+        self.benchmarks = benchmarks.BenchmarksResourceWithRawResponse(client.benchmarks)
 
 
 class AsyncLlamaStackClientWithRawResponse:
@@ -560,6 +566,7 @@ def __init__(self, client: AsyncLlamaStackClient) -> None:
             client.scoring_functions
         )
         self.eval_tasks = eval_tasks.AsyncEvalTasksResourceWithRawResponse(client.eval_tasks)
+        self.benchmarks = benchmarks.AsyncBenchmarksResourceWithRawResponse(client.benchmarks)
 
 
 class LlamaStackClientWithStreamedResponse:
@@ -591,6 +598,7 @@ def __init__(self, client: LlamaStackClient) -> None:
             client.scoring_functions
         )
         self.eval_tasks = eval_tasks.EvalTasksResourceWithStreamingResponse(client.eval_tasks)
+        self.benchmarks = benchmarks.BenchmarksResourceWithStreamingResponse(client.benchmarks)
 
 
 class AsyncLlamaStackClientWithStreamedResponse:
@@ -624,6 +632,7 @@ def __init__(self, client: AsyncLlamaStackClient) -> None:
             client.scoring_functions
         )
         self.eval_tasks = eval_tasks.AsyncEvalTasksResourceWithStreamingResponse(client.eval_tasks)
+        self.benchmarks = benchmarks.AsyncBenchmarksResourceWithStreamingResponse(client.benchmarks)
 
 
 Client = LlamaStackClient

diff --git a/src/llama_stack_client/_decoders/jsonl.py b/src/llama_stack_client/_decoders/jsonl.py
@@ -17,18 +17,29 @@ class JSONLDecoder(Generic[_T]):
     into a given type.
     """
 
-    http_response: httpx.Response | None
+    http_response: httpx.Response
     """The HTTP response this decoder was constructed from"""
 
     def __init__(
-        self, *, raw_iterator: Iterator[bytes], line_type: type[_T], http_response: httpx.Response | None
+        self,
+        *,
+        raw_iterator: Iterator[bytes],
+        line_type: type[_T],
+        http_response: httpx.Response,
     ) -> None:
         super().__init__()
         self.http_response = http_response
         self._raw_iterator = raw_iterator
         self._line_type = line_type
         self._iterator = self.__decode__()
 
+    def close(self) -> None:
+        """Close the response body stream.
+
+        This is called automatically if you consume the entire stream.
+        """
+        self.http_response.close()
+
     def __decode__(self) -> Iterator[_T]:
         buf = b""
         for chunk in self._raw_iterator:
@@ -63,17 +74,28 @@ class AsyncJSONLDecoder(Generic[_T]):
     into a given type.
     """
 
-    http_response: httpx.Response | None
+    http_response: httpx.Response
 
     def __init__(
-        self, *, raw_iterator: AsyncIterator[bytes], line_type: type[_T], http_response: httpx.Response | None
+        self,
+        *,
+        raw_iterator: AsyncIterator[bytes],
+        line_type: type[_T],
+        http_response: httpx.Response,
     ) -> None:
         super().__init__()
         self.http_response = http_response
         self._raw_iterator = raw_iterator
         self._line_type = line_type
         self._iterator = self.__decode__()
 
+    async def close(self) -> None:
+        """Close the response body stream.
+
+        This is called automatically if you consume the entire stream.
+        """
+        await self.http_response.aclose()
+
     async def __decode__(self) -> AsyncIterator[_T]:
         buf = b""
         async for chunk in self._raw_iterator:

diff --git a/src/llama_stack_client/_models.py b/src/llama_stack_client/_models.py
@@ -426,10 +426,16 @@ def construct_type(*, value: object, type_: object) -> object:
 
     If the given value does not match the expected type then it is returned as-is.
     """
+
+    # store a reference to the original type we were given before we extract any inner
+    # types so that we can properly resolve forward references in `TypeAliasType` annotations
+    original_type = None
+
     # we allow `object` as the input type because otherwise, passing things like
     # `Literal['value']` will be reported as a type error by type checkers
     type_ = cast("type[object]", type_)
     if is_type_alias_type(type_):
+        original_type = type_  # type: ignore[unreachable]
         type_ = type_.__value__  # type: ignore[unreachable]
 
     # unwrap `Annotated[T, ...]` -> `T`
@@ -446,7 +452,7 @@ def construct_type(*, value: object, type_: object) -> object:
 
     if is_union(origin):
         try:
-            return validate_type(type_=cast("type[object]", type_), value=value)
+            return validate_type(type_=cast("type[object]", original_type or type_), value=value)
         except Exception:
             pass
 

diff --git a/src/llama_stack_client/_response.py b/src/llama_stack_client/_response.py
@@ -144,7 +144,7 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T:
                 return cast(
                     R,
                     cast("type[JSONLDecoder[Any]]", cast_to)(
-                        raw_iterator=self.http_response.iter_bytes(chunk_size=4096),
+                        raw_iterator=self.http_response.iter_bytes(chunk_size=64),
                         line_type=extract_type_arg(cast_to, 0),
                         http_response=self.http_response,
                     ),
@@ -154,7 +154,7 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T:
                 return cast(
                     R,
                     cast("type[AsyncJSONLDecoder[Any]]", cast_to)(
-                        raw_iterator=self.http_response.aiter_bytes(chunk_size=4096),
+                        raw_iterator=self.http_response.aiter_bytes(chunk_size=64),
                         line_type=extract_type_arg(cast_to, 0),
                         http_response=self.http_response,
                     ),

diff --git a/src/llama_stack_client/_utils/_transform.py b/src/llama_stack_client/_utils/_transform.py
@@ -25,7 +25,7 @@
     is_annotated_type,
     strip_annotated_type,
 )
-from .._compat import model_dump, is_typeddict
+from .._compat import get_origin, model_dump, is_typeddict
 
 _T = TypeVar("_T")
 
@@ -164,9 +164,14 @@ def _transform_recursive(
         inner_type = annotation
 
     stripped_type = strip_annotated_type(inner_type)
+    origin = get_origin(stripped_type) or stripped_type
     if is_typeddict(stripped_type) and is_mapping(data):
         return _transform_typeddict(data, stripped_type)
 
+    if origin == dict and is_mapping(data):
+        items_type = get_args(stripped_type)[1]  # value type V of Dict[K, V]
+        return {key: _transform_recursive(value, annotation=items_type) for key, value in data.items()}
+
     if (
         # List[T]
         (is_list_type(stripped_type) and is_list(data))
@@ -307,9 +312,14 @@ async def _async_transform_recursive(
         inner_type = annotation
 
     stripped_type = strip_annotated_type(inner_type)
+    origin = get_origin(stripped_type) or stripped_type
     if is_typeddict(stripped_type) and is_mapping(data):
         return await _async_transform_typeddict(data, stripped_type)
 
+    if origin == dict and is_mapping(data):
+        items_type = get_args(stripped_type)[1]  # value type V of Dict[K, V]
+        return {key: await _async_transform_recursive(value, annotation=items_type) for key, value in data.items()}
+
     if (
         # List[T]
         (is_list_type(stripped_type) and is_list(data))

diff --git a/src/llama_stack_client/resources/__init__.py b/src/llama_stack_client/resources/__init__.py
@@ -120,6 +120,14 @@
     VectorIoResourceWithStreamingResponse,
     AsyncVectorIoResourceWithStreamingResponse,
 )
+from .benchmarks import (
+    BenchmarksResource,
+    AsyncBenchmarksResource,
+    BenchmarksResourceWithRawResponse,
+    AsyncBenchmarksResourceWithRawResponse,
+    BenchmarksResourceWithStreamingResponse,
+    AsyncBenchmarksResourceWithStreamingResponse,
+)
 from .eval_tasks import (
     EvalTasksResource,
     AsyncEvalTasksResource,
@@ -324,4 +332,10 @@
     "AsyncEvalTasksResourceWithRawResponse",
     "EvalTasksResourceWithStreamingResponse",
     "AsyncEvalTasksResourceWithStreamingResponse",
+    "BenchmarksResource",
+    "AsyncBenchmarksResource",
+    "BenchmarksResourceWithRawResponse",
+    "AsyncBenchmarksResourceWithRawResponse",
+    "BenchmarksResourceWithStreamingResponse",
+    "AsyncBenchmarksResourceWithStreamingResponse",
 ]