38 changes: 19 additions & 19 deletions src/llama_stack_client/_client.py
@@ -27,7 +27,6 @@
from ._version import __version__
from .resources import (
tools,
memory,
models,
routes,
safety,
@@ -39,10 +38,10 @@
inference,
providers,
telemetry,
vector_io,
eval_tasks,
toolgroups,
memory_banks,
tool_runtime,
vector_dbs,
batch_inference,
scoring_functions,
synthetic_data_generation,
@@ -56,6 +55,7 @@
)
from .resources.eval import eval
from .resources.agents import agents
from .resources.tool_runtime import tool_runtime
from .resources.post_training import post_training

__all__ = [
@@ -80,8 +80,8 @@ class LlamaStackClient(SyncAPIClient):
eval: eval.EvalResource
inspect: inspect.InspectResource
inference: inference.InferenceResource
memory: memory.MemoryResource
memory_banks: memory_banks.MemoryBanksResource
vector_io: vector_io.VectorIoResource
vector_dbs: vector_dbs.VectorDBsResource
models: models.ModelsResource
post_training: post_training.PostTrainingResource
providers: providers.ProvidersResource
@@ -153,8 +153,8 @@ def __init__(
self.eval = eval.EvalResource(self)
self.inspect = inspect.InspectResource(self)
self.inference = inference.InferenceResource(self)
self.memory = memory.MemoryResource(self)
self.memory_banks = memory_banks.MemoryBanksResource(self)
self.vector_io = vector_io.VectorIoResource(self)
self.vector_dbs = vector_dbs.VectorDBsResource(self)
self.models = models.ModelsResource(self)
self.post_training = post_training.PostTrainingResource(self)
self.providers = providers.ProvidersResource(self)
@@ -277,8 +277,8 @@ class AsyncLlamaStackClient(AsyncAPIClient):
eval: eval.AsyncEvalResource
inspect: inspect.AsyncInspectResource
inference: inference.AsyncInferenceResource
memory: memory.AsyncMemoryResource
memory_banks: memory_banks.AsyncMemoryBanksResource
vector_io: vector_io.AsyncVectorIoResource
vector_dbs: vector_dbs.AsyncVectorDBsResource
models: models.AsyncModelsResource
post_training: post_training.AsyncPostTrainingResource
providers: providers.AsyncProvidersResource
@@ -350,8 +350,8 @@ def __init__(
self.eval = eval.AsyncEvalResource(self)
self.inspect = inspect.AsyncInspectResource(self)
self.inference = inference.AsyncInferenceResource(self)
self.memory = memory.AsyncMemoryResource(self)
self.memory_banks = memory_banks.AsyncMemoryBanksResource(self)
self.vector_io = vector_io.AsyncVectorIoResource(self)
self.vector_dbs = vector_dbs.AsyncVectorDBsResource(self)
self.models = models.AsyncModelsResource(self)
self.post_training = post_training.AsyncPostTrainingResource(self)
self.providers = providers.AsyncProvidersResource(self)
@@ -475,8 +475,8 @@ def __init__(self, client: LlamaStackClient) -> None:
self.eval = eval.EvalResourceWithRawResponse(client.eval)
self.inspect = inspect.InspectResourceWithRawResponse(client.inspect)
self.inference = inference.InferenceResourceWithRawResponse(client.inference)
self.memory = memory.MemoryResourceWithRawResponse(client.memory)
self.memory_banks = memory_banks.MemoryBanksResourceWithRawResponse(client.memory_banks)
self.vector_io = vector_io.VectorIoResourceWithRawResponse(client.vector_io)
self.vector_dbs = vector_dbs.VectorDBsResourceWithRawResponse(client.vector_dbs)
self.models = models.ModelsResourceWithRawResponse(client.models)
self.post_training = post_training.PostTrainingResourceWithRawResponse(client.post_training)
self.providers = providers.ProvidersResourceWithRawResponse(client.providers)
@@ -504,8 +504,8 @@ def __init__(self, client: AsyncLlamaStackClient) -> None:
self.eval = eval.AsyncEvalResourceWithRawResponse(client.eval)
self.inspect = inspect.AsyncInspectResourceWithRawResponse(client.inspect)
self.inference = inference.AsyncInferenceResourceWithRawResponse(client.inference)
self.memory = memory.AsyncMemoryResourceWithRawResponse(client.memory)
self.memory_banks = memory_banks.AsyncMemoryBanksResourceWithRawResponse(client.memory_banks)
self.vector_io = vector_io.AsyncVectorIoResourceWithRawResponse(client.vector_io)
self.vector_dbs = vector_dbs.AsyncVectorDBsResourceWithRawResponse(client.vector_dbs)
self.models = models.AsyncModelsResourceWithRawResponse(client.models)
self.post_training = post_training.AsyncPostTrainingResourceWithRawResponse(client.post_training)
self.providers = providers.AsyncProvidersResourceWithRawResponse(client.providers)
@@ -535,8 +535,8 @@ def __init__(self, client: LlamaStackClient) -> None:
self.eval = eval.EvalResourceWithStreamingResponse(client.eval)
self.inspect = inspect.InspectResourceWithStreamingResponse(client.inspect)
self.inference = inference.InferenceResourceWithStreamingResponse(client.inference)
self.memory = memory.MemoryResourceWithStreamingResponse(client.memory)
self.memory_banks = memory_banks.MemoryBanksResourceWithStreamingResponse(client.memory_banks)
self.vector_io = vector_io.VectorIoResourceWithStreamingResponse(client.vector_io)
self.vector_dbs = vector_dbs.VectorDBsResourceWithStreamingResponse(client.vector_dbs)
self.models = models.ModelsResourceWithStreamingResponse(client.models)
self.post_training = post_training.PostTrainingResourceWithStreamingResponse(client.post_training)
self.providers = providers.ProvidersResourceWithStreamingResponse(client.providers)
@@ -566,8 +566,8 @@ def __init__(self, client: AsyncLlamaStackClient) -> None:
self.eval = eval.AsyncEvalResourceWithStreamingResponse(client.eval)
self.inspect = inspect.AsyncInspectResourceWithStreamingResponse(client.inspect)
self.inference = inference.AsyncInferenceResourceWithStreamingResponse(client.inference)
self.memory = memory.AsyncMemoryResourceWithStreamingResponse(client.memory)
self.memory_banks = memory_banks.AsyncMemoryBanksResourceWithStreamingResponse(client.memory_banks)
self.vector_io = vector_io.AsyncVectorIoResourceWithStreamingResponse(client.vector_io)
self.vector_dbs = vector_dbs.AsyncVectorDBsResourceWithStreamingResponse(client.vector_dbs)
self.models = models.AsyncModelsResourceWithStreamingResponse(client.models)
self.post_training = post_training.AsyncPostTrainingResourceWithStreamingResponse(client.post_training)
self.providers = providers.AsyncProvidersResourceWithStreamingResponse(client.providers)
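
For callers the rename is mechanical: the attributes that were client.memory and client.memory_banks are now client.vector_io and client.vector_dbs, on both the sync and async clients and their raw/streaming wrappers. A minimal migration sketch; only the attribute names are confirmed by this diff, and the URL is a placeholder:

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:5001")  # placeholder URL

    # Before this change: client.memory, client.memory_banks
    # After this change:
    vector_dbs = client.vector_dbs  # replaces client.memory_banks
    vector_io = client.vector_io    # replaces client.memory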
101 changes: 101 additions & 0 deletions src/llama_stack_client/_decoders/jsonl.py
@@ -0,0 +1,101 @@
from __future__ import annotations

import json
from typing_extensions import Generic, TypeVar, Iterator, AsyncIterator

import httpx

from .._models import construct_type_unchecked

_T = TypeVar("_T")


class JSONLDecoder(Generic[_T]):
"""A decoder for [JSON Lines](https://jsonlines.org) format.

This class provides an iterator over a byte-iterator that parses each JSON Line
into a given type.
"""

http_response: httpx.Response | None
"""The HTTP response this decoder was constructed from"""

def __init__(
self, *, raw_iterator: Iterator[bytes], line_type: type[_T], http_response: httpx.Response | None
) -> None:
super().__init__()
self.http_response = http_response
self._raw_iterator = raw_iterator
self._line_type = line_type
self._iterator = self.__decode__()

def __decode__(self) -> Iterator[_T]:
buf = b""
for chunk in self._raw_iterator:
for line in chunk.splitlines(keepends=True):
buf += line
if buf.endswith((b"\r", b"\n", b"\r\n")):
yield construct_type_unchecked(
value=json.loads(buf),
type_=self._line_type,
)
buf = b""

# flush
if buf:
yield construct_type_unchecked(
value=json.loads(buf),
type_=self._line_type,
)

def __next__(self) -> _T:
return self._iterator.__next__()

def __iter__(self) -> Iterator[_T]:
for item in self._iterator:
yield item


class AsyncJSONLDecoder(Generic[_T]):
"""A decoder for [JSON Lines](https://jsonlines.org) format.

This class provides an async iterator over a byte-iterator that parses each JSON Line
into a given type.
"""

http_response: httpx.Response | None

def __init__(
self, *, raw_iterator: AsyncIterator[bytes], line_type: type[_T], http_response: httpx.Response | None
) -> None:
super().__init__()
self.http_response = http_response
self._raw_iterator = raw_iterator
self._line_type = line_type
self._iterator = self.__decode__()

async def __decode__(self) -> AsyncIterator[_T]:
buf = b""
async for chunk in self._raw_iterator:
for line in chunk.splitlines(keepends=True):
buf += line
if buf.endswith((b"\r", b"\n", b"\r\n")):
yield construct_type_unchecked(
value=json.loads(buf),
type_=self._line_type,
)
buf = b""

# flush
if buf:
yield construct_type_unchecked(
value=json.loads(buf),
type_=self._line_type,
)

async def __anext__(self) -> _T:
return await self._iterator.__anext__()

async def __aiter__(self) -> AsyncIterator[_T]:
async for item in self._iterator:
yield item
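
A minimal sketch of the sync decoder in isolation (no HTTP involved, so http_response may be None per its annotation). The chunk boundaries are chosen to show the buffering: a JSON Line split across chunks is parsed only once its line terminator arrives:

    from llama_stack_client._decoders.jsonl import JSONLDecoder

    # Two JSON Lines delivered across awkward chunk boundaries.
    chunks = [b'{"event": "start"}\n{"ev', b'ent": "stop"}\n']
    decoder = JSONLDecoder(raw_iterator=iter(chunks), line_type=dict, http_response=None)
    print(list(decoder))  # [{'event': 'start'}, {'event': 'stop'}]

With line_type=dict, construct_type_unchecked passes the parsed objects through essentially unchanged; in SDK use the type argument would normally be a response model.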
26 changes: 24 additions & 2 deletions src/llama_stack_client/_response.py
@@ -30,6 +30,7 @@
from ._constants import RAW_RESPONSE_HEADER, OVERRIDE_CAST_TO_HEADER
from ._streaming import Stream, AsyncStream, is_stream_class_type, extract_stream_chunk_type
from ._exceptions import LlamaStackClientError, APIResponseValidationError
from ._decoders.jsonl import JSONLDecoder, AsyncJSONLDecoder

if TYPE_CHECKING:
from ._models import FinalRequestOptions
@@ -136,6 +137,29 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T:
if cast_to and is_annotated_type(cast_to):
cast_to = extract_type_arg(cast_to, 0)

origin = get_origin(cast_to) or cast_to

if inspect.isclass(origin):
if issubclass(cast(Any, origin), JSONLDecoder):
return cast(
R,
cast("type[JSONLDecoder[Any]]", cast_to)(
raw_iterator=self.http_response.iter_bytes(chunk_size=4096),
line_type=extract_type_arg(cast_to, 0),
http_response=self.http_response,
),
)

if issubclass(cast(Any, origin), AsyncJSONLDecoder):
return cast(
R,
cast("type[AsyncJSONLDecoder[Any]]", cast_to)(
raw_iterator=self.http_response.aiter_bytes(chunk_size=4096),
line_type=extract_type_arg(cast_to, 0),
http_response=self.http_response,
),
)

if self._is_sse_stream:
if to:
if not is_stream_class_type(to):
@@ -195,8 +219,6 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T:
if cast_to == bool:
return cast(R, response.text.lower() == "true")

origin = get_origin(cast_to) or cast_to

if origin == APIResponse:
raise RuntimeError("Unexpected state - cast_to is `APIResponse`")

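
A stand-alone sketch of the wiring this hunk adds: when cast_to is a parametrized JSONLDecoder, _parse hands the response's byte iterator to the decoder instead of JSON-decoding the whole body. The URL and payload below are placeholders:

    import httpx
    from llama_stack_client._decoders.jsonl import JSONLDecoder

    response = httpx.Response(
        200,
        content=b'{"row": 1}\n{"row": 2}\n',
        request=httpx.Request("GET", "https://example.invalid/rows"),  # placeholder
    )
    # Equivalent to what _parse constructs for cast_to=JSONLDecoder[dict]:
    decoder = JSONLDecoder(
        raw_iterator=response.iter_bytes(chunk_size=4096),
        line_type=dict,  # extract_type_arg(JSONLDecoder[dict], 0) is dict
        http_response=response,
    )
    print(list(decoder))  # [{'row': 1}, {'row': 2}]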
54 changes: 27 additions & 27 deletions src/llama_stack_client/resources/__init__.py
@@ -24,14 +24,6 @@
AgentsResourceWithStreamingResponse,
AsyncAgentsResourceWithStreamingResponse,
)
from .memory import (
MemoryResource,
AsyncMemoryResource,
MemoryResourceWithRawResponse,
AsyncMemoryResourceWithRawResponse,
MemoryResourceWithStreamingResponse,
AsyncMemoryResourceWithStreamingResponse,
)
from .models import (
ModelsResource,
AsyncModelsResource,
@@ -120,6 +112,14 @@
TelemetryResourceWithStreamingResponse,
AsyncTelemetryResourceWithStreamingResponse,
)
from .vector_io import (
VectorIoResource,
AsyncVectorIoResource,
VectorIoResourceWithRawResponse,
AsyncVectorIoResourceWithRawResponse,
VectorIoResourceWithStreamingResponse,
AsyncVectorIoResourceWithStreamingResponse,
)
from .eval_tasks import (
EvalTasksResource,
AsyncEvalTasksResource,
@@ -136,13 +136,13 @@
ToolgroupsResourceWithStreamingResponse,
AsyncToolgroupsResourceWithStreamingResponse,
)
from .memory_banks import (
MemoryBanksResource,
AsyncMemoryBanksResource,
MemoryBanksResourceWithRawResponse,
AsyncMemoryBanksResourceWithRawResponse,
MemoryBanksResourceWithStreamingResponse,
AsyncMemoryBanksResourceWithStreamingResponse,
from .vector_dbs import (
VectorDBsResource,
AsyncVectorDBsResource,
VectorDBsResourceWithRawResponse,
AsyncVectorDBsResourceWithRawResponse,
VectorDBsResourceWithStreamingResponse,
AsyncVectorDBsResourceWithStreamingResponse,
)
from .tool_runtime import (
ToolRuntimeResource,
@@ -240,18 +240,18 @@
"AsyncInferenceResourceWithRawResponse",
"InferenceResourceWithStreamingResponse",
"AsyncInferenceResourceWithStreamingResponse",
"MemoryResource",
"AsyncMemoryResource",
"MemoryResourceWithRawResponse",
"AsyncMemoryResourceWithRawResponse",
"MemoryResourceWithStreamingResponse",
"AsyncMemoryResourceWithStreamingResponse",
"MemoryBanksResource",
"AsyncMemoryBanksResource",
"MemoryBanksResourceWithRawResponse",
"AsyncMemoryBanksResourceWithRawResponse",
"MemoryBanksResourceWithStreamingResponse",
"AsyncMemoryBanksResourceWithStreamingResponse",
"VectorIoResource",
"AsyncVectorIoResource",
"VectorIoResourceWithRawResponse",
"AsyncVectorIoResourceWithRawResponse",
"VectorIoResourceWithStreamingResponse",
"AsyncVectorIoResourceWithStreamingResponse",
"VectorDBsResource",
"AsyncVectorDBsResource",
"VectorDBsResourceWithRawResponse",
"AsyncVectorDBsResourceWithRawResponse",
"VectorDBsResourceWithStreamingResponse",
"AsyncVectorDBsResourceWithStreamingResponse",
"ModelsResource",
"AsyncModelsResource",
"ModelsResourceWithRawResponse",
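
Downstream imports follow the same rename; the names below are taken directly from this file's __all__:

    # Previously:
    #   from llama_stack_client.resources import MemoryResource, MemoryBanksResource
    from llama_stack_client.resources import (
        VectorDBsResource,
        VectorIoResource,
    )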
4 changes: 2 additions & 2 deletions src/llama_stack_client/resources/agents/agents.py
@@ -66,7 +66,7 @@ def turn(self) -> TurnResource:
@cached_property
def with_raw_response(self) -> AgentsResourceWithRawResponse:
"""
This property can be used as a prefix for any HTTP method call to return the
This property can be used as a prefix for any HTTP method call to return
the raw response object instead of the parsed content.
the raw response object instead of the parsed content.

For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
@@ -183,7 +183,7 @@ def turn(self) -> AsyncTurnResource:
@cached_property
def with_raw_response(self) -> AsyncAgentsResourceWithRawResponse:
"""
This property can be used as a prefix for any HTTP method call to return the
This property can be used as a prefix for any HTTP method call to return
the raw response object instead of the parsed content.
the raw response object instead of the parsed content.

For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
4 changes: 2 additions & 2 deletions src/llama_stack_client/resources/agents/session.py
@@ -32,7 +32,7 @@ class SessionResource(SyncAPIResource):
@cached_property
def with_raw_response(self) -> SessionResourceWithRawResponse:
"""
This property can be used as a prefix for any HTTP method call to return the
This property can be used as a prefix for any HTTP method call to return
the raw response object instead of the parsed content.
the raw response object instead of the parsed content.

For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
@@ -193,7 +193,7 @@ class AsyncSessionResource(AsyncAPIResource):
@cached_property
def with_raw_response(self) -> AsyncSessionResourceWithRawResponse:
"""
This property can be used as a prefix for any HTTP method call to return the
This property can be used as a prefix for any HTTP method call to return
the raw response object instead of the parsed content.
the raw response object instead of the parsed content.

For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
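
As the corrected docstring says, with_raw_response is used as a prefix on an ordinary method call. A usage sketch, assuming a session-creation method whose name and parameters are illustrative rather than confirmed by this diff:

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient()

    # Prefixing the call returns the raw HTTP response wrapper instead of
    # the parsed content, so headers become inspectable.
    response = client.agents.session.with_raw_response.create(
        agent_id="agent-123",   # hypothetical ID
        session_name="demo",    # parameter names assumed for illustration
    )
    print(response.headers)
    session = response.parse()  # the parsed content is still recoverable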