Skip to content

Commit 6880a8d

Browse files
committed
Sync updates from stainless branch: ashwinb/dev
1 parent d6f3ef2 commit 6880a8d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+3054
-230
lines changed

src/llama_stack_client/_client.py

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
from ._version import __version__
2828
from .resources import (
2929
tools,
30-
memory,
3130
models,
3231
routes,
3332
safety,
@@ -39,10 +38,10 @@
3938
inference,
4039
providers,
4140
telemetry,
41+
vector_io,
4242
eval_tasks,
4343
toolgroups,
44-
memory_banks,
45-
tool_runtime,
44+
vector_dbs,
4645
batch_inference,
4746
scoring_functions,
4847
synthetic_data_generation,
@@ -56,6 +55,7 @@
5655
)
5756
from .resources.eval import eval
5857
from .resources.agents import agents
58+
from .resources.tool_runtime import tool_runtime
5959
from .resources.post_training import post_training
6060

6161
__all__ = [
@@ -80,8 +80,8 @@ class LlamaStackClient(SyncAPIClient):
8080
eval: eval.EvalResource
8181
inspect: inspect.InspectResource
8282
inference: inference.InferenceResource
83-
memory: memory.MemoryResource
84-
memory_banks: memory_banks.MemoryBanksResource
83+
vector_io: vector_io.VectorIoResource
84+
vector_dbs: vector_dbs.VectorDBsResource
8585
models: models.ModelsResource
8686
post_training: post_training.PostTrainingResource
8787
providers: providers.ProvidersResource
@@ -153,8 +153,8 @@ def __init__(
153153
self.eval = eval.EvalResource(self)
154154
self.inspect = inspect.InspectResource(self)
155155
self.inference = inference.InferenceResource(self)
156-
self.memory = memory.MemoryResource(self)
157-
self.memory_banks = memory_banks.MemoryBanksResource(self)
156+
self.vector_io = vector_io.VectorIoResource(self)
157+
self.vector_dbs = vector_dbs.VectorDBsResource(self)
158158
self.models = models.ModelsResource(self)
159159
self.post_training = post_training.PostTrainingResource(self)
160160
self.providers = providers.ProvidersResource(self)
@@ -277,8 +277,8 @@ class AsyncLlamaStackClient(AsyncAPIClient):
277277
eval: eval.AsyncEvalResource
278278
inspect: inspect.AsyncInspectResource
279279
inference: inference.AsyncInferenceResource
280-
memory: memory.AsyncMemoryResource
281-
memory_banks: memory_banks.AsyncMemoryBanksResource
280+
vector_io: vector_io.AsyncVectorIoResource
281+
vector_dbs: vector_dbs.AsyncVectorDBsResource
282282
models: models.AsyncModelsResource
283283
post_training: post_training.AsyncPostTrainingResource
284284
providers: providers.AsyncProvidersResource
@@ -350,8 +350,8 @@ def __init__(
350350
self.eval = eval.AsyncEvalResource(self)
351351
self.inspect = inspect.AsyncInspectResource(self)
352352
self.inference = inference.AsyncInferenceResource(self)
353-
self.memory = memory.AsyncMemoryResource(self)
354-
self.memory_banks = memory_banks.AsyncMemoryBanksResource(self)
353+
self.vector_io = vector_io.AsyncVectorIoResource(self)
354+
self.vector_dbs = vector_dbs.AsyncVectorDBsResource(self)
355355
self.models = models.AsyncModelsResource(self)
356356
self.post_training = post_training.AsyncPostTrainingResource(self)
357357
self.providers = providers.AsyncProvidersResource(self)
@@ -475,8 +475,8 @@ def __init__(self, client: LlamaStackClient) -> None:
475475
self.eval = eval.EvalResourceWithRawResponse(client.eval)
476476
self.inspect = inspect.InspectResourceWithRawResponse(client.inspect)
477477
self.inference = inference.InferenceResourceWithRawResponse(client.inference)
478-
self.memory = memory.MemoryResourceWithRawResponse(client.memory)
479-
self.memory_banks = memory_banks.MemoryBanksResourceWithRawResponse(client.memory_banks)
478+
self.vector_io = vector_io.VectorIoResourceWithRawResponse(client.vector_io)
479+
self.vector_dbs = vector_dbs.VectorDBsResourceWithRawResponse(client.vector_dbs)
480480
self.models = models.ModelsResourceWithRawResponse(client.models)
481481
self.post_training = post_training.PostTrainingResourceWithRawResponse(client.post_training)
482482
self.providers = providers.ProvidersResourceWithRawResponse(client.providers)
@@ -504,8 +504,8 @@ def __init__(self, client: AsyncLlamaStackClient) -> None:
504504
self.eval = eval.AsyncEvalResourceWithRawResponse(client.eval)
505505
self.inspect = inspect.AsyncInspectResourceWithRawResponse(client.inspect)
506506
self.inference = inference.AsyncInferenceResourceWithRawResponse(client.inference)
507-
self.memory = memory.AsyncMemoryResourceWithRawResponse(client.memory)
508-
self.memory_banks = memory_banks.AsyncMemoryBanksResourceWithRawResponse(client.memory_banks)
507+
self.vector_io = vector_io.AsyncVectorIoResourceWithRawResponse(client.vector_io)
508+
self.vector_dbs = vector_dbs.AsyncVectorDBsResourceWithRawResponse(client.vector_dbs)
509509
self.models = models.AsyncModelsResourceWithRawResponse(client.models)
510510
self.post_training = post_training.AsyncPostTrainingResourceWithRawResponse(client.post_training)
511511
self.providers = providers.AsyncProvidersResourceWithRawResponse(client.providers)
@@ -535,8 +535,8 @@ def __init__(self, client: LlamaStackClient) -> None:
535535
self.eval = eval.EvalResourceWithStreamingResponse(client.eval)
536536
self.inspect = inspect.InspectResourceWithStreamingResponse(client.inspect)
537537
self.inference = inference.InferenceResourceWithStreamingResponse(client.inference)
538-
self.memory = memory.MemoryResourceWithStreamingResponse(client.memory)
539-
self.memory_banks = memory_banks.MemoryBanksResourceWithStreamingResponse(client.memory_banks)
538+
self.vector_io = vector_io.VectorIoResourceWithStreamingResponse(client.vector_io)
539+
self.vector_dbs = vector_dbs.VectorDBsResourceWithStreamingResponse(client.vector_dbs)
540540
self.models = models.ModelsResourceWithStreamingResponse(client.models)
541541
self.post_training = post_training.PostTrainingResourceWithStreamingResponse(client.post_training)
542542
self.providers = providers.ProvidersResourceWithStreamingResponse(client.providers)
@@ -566,8 +566,8 @@ def __init__(self, client: AsyncLlamaStackClient) -> None:
566566
self.eval = eval.AsyncEvalResourceWithStreamingResponse(client.eval)
567567
self.inspect = inspect.AsyncInspectResourceWithStreamingResponse(client.inspect)
568568
self.inference = inference.AsyncInferenceResourceWithStreamingResponse(client.inference)
569-
self.memory = memory.AsyncMemoryResourceWithStreamingResponse(client.memory)
570-
self.memory_banks = memory_banks.AsyncMemoryBanksResourceWithStreamingResponse(client.memory_banks)
569+
self.vector_io = vector_io.AsyncVectorIoResourceWithStreamingResponse(client.vector_io)
570+
self.vector_dbs = vector_dbs.AsyncVectorDBsResourceWithStreamingResponse(client.vector_dbs)
571571
self.models = models.AsyncModelsResourceWithStreamingResponse(client.models)
572572
self.post_training = post_training.AsyncPostTrainingResourceWithStreamingResponse(client.post_training)
573573
self.providers = providers.AsyncProvidersResourceWithStreamingResponse(client.providers)
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
from __future__ import annotations
2+
3+
import json
4+
from typing_extensions import Generic, TypeVar, Iterator, AsyncIterator
5+
6+
import httpx
7+
8+
from .._models import construct_type_unchecked
9+
10+
_T = TypeVar("_T")
11+
12+
13+
class JSONLDecoder(Generic[_T]):
    """A decoder for [JSON Lines](https://jsonlines.org) format.

    This class provides an iterator over a byte-iterator that parses each JSON Line
    into a given type.
    """

    http_response: httpx.Response | None
    """The HTTP response this decoder was constructed from"""

    def __init__(
        self, *, raw_iterator: Iterator[bytes], line_type: type[_T], http_response: httpx.Response | None
    ) -> None:
        super().__init__()
        self.http_response = http_response
        self._raw_iterator = raw_iterator
        self._line_type = line_type
        self._iterator = self.__decode__()

    def __decode__(self) -> Iterator[_T]:
        # Buffer bytes until a complete, newline-terminated line has been seen;
        # chunks from the raw iterator may split a line at any byte boundary.
        buf = b""
        for chunk in self._raw_iterator:
            for line in chunk.splitlines(keepends=True):
                buf += line
                if buf.endswith((b"\r", b"\n", b"\r\n")):
                    # Skip whitespace-only lines: a lone b"\n" can appear when a
                    # b"\r\n" terminator is split across two chunks (the b"\r"
                    # already flushed the previous record), and blank separator
                    # lines would otherwise crash json.loads().
                    if buf.strip():
                        yield construct_type_unchecked(
                            value=json.loads(buf),
                            type_=self._line_type,
                        )
                    buf = b""

        # flush a trailing record that was not newline-terminated
        if buf.strip():
            yield construct_type_unchecked(
                value=json.loads(buf),
                type_=self._line_type,
            )

    def __next__(self) -> _T:
        return self._iterator.__next__()

    def __iter__(self) -> Iterator[_T]:
        for item in self._iterator:
            yield item
57+
58+
59+
class AsyncJSONLDecoder(Generic[_T]):
    """A decoder for [JSON Lines](https://jsonlines.org) format.

    This class provides an async iterator over a byte-iterator that parses each JSON Line
    into a given type.
    """

    http_response: httpx.Response | None
    """The HTTP response this decoder was constructed from"""

    def __init__(
        self, *, raw_iterator: AsyncIterator[bytes], line_type: type[_T], http_response: httpx.Response | None
    ) -> None:
        super().__init__()
        self.http_response = http_response
        self._raw_iterator = raw_iterator
        self._line_type = line_type
        self._iterator = self.__decode__()

    async def __decode__(self) -> AsyncIterator[_T]:
        # Buffer bytes until a complete, newline-terminated line has been seen;
        # chunks from the raw iterator may split a line at any byte boundary.
        buf = b""
        async for chunk in self._raw_iterator:
            for line in chunk.splitlines(keepends=True):
                buf += line
                if buf.endswith((b"\r", b"\n", b"\r\n")):
                    # Skip whitespace-only lines: a lone b"\n" can appear when a
                    # b"\r\n" terminator is split across two chunks (the b"\r"
                    # already flushed the previous record), and blank separator
                    # lines would otherwise crash json.loads().
                    if buf.strip():
                        yield construct_type_unchecked(
                            value=json.loads(buf),
                            type_=self._line_type,
                        )
                    buf = b""

        # flush a trailing record that was not newline-terminated
        if buf.strip():
            yield construct_type_unchecked(
                value=json.loads(buf),
                type_=self._line_type,
            )

    async def __anext__(self) -> _T:
        return await self._iterator.__anext__()

    async def __aiter__(self) -> AsyncIterator[_T]:
        async for item in self._iterator:
            yield item

src/llama_stack_client/_response.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from ._constants import RAW_RESPONSE_HEADER, OVERRIDE_CAST_TO_HEADER
3131
from ._streaming import Stream, AsyncStream, is_stream_class_type, extract_stream_chunk_type
3232
from ._exceptions import LlamaStackClientError, APIResponseValidationError
33+
from ._decoders.jsonl import JSONLDecoder, AsyncJSONLDecoder
3334

3435
if TYPE_CHECKING:
3536
from ._models import FinalRequestOptions
@@ -136,6 +137,29 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T:
136137
if cast_to and is_annotated_type(cast_to):
137138
cast_to = extract_type_arg(cast_to, 0)
138139

140+
origin = get_origin(cast_to) or cast_to
141+
142+
if inspect.isclass(origin):
143+
if issubclass(cast(Any, origin), JSONLDecoder):
144+
return cast(
145+
R,
146+
cast("type[JSONLDecoder[Any]]", cast_to)(
147+
raw_iterator=self.http_response.iter_bytes(chunk_size=4096),
148+
line_type=extract_type_arg(cast_to, 0),
149+
http_response=self.http_response,
150+
),
151+
)
152+
153+
if issubclass(cast(Any, origin), AsyncJSONLDecoder):
154+
return cast(
155+
R,
156+
cast("type[AsyncJSONLDecoder[Any]]", cast_to)(
157+
raw_iterator=self.http_response.aiter_bytes(chunk_size=4096),
158+
line_type=extract_type_arg(cast_to, 0),
159+
http_response=self.http_response,
160+
),
161+
)
162+
139163
if self._is_sse_stream:
140164
if to:
141165
if not is_stream_class_type(to):
@@ -195,8 +219,6 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T:
195219
if cast_to == bool:
196220
return cast(R, response.text.lower() == "true")
197221

198-
origin = get_origin(cast_to) or cast_to
199-
200222
if origin == APIResponse:
201223
raise RuntimeError("Unexpected state - cast_to is `APIResponse`")
202224

src/llama_stack_client/resources/__init__.py

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,6 @@
2424
AgentsResourceWithStreamingResponse,
2525
AsyncAgentsResourceWithStreamingResponse,
2626
)
27-
from .memory import (
28-
MemoryResource,
29-
AsyncMemoryResource,
30-
MemoryResourceWithRawResponse,
31-
AsyncMemoryResourceWithRawResponse,
32-
MemoryResourceWithStreamingResponse,
33-
AsyncMemoryResourceWithStreamingResponse,
34-
)
3527
from .models import (
3628
ModelsResource,
3729
AsyncModelsResource,
@@ -120,6 +112,14 @@
120112
TelemetryResourceWithStreamingResponse,
121113
AsyncTelemetryResourceWithStreamingResponse,
122114
)
115+
from .vector_io import (
116+
VectorIoResource,
117+
AsyncVectorIoResource,
118+
VectorIoResourceWithRawResponse,
119+
AsyncVectorIoResourceWithRawResponse,
120+
VectorIoResourceWithStreamingResponse,
121+
AsyncVectorIoResourceWithStreamingResponse,
122+
)
123123
from .eval_tasks import (
124124
EvalTasksResource,
125125
AsyncEvalTasksResource,
@@ -136,13 +136,13 @@
136136
ToolgroupsResourceWithStreamingResponse,
137137
AsyncToolgroupsResourceWithStreamingResponse,
138138
)
139-
from .memory_banks import (
140-
MemoryBanksResource,
141-
AsyncMemoryBanksResource,
142-
MemoryBanksResourceWithRawResponse,
143-
AsyncMemoryBanksResourceWithRawResponse,
144-
MemoryBanksResourceWithStreamingResponse,
145-
AsyncMemoryBanksResourceWithStreamingResponse,
139+
from .vector_dbs import (
140+
VectorDBsResource,
141+
AsyncVectorDBsResource,
142+
VectorDBsResourceWithRawResponse,
143+
AsyncVectorDBsResourceWithRawResponse,
144+
VectorDBsResourceWithStreamingResponse,
145+
AsyncVectorDBsResourceWithStreamingResponse,
146146
)
147147
from .tool_runtime import (
148148
ToolRuntimeResource,
@@ -240,18 +240,18 @@
240240
"AsyncInferenceResourceWithRawResponse",
241241
"InferenceResourceWithStreamingResponse",
242242
"AsyncInferenceResourceWithStreamingResponse",
243-
"MemoryResource",
244-
"AsyncMemoryResource",
245-
"MemoryResourceWithRawResponse",
246-
"AsyncMemoryResourceWithRawResponse",
247-
"MemoryResourceWithStreamingResponse",
248-
"AsyncMemoryResourceWithStreamingResponse",
249-
"MemoryBanksResource",
250-
"AsyncMemoryBanksResource",
251-
"MemoryBanksResourceWithRawResponse",
252-
"AsyncMemoryBanksResourceWithRawResponse",
253-
"MemoryBanksResourceWithStreamingResponse",
254-
"AsyncMemoryBanksResourceWithStreamingResponse",
243+
"VectorIoResource",
244+
"AsyncVectorIoResource",
245+
"VectorIoResourceWithRawResponse",
246+
"AsyncVectorIoResourceWithRawResponse",
247+
"VectorIoResourceWithStreamingResponse",
248+
"AsyncVectorIoResourceWithStreamingResponse",
249+
"VectorDBsResource",
250+
"AsyncVectorDBsResource",
251+
"VectorDBsResourceWithRawResponse",
252+
"AsyncVectorDBsResourceWithRawResponse",
253+
"VectorDBsResourceWithStreamingResponse",
254+
"AsyncVectorDBsResourceWithStreamingResponse",
255255
"ModelsResource",
256256
"AsyncModelsResource",
257257
"ModelsResourceWithRawResponse",

src/llama_stack_client/resources/agents/agents.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def turn(self) -> TurnResource:
6666
@cached_property
6767
def with_raw_response(self) -> AgentsResourceWithRawResponse:
6868
"""
69-
This property can be used as a prefix for any HTTP method call to return the
69+
This property can be used as a prefix for any HTTP method call to return
7070
the raw response object instead of the parsed content.
7171
7272
For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
@@ -183,7 +183,7 @@ def turn(self) -> AsyncTurnResource:
183183
@cached_property
184184
def with_raw_response(self) -> AsyncAgentsResourceWithRawResponse:
185185
"""
186-
This property can be used as a prefix for any HTTP method call to return the
186+
This property can be used as a prefix for any HTTP method call to return
187187
the raw response object instead of the parsed content.
188188
189189
For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers

src/llama_stack_client/resources/agents/session.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ class SessionResource(SyncAPIResource):
3232
@cached_property
3333
def with_raw_response(self) -> SessionResourceWithRawResponse:
3434
"""
35-
This property can be used as a prefix for any HTTP method call to return the
35+
This property can be used as a prefix for any HTTP method call to return
3636
the raw response object instead of the parsed content.
3737
3838
For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers
@@ -193,7 +193,7 @@ class AsyncSessionResource(AsyncAPIResource):
193193
@cached_property
194194
def with_raw_response(self) -> AsyncSessionResourceWithRawResponse:
195195
"""
196-
This property can be used as a prefix for any HTTP method call to return the
196+
This property can be used as a prefix for any HTTP method call to return
197197
the raw response object instead of the parsed content.
198198
199199
For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers

0 commit comments

Comments (0)