From 6880a8df3122dcd985e67bb0d51f4976ed731201 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Wed, 22 Jan 2025 09:54:35 -0800 Subject: [PATCH] Sync updates from stainless branch: ashwinb/dev --- src/llama_stack_client/_client.py | 38 +- src/llama_stack_client/_decoders/jsonl.py | 101 ++++ src/llama_stack_client/_response.py | 26 +- src/llama_stack_client/resources/__init__.py | 54 +- .../resources/agents/agents.py | 4 +- .../resources/agents/session.py | 4 +- .../resources/agents/steps.py | 4 +- .../resources/agents/turn.py | 4 +- .../resources/batch_inference.py | 4 +- src/llama_stack_client/resources/datasetio.py | 4 +- src/llama_stack_client/resources/datasets.py | 4 +- src/llama_stack_client/resources/eval/eval.py | 4 +- src/llama_stack_client/resources/eval/jobs.py | 4 +- .../resources/eval_tasks.py | 4 +- src/llama_stack_client/resources/inference.py | 4 +- src/llama_stack_client/resources/inspect.py | 4 +- src/llama_stack_client/resources/models.py | 4 +- .../resources/post_training/job.py | 4 +- .../resources/post_training/post_training.py | 4 +- src/llama_stack_client/resources/providers.py | 4 +- src/llama_stack_client/resources/routes.py | 4 +- src/llama_stack_client/resources/safety.py | 4 +- src/llama_stack_client/resources/scoring.py | 4 +- .../resources/scoring_functions.py | 4 +- src/llama_stack_client/resources/shields.py | 4 +- .../resources/synthetic_data_generation.py | 4 +- src/llama_stack_client/resources/telemetry.py | 4 +- .../resources/tool_runtime/__init__.py | 33 ++ .../resources/tool_runtime/rag_tool.py | 330 ++++++++++++ .../resources/tool_runtime/tool_runtime.py | 358 ++++++++++++ .../resources/toolgroups.py | 4 +- src/llama_stack_client/resources/tools.py | 4 +- .../resources/vector_dbs.py | 508 ++++++++++++++++++ src/llama_stack_client/resources/vector_io.py | 320 +++++++++++ src/llama_stack_client/types/__init__.py | 16 +- .../types/agents/session.py | 75 +-- .../types/list_vector_dbs_response.py | 26 + .../types/memory_retrieval_step.py | 6 +- .../types/query_chunks_response.py | 20 + .../types/tool_runtime/__init__.py | 9 + .../types/tool_runtime/document_param.py | 38 ++ .../types/tool_runtime/query_config_param.py | 40 ++ .../types/tool_runtime/query_result.py | 12 + .../tool_runtime/rag_tool_insert_params.py | 23 + .../tool_runtime/rag_tool_query_params.py | 24 + .../types/tool_runtime_invoke_tool_params.py | 2 +- .../types/vector_db_list_response.py | 25 + .../types/vector_db_register_params.py | 25 + .../types/vector_db_register_response.py | 21 + .../types/vector_db_retrieve_response.py | 21 + .../types/vector_io_insert_params.py | 29 + .../types/vector_io_query_params.py | 23 + tests/api_resources/test_tool_runtime.py | 65 +-- tests/api_resources/test_vector_dbs.py | 374 +++++++++++++ tests/api_resources/test_vector_io.py | 242 +++++++++ tests/api_resources/tool_runtime/__init__.py | 1 + .../tool_runtime/test_rag_tool.py | 274 ++++++++++ tests/test_client.py | 25 +- 58 files changed, 3054 insertions(+), 230 deletions(-) create mode 100644 src/llama_stack_client/_decoders/jsonl.py create mode 100644 src/llama_stack_client/resources/tool_runtime/__init__.py create mode 100644 src/llama_stack_client/resources/tool_runtime/rag_tool.py create mode 100644 src/llama_stack_client/resources/tool_runtime/tool_runtime.py create mode 100644 src/llama_stack_client/resources/vector_dbs.py create mode 100644 src/llama_stack_client/resources/vector_io.py create mode 100644 src/llama_stack_client/types/list_vector_dbs_response.py create mode 100644 
src/llama_stack_client/types/query_chunks_response.py create mode 100644 src/llama_stack_client/types/tool_runtime/__init__.py create mode 100644 src/llama_stack_client/types/tool_runtime/document_param.py create mode 100644 src/llama_stack_client/types/tool_runtime/query_config_param.py create mode 100644 src/llama_stack_client/types/tool_runtime/query_result.py create mode 100644 src/llama_stack_client/types/tool_runtime/rag_tool_insert_params.py create mode 100644 src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py create mode 100644 src/llama_stack_client/types/vector_db_list_response.py create mode 100644 src/llama_stack_client/types/vector_db_register_params.py create mode 100644 src/llama_stack_client/types/vector_db_register_response.py create mode 100644 src/llama_stack_client/types/vector_db_retrieve_response.py create mode 100644 src/llama_stack_client/types/vector_io_insert_params.py create mode 100644 src/llama_stack_client/types/vector_io_query_params.py create mode 100644 tests/api_resources/test_vector_dbs.py create mode 100644 tests/api_resources/test_vector_io.py create mode 100644 tests/api_resources/tool_runtime/__init__.py create mode 100644 tests/api_resources/tool_runtime/test_rag_tool.py diff --git a/src/llama_stack_client/_client.py b/src/llama_stack_client/_client.py index 763dbe24..1f0c1d4e 100644 --- a/src/llama_stack_client/_client.py +++ b/src/llama_stack_client/_client.py @@ -27,7 +27,6 @@ from ._version import __version__ from .resources import ( tools, - memory, models, routes, safety, @@ -39,10 +38,10 @@ inference, providers, telemetry, + vector_io, eval_tasks, toolgroups, - memory_banks, - tool_runtime, + vector_dbs, batch_inference, scoring_functions, synthetic_data_generation, @@ -56,6 +55,7 @@ ) from .resources.eval import eval from .resources.agents import agents +from .resources.tool_runtime import tool_runtime from .resources.post_training import post_training __all__ = [ @@ -80,8 +80,8 @@ class LlamaStackClient(SyncAPIClient): eval: eval.EvalResource inspect: inspect.InspectResource inference: inference.InferenceResource - memory: memory.MemoryResource - memory_banks: memory_banks.MemoryBanksResource + vector_io: vector_io.VectorIoResource + vector_dbs: vector_dbs.VectorDBsResource models: models.ModelsResource post_training: post_training.PostTrainingResource providers: providers.ProvidersResource @@ -153,8 +153,8 @@ def __init__( self.eval = eval.EvalResource(self) self.inspect = inspect.InspectResource(self) self.inference = inference.InferenceResource(self) - self.memory = memory.MemoryResource(self) - self.memory_banks = memory_banks.MemoryBanksResource(self) + self.vector_io = vector_io.VectorIoResource(self) + self.vector_dbs = vector_dbs.VectorDBsResource(self) self.models = models.ModelsResource(self) self.post_training = post_training.PostTrainingResource(self) self.providers = providers.ProvidersResource(self) @@ -277,8 +277,8 @@ class AsyncLlamaStackClient(AsyncAPIClient): eval: eval.AsyncEvalResource inspect: inspect.AsyncInspectResource inference: inference.AsyncInferenceResource - memory: memory.AsyncMemoryResource - memory_banks: memory_banks.AsyncMemoryBanksResource + vector_io: vector_io.AsyncVectorIoResource + vector_dbs: vector_dbs.AsyncVectorDBsResource models: models.AsyncModelsResource post_training: post_training.AsyncPostTrainingResource providers: providers.AsyncProvidersResource @@ -350,8 +350,8 @@ def __init__( self.eval = eval.AsyncEvalResource(self) self.inspect = inspect.AsyncInspectResource(self) 
self.inference = inference.AsyncInferenceResource(self) - self.memory = memory.AsyncMemoryResource(self) - self.memory_banks = memory_banks.AsyncMemoryBanksResource(self) + self.vector_io = vector_io.AsyncVectorIoResource(self) + self.vector_dbs = vector_dbs.AsyncVectorDBsResource(self) self.models = models.AsyncModelsResource(self) self.post_training = post_training.AsyncPostTrainingResource(self) self.providers = providers.AsyncProvidersResource(self) @@ -475,8 +475,8 @@ def __init__(self, client: LlamaStackClient) -> None: self.eval = eval.EvalResourceWithRawResponse(client.eval) self.inspect = inspect.InspectResourceWithRawResponse(client.inspect) self.inference = inference.InferenceResourceWithRawResponse(client.inference) - self.memory = memory.MemoryResourceWithRawResponse(client.memory) - self.memory_banks = memory_banks.MemoryBanksResourceWithRawResponse(client.memory_banks) + self.vector_io = vector_io.VectorIoResourceWithRawResponse(client.vector_io) + self.vector_dbs = vector_dbs.VectorDBsResourceWithRawResponse(client.vector_dbs) self.models = models.ModelsResourceWithRawResponse(client.models) self.post_training = post_training.PostTrainingResourceWithRawResponse(client.post_training) self.providers = providers.ProvidersResourceWithRawResponse(client.providers) @@ -504,8 +504,8 @@ def __init__(self, client: AsyncLlamaStackClient) -> None: self.eval = eval.AsyncEvalResourceWithRawResponse(client.eval) self.inspect = inspect.AsyncInspectResourceWithRawResponse(client.inspect) self.inference = inference.AsyncInferenceResourceWithRawResponse(client.inference) - self.memory = memory.AsyncMemoryResourceWithRawResponse(client.memory) - self.memory_banks = memory_banks.AsyncMemoryBanksResourceWithRawResponse(client.memory_banks) + self.vector_io = vector_io.AsyncVectorIoResourceWithRawResponse(client.vector_io) + self.vector_dbs = vector_dbs.AsyncVectorDBsResourceWithRawResponse(client.vector_dbs) self.models = models.AsyncModelsResourceWithRawResponse(client.models) self.post_training = post_training.AsyncPostTrainingResourceWithRawResponse(client.post_training) self.providers = providers.AsyncProvidersResourceWithRawResponse(client.providers) @@ -535,8 +535,8 @@ def __init__(self, client: LlamaStackClient) -> None: self.eval = eval.EvalResourceWithStreamingResponse(client.eval) self.inspect = inspect.InspectResourceWithStreamingResponse(client.inspect) self.inference = inference.InferenceResourceWithStreamingResponse(client.inference) - self.memory = memory.MemoryResourceWithStreamingResponse(client.memory) - self.memory_banks = memory_banks.MemoryBanksResourceWithStreamingResponse(client.memory_banks) + self.vector_io = vector_io.VectorIoResourceWithStreamingResponse(client.vector_io) + self.vector_dbs = vector_dbs.VectorDBsResourceWithStreamingResponse(client.vector_dbs) self.models = models.ModelsResourceWithStreamingResponse(client.models) self.post_training = post_training.PostTrainingResourceWithStreamingResponse(client.post_training) self.providers = providers.ProvidersResourceWithStreamingResponse(client.providers) @@ -566,8 +566,8 @@ def __init__(self, client: AsyncLlamaStackClient) -> None: self.eval = eval.AsyncEvalResourceWithStreamingResponse(client.eval) self.inspect = inspect.AsyncInspectResourceWithStreamingResponse(client.inspect) self.inference = inference.AsyncInferenceResourceWithStreamingResponse(client.inference) - self.memory = memory.AsyncMemoryResourceWithStreamingResponse(client.memory) - self.memory_banks = 
memory_banks.AsyncMemoryBanksResourceWithStreamingResponse(client.memory_banks) + self.vector_io = vector_io.AsyncVectorIoResourceWithStreamingResponse(client.vector_io) + self.vector_dbs = vector_dbs.AsyncVectorDBsResourceWithStreamingResponse(client.vector_dbs) self.models = models.AsyncModelsResourceWithStreamingResponse(client.models) self.post_training = post_training.AsyncPostTrainingResourceWithStreamingResponse(client.post_training) self.providers = providers.AsyncProvidersResourceWithStreamingResponse(client.providers) diff --git a/src/llama_stack_client/_decoders/jsonl.py b/src/llama_stack_client/_decoders/jsonl.py new file mode 100644 index 00000000..e9d29a1c --- /dev/null +++ b/src/llama_stack_client/_decoders/jsonl.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +import json +from typing_extensions import Generic, TypeVar, Iterator, AsyncIterator + +import httpx + +from .._models import construct_type_unchecked + +_T = TypeVar("_T") + + +class JSONLDecoder(Generic[_T]): + """A decoder for [JSON Lines](https://jsonlines.org) format. + + This class provides an iterator over a byte-iterator that parses each JSON Line + into a given type. + """ + + http_response: httpx.Response | None + """The HTTP response this decoder was constructed from""" + + def __init__( + self, *, raw_iterator: Iterator[bytes], line_type: type[_T], http_response: httpx.Response | None + ) -> None: + super().__init__() + self.http_response = http_response + self._raw_iterator = raw_iterator + self._line_type = line_type + self._iterator = self.__decode__() + + def __decode__(self) -> Iterator[_T]: + buf = b"" + for chunk in self._raw_iterator: + for line in chunk.splitlines(keepends=True): + buf += line + if buf.endswith((b"\r", b"\n", b"\r\n")): + yield construct_type_unchecked( + value=json.loads(buf), + type_=self._line_type, + ) + buf = b"" + + # flush + if buf: + yield construct_type_unchecked( + value=json.loads(buf), + type_=self._line_type, + ) + + def __next__(self) -> _T: + return self._iterator.__next__() + + def __iter__(self) -> Iterator[_T]: + for item in self._iterator: + yield item + + +class AsyncJSONLDecoder(Generic[_T]): + """A decoder for [JSON Lines](https://jsonlines.org) format. + + This class provides an async iterator over a byte-iterator that parses each JSON Line + into a given type. 
+ """ + + http_response: httpx.Response | None + + def __init__( + self, *, raw_iterator: AsyncIterator[bytes], line_type: type[_T], http_response: httpx.Response | None + ) -> None: + super().__init__() + self.http_response = http_response + self._raw_iterator = raw_iterator + self._line_type = line_type + self._iterator = self.__decode__() + + async def __decode__(self) -> AsyncIterator[_T]: + buf = b"" + async for chunk in self._raw_iterator: + for line in chunk.splitlines(keepends=True): + buf += line + if buf.endswith((b"\r", b"\n", b"\r\n")): + yield construct_type_unchecked( + value=json.loads(buf), + type_=self._line_type, + ) + buf = b"" + + # flush + if buf: + yield construct_type_unchecked( + value=json.loads(buf), + type_=self._line_type, + ) + + async def __anext__(self) -> _T: + return await self._iterator.__anext__() + + async def __aiter__(self) -> AsyncIterator[_T]: + async for item in self._iterator: + yield item diff --git a/src/llama_stack_client/_response.py b/src/llama_stack_client/_response.py index 06ae3a2d..d7e58fbe 100644 --- a/src/llama_stack_client/_response.py +++ b/src/llama_stack_client/_response.py @@ -30,6 +30,7 @@ from ._constants import RAW_RESPONSE_HEADER, OVERRIDE_CAST_TO_HEADER from ._streaming import Stream, AsyncStream, is_stream_class_type, extract_stream_chunk_type from ._exceptions import LlamaStackClientError, APIResponseValidationError +from ._decoders.jsonl import JSONLDecoder, AsyncJSONLDecoder if TYPE_CHECKING: from ._models import FinalRequestOptions @@ -136,6 +137,29 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: if cast_to and is_annotated_type(cast_to): cast_to = extract_type_arg(cast_to, 0) + origin = get_origin(cast_to) or cast_to + + if inspect.isclass(origin): + if issubclass(cast(Any, origin), JSONLDecoder): + return cast( + R, + cast("type[JSONLDecoder[Any]]", cast_to)( + raw_iterator=self.http_response.iter_bytes(chunk_size=4096), + line_type=extract_type_arg(cast_to, 0), + http_response=self.http_response, + ), + ) + + if issubclass(cast(Any, origin), AsyncJSONLDecoder): + return cast( + R, + cast("type[AsyncJSONLDecoder[Any]]", cast_to)( + raw_iterator=self.http_response.aiter_bytes(chunk_size=4096), + line_type=extract_type_arg(cast_to, 0), + http_response=self.http_response, + ), + ) + if self._is_sse_stream: if to: if not is_stream_class_type(to): @@ -195,8 +219,6 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: if cast_to == bool: return cast(R, response.text.lower() == "true") - origin = get_origin(cast_to) or cast_to - if origin == APIResponse: raise RuntimeError("Unexpected state - cast_to is `APIResponse`") diff --git a/src/llama_stack_client/resources/__init__.py b/src/llama_stack_client/resources/__init__.py index e0d84adf..42188633 100644 --- a/src/llama_stack_client/resources/__init__.py +++ b/src/llama_stack_client/resources/__init__.py @@ -24,14 +24,6 @@ AgentsResourceWithStreamingResponse, AsyncAgentsResourceWithStreamingResponse, ) -from .memory import ( - MemoryResource, - AsyncMemoryResource, - MemoryResourceWithRawResponse, - AsyncMemoryResourceWithRawResponse, - MemoryResourceWithStreamingResponse, - AsyncMemoryResourceWithStreamingResponse, -) from .models import ( ModelsResource, AsyncModelsResource, @@ -120,6 +112,14 @@ TelemetryResourceWithStreamingResponse, AsyncTelemetryResourceWithStreamingResponse, ) +from .vector_io import ( + VectorIoResource, + AsyncVectorIoResource, + VectorIoResourceWithRawResponse, + AsyncVectorIoResourceWithRawResponse, + 
VectorIoResourceWithStreamingResponse, + AsyncVectorIoResourceWithStreamingResponse, +) from .eval_tasks import ( EvalTasksResource, AsyncEvalTasksResource, @@ -136,13 +136,13 @@ ToolgroupsResourceWithStreamingResponse, AsyncToolgroupsResourceWithStreamingResponse, ) -from .memory_banks import ( - MemoryBanksResource, - AsyncMemoryBanksResource, - MemoryBanksResourceWithRawResponse, - AsyncMemoryBanksResourceWithRawResponse, - MemoryBanksResourceWithStreamingResponse, - AsyncMemoryBanksResourceWithStreamingResponse, +from .vector_dbs import ( + VectorDBsResource, + AsyncVectorDBsResource, + VectorDBsResourceWithRawResponse, + AsyncVectorDBsResourceWithRawResponse, + VectorDBsResourceWithStreamingResponse, + AsyncVectorDBsResourceWithStreamingResponse, ) from .tool_runtime import ( ToolRuntimeResource, @@ -240,18 +240,18 @@ "AsyncInferenceResourceWithRawResponse", "InferenceResourceWithStreamingResponse", "AsyncInferenceResourceWithStreamingResponse", - "MemoryResource", - "AsyncMemoryResource", - "MemoryResourceWithRawResponse", - "AsyncMemoryResourceWithRawResponse", - "MemoryResourceWithStreamingResponse", - "AsyncMemoryResourceWithStreamingResponse", - "MemoryBanksResource", - "AsyncMemoryBanksResource", - "MemoryBanksResourceWithRawResponse", - "AsyncMemoryBanksResourceWithRawResponse", - "MemoryBanksResourceWithStreamingResponse", - "AsyncMemoryBanksResourceWithStreamingResponse", + "VectorIoResource", + "AsyncVectorIoResource", + "VectorIoResourceWithRawResponse", + "AsyncVectorIoResourceWithRawResponse", + "VectorIoResourceWithStreamingResponse", + "AsyncVectorIoResourceWithStreamingResponse", + "VectorDBsResource", + "AsyncVectorDBsResource", + "VectorDBsResourceWithRawResponse", + "AsyncVectorDBsResourceWithRawResponse", + "VectorDBsResourceWithStreamingResponse", + "AsyncVectorDBsResourceWithStreamingResponse", "ModelsResource", "AsyncModelsResource", "ModelsResourceWithRawResponse", diff --git a/src/llama_stack_client/resources/agents/agents.py b/src/llama_stack_client/resources/agents/agents.py index 23b8b5cc..c270e1a2 100644 --- a/src/llama_stack_client/resources/agents/agents.py +++ b/src/llama_stack_client/resources/agents/agents.py @@ -66,7 +66,7 @@ def turn(self) -> TurnResource: @cached_property def with_raw_response(self) -> AgentsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -183,7 +183,7 @@ def turn(self) -> AsyncTurnResource: @cached_property def with_raw_response(self) -> AsyncAgentsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. 
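Together with the `_client.py` changes above, these export updates replace the old `memory` / `memory_banks` accessors with `vector_io` / `vector_dbs`. A rough before-and-after sketch of client code follows; the method and parameter names are inferred from the new `vector_db_*` / `vector_io_*` type modules listed in the diffstat rather than shown in this section, so treat them as assumptions:

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:8321")  # illustrative URL

    # Before: client.memory_banks.register(...) and client.memory.query(...)
    # After: registration lives on vector_dbs, reads and writes on vector_io.
    client.vector_dbs.register(
        vector_db_id="my-docs",               # assumed parameter name
        embedding_model="all-MiniLM-L6-v2",   # assumed parameter name and model id
    )
    response = client.vector_io.query(
        vector_db_id="my-docs",               # assumed parameter name
        query="How do I enable telemetry?",   # assumed parameter name
    )
    print(response)  # QueryChunksResponse per the new types module
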
For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/agents/session.py b/src/llama_stack_client/resources/agents/session.py index 8497c980..8aff2477 100644 --- a/src/llama_stack_client/resources/agents/session.py +++ b/src/llama_stack_client/resources/agents/session.py @@ -32,7 +32,7 @@ class SessionResource(SyncAPIResource): @cached_property def with_raw_response(self) -> SessionResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -193,7 +193,7 @@ class AsyncSessionResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncSessionResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/agents/steps.py b/src/llama_stack_client/resources/agents/steps.py index 590c88fb..53f8f3b0 100644 --- a/src/llama_stack_client/resources/agents/steps.py +++ b/src/llama_stack_client/resources/agents/steps.py @@ -24,7 +24,7 @@ class StepsResource(SyncAPIResource): @cached_property def with_raw_response(self) -> StepsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -96,7 +96,7 @@ class AsyncStepsResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncStepsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/agents/turn.py b/src/llama_stack_client/resources/agents/turn.py index d86fca7b..b76b65f1 100644 --- a/src/llama_stack_client/resources/agents/turn.py +++ b/src/llama_stack_client/resources/agents/turn.py @@ -35,7 +35,7 @@ class TurnResource(SyncAPIResource): @cached_property def with_raw_response(self) -> TurnResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. 
For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -254,7 +254,7 @@ class AsyncTurnResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncTurnResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/batch_inference.py b/src/llama_stack_client/resources/batch_inference.py index ada3cc6e..fdf6ce9d 100644 --- a/src/llama_stack_client/resources/batch_inference.py +++ b/src/llama_stack_client/resources/batch_inference.py @@ -35,7 +35,7 @@ class BatchInferenceResource(SyncAPIResource): @cached_property def with_raw_response(self) -> BatchInferenceResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -176,7 +176,7 @@ class AsyncBatchInferenceResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncBatchInferenceResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/datasetio.py b/src/llama_stack_client/resources/datasetio.py index c49a8f5b..1441dff0 100644 --- a/src/llama_stack_client/resources/datasetio.py +++ b/src/llama_stack_client/resources/datasetio.py @@ -31,7 +31,7 @@ class DatasetioResource(SyncAPIResource): @cached_property def with_raw_response(self) -> DatasetioResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -156,7 +156,7 @@ class AsyncDatasetioResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncDatasetioResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. 
For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/datasets.py b/src/llama_stack_client/resources/datasets.py index edb556fc..7e9d9ebb 100644 --- a/src/llama_stack_client/resources/datasets.py +++ b/src/llama_stack_client/resources/datasets.py @@ -35,7 +35,7 @@ class DatasetsResource(SyncAPIResource): @cached_property def with_raw_response(self) -> DatasetsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -241,7 +241,7 @@ class AsyncDatasetsResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncDatasetsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/eval/eval.py b/src/llama_stack_client/resources/eval/eval.py index fabf6d4f..3d25204f 100644 --- a/src/llama_stack_client/resources/eval/eval.py +++ b/src/llama_stack_client/resources/eval/eval.py @@ -44,7 +44,7 @@ def jobs(self) -> JobsResource: @cached_property def with_raw_response(self) -> EvalResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -166,7 +166,7 @@ def jobs(self) -> AsyncJobsResource: @cached_property def with_raw_response(self) -> AsyncEvalResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/eval/jobs.py b/src/llama_stack_client/resources/eval/jobs.py index c2657abf..f8bf6cd0 100644 --- a/src/llama_stack_client/resources/eval/jobs.py +++ b/src/llama_stack_client/resources/eval/jobs.py @@ -27,7 +27,7 @@ class JobsResource(SyncAPIResource): @cached_property def with_raw_response(self) -> JobsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. 
For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -184,7 +184,7 @@ class AsyncJobsResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncJobsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/eval_tasks.py b/src/llama_stack_client/resources/eval_tasks.py index 2f7bc089..f6d4b960 100644 --- a/src/llama_stack_client/resources/eval_tasks.py +++ b/src/llama_stack_client/resources/eval_tasks.py @@ -33,7 +33,7 @@ class EvalTasksResource(SyncAPIResource): @cached_property def with_raw_response(self) -> EvalTasksResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -196,7 +196,7 @@ class AsyncEvalTasksResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncEvalTasksResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/inference.py b/src/llama_stack_client/resources/inference.py index 96ca5f41..e5aa95a4 100644 --- a/src/llama_stack_client/resources/inference.py +++ b/src/llama_stack_client/resources/inference.py @@ -42,7 +42,7 @@ class InferenceResource(SyncAPIResource): @cached_property def with_raw_response(self) -> InferenceResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -452,7 +452,7 @@ class AsyncInferenceResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncInferenceResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. 
For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/inspect.py b/src/llama_stack_client/resources/inspect.py index a307d2df..2dfb9134 100644 --- a/src/llama_stack_client/resources/inspect.py +++ b/src/llama_stack_client/resources/inspect.py @@ -25,7 +25,7 @@ class InspectResource(SyncAPIResource): @cached_property def with_raw_response(self) -> InspectResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -124,7 +124,7 @@ class AsyncInspectResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncInspectResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/models.py b/src/llama_stack_client/resources/models.py index 1e93f7d5..2d5297f2 100644 --- a/src/llama_stack_client/resources/models.py +++ b/src/llama_stack_client/resources/models.py @@ -34,7 +34,7 @@ class ModelsResource(SyncAPIResource): @cached_property def with_raw_response(self) -> ModelsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -237,7 +237,7 @@ class AsyncModelsResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncModelsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/post_training/job.py b/src/llama_stack_client/resources/post_training/job.py index bd98c268..d36de350 100644 --- a/src/llama_stack_client/resources/post_training/job.py +++ b/src/llama_stack_client/resources/post_training/job.py @@ -34,7 +34,7 @@ class JobResource(SyncAPIResource): @cached_property def with_raw_response(self) -> JobResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. 
For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -228,7 +228,7 @@ class AsyncJobResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncJobResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/post_training/post_training.py b/src/llama_stack_client/resources/post_training/post_training.py index 36b4cd2a..6b8f2682 100644 --- a/src/llama_stack_client/resources/post_training/post_training.py +++ b/src/llama_stack_client/resources/post_training/post_training.py @@ -43,7 +43,7 @@ def job(self) -> JobResource: @cached_property def with_raw_response(self) -> PostTrainingResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -182,7 +182,7 @@ def job(self) -> AsyncJobResource: @cached_property def with_raw_response(self) -> AsyncPostTrainingResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/providers.py b/src/llama_stack_client/resources/providers.py index 61b3b7c8..190002a0 100644 --- a/src/llama_stack_client/resources/providers.py +++ b/src/llama_stack_client/resources/providers.py @@ -27,7 +27,7 @@ class ProvidersResource(SyncAPIResource): @cached_property def with_raw_response(self) -> ProvidersResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -91,7 +91,7 @@ class AsyncProvidersResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncProvidersResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. 
For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/routes.py b/src/llama_stack_client/resources/routes.py index 7586fe63..2f10b219 100644 --- a/src/llama_stack_client/resources/routes.py +++ b/src/llama_stack_client/resources/routes.py @@ -27,7 +27,7 @@ class RoutesResource(SyncAPIResource): @cached_property def with_raw_response(self) -> RoutesResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -91,7 +91,7 @@ class AsyncRoutesResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncRoutesResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/safety.py b/src/llama_stack_client/resources/safety.py index 88128af6..a48d2a71 100644 --- a/src/llama_stack_client/resources/safety.py +++ b/src/llama_stack_client/resources/safety.py @@ -31,7 +31,7 @@ class SafetyResource(SyncAPIResource): @cached_property def with_raw_response(self) -> SafetyResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -102,7 +102,7 @@ class AsyncSafetyResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncSafetyResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/scoring.py b/src/llama_stack_client/resources/scoring.py index 8d2e7efb..8290b3e0 100644 --- a/src/llama_stack_client/resources/scoring.py +++ b/src/llama_stack_client/resources/scoring.py @@ -32,7 +32,7 @@ class ScoringResource(SyncAPIResource): @cached_property def with_raw_response(self) -> ScoringResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. 
For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -151,7 +151,7 @@ class AsyncScoringResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncScoringResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/scoring_functions.py b/src/llama_stack_client/resources/scoring_functions.py index 115a9156..98e4c60d 100644 --- a/src/llama_stack_client/resources/scoring_functions.py +++ b/src/llama_stack_client/resources/scoring_functions.py @@ -34,7 +34,7 @@ class ScoringFunctionsResource(SyncAPIResource): @cached_property def with_raw_response(self) -> ScoringFunctionsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -197,7 +197,7 @@ class AsyncScoringFunctionsResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncScoringFunctionsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/shields.py b/src/llama_stack_client/resources/shields.py index e8327473..3a1dab86 100644 --- a/src/llama_stack_client/resources/shields.py +++ b/src/llama_stack_client/resources/shields.py @@ -33,7 +33,7 @@ class ShieldsResource(SyncAPIResource): @cached_property def with_raw_response(self) -> ShieldsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -191,7 +191,7 @@ class AsyncShieldsResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncShieldsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. 
For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/synthetic_data_generation.py b/src/llama_stack_client/resources/synthetic_data_generation.py index 29151ce1..f47d82cf 100644 --- a/src/llama_stack_client/resources/synthetic_data_generation.py +++ b/src/llama_stack_client/resources/synthetic_data_generation.py @@ -32,7 +32,7 @@ class SyntheticDataGenerationResource(SyncAPIResource): @cached_property def with_raw_response(self) -> SyntheticDataGenerationResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -103,7 +103,7 @@ class AsyncSyntheticDataGenerationResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncSyntheticDataGenerationResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/telemetry.py b/src/llama_stack_client/resources/telemetry.py index 6e15678b..3231f5c2 100644 --- a/src/llama_stack_client/resources/telemetry.py +++ b/src/llama_stack_client/resources/telemetry.py @@ -42,7 +42,7 @@ class TelemetryResource(SyncAPIResource): @cached_property def with_raw_response(self) -> TelemetryResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -417,7 +417,7 @@ class AsyncTelemetryResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncTelemetryResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/tool_runtime/__init__.py b/src/llama_stack_client/resources/tool_runtime/__init__.py new file mode 100644 index 00000000..2ed86a39 --- /dev/null +++ b/src/llama_stack_client/resources/tool_runtime/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .rag_tool import ( + RagToolResource, + AsyncRagToolResource, + RagToolResourceWithRawResponse, + AsyncRagToolResourceWithRawResponse, + RagToolResourceWithStreamingResponse, + AsyncRagToolResourceWithStreamingResponse, +) +from .tool_runtime import ( + ToolRuntimeResource, + AsyncToolRuntimeResource, + ToolRuntimeResourceWithRawResponse, + AsyncToolRuntimeResourceWithRawResponse, + ToolRuntimeResourceWithStreamingResponse, + AsyncToolRuntimeResourceWithStreamingResponse, +) + +__all__ = [ + "RagToolResource", + "AsyncRagToolResource", + "RagToolResourceWithRawResponse", + "AsyncRagToolResourceWithRawResponse", + "RagToolResourceWithStreamingResponse", + "AsyncRagToolResourceWithStreamingResponse", + "ToolRuntimeResource", + "AsyncToolRuntimeResource", + "ToolRuntimeResourceWithRawResponse", + "AsyncToolRuntimeResourceWithRawResponse", + "ToolRuntimeResourceWithStreamingResponse", + "AsyncToolRuntimeResourceWithStreamingResponse", +] diff --git a/src/llama_stack_client/resources/tool_runtime/rag_tool.py b/src/llama_stack_client/resources/tool_runtime/rag_tool.py new file mode 100644 index 00000000..c7f49758 --- /dev/null +++ b/src/llama_stack_client/resources/tool_runtime/rag_tool.py @@ -0,0 +1,330 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Iterable + +import httpx + +from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven +from ..._utils import ( + maybe_transform, + strip_not_given, + async_maybe_transform, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from ..._base_client import make_request_options +from ...types.tool_runtime import rag_tool_query_params, rag_tool_insert_params +from ...types.tool_runtime.query_result import QueryResult +from ...types.tool_runtime.document_param import DocumentParam +from ...types.tool_runtime.query_config_param import QueryConfigParam +from ...types.shared_params.interleaved_content import InterleavedContent + +__all__ = ["RagToolResource", "AsyncRagToolResource"] + + +class RagToolResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> RagToolResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + """ + return RagToolResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> RagToolResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + """ + return RagToolResourceWithStreamingResponse(self) + + def insert( + self, + *, + chunk_size_in_tokens: int, + documents: Iterable[DocumentParam], + vector_db_id: str, + x_llama_stack_client_version: str | NotGiven = NOT_GIVEN, + x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """ + Index documents so they can be used by the RAG system + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + extra_headers = { + **strip_not_given( + { + "X-LlamaStack-Client-Version": x_llama_stack_client_version, + "X-LlamaStack-Provider-Data": x_llama_stack_provider_data, + } + ), + **(extra_headers or {}), + } + return self._post( + "/v1/tool-runtime/rag-tool/insert", + body=maybe_transform( + { + "chunk_size_in_tokens": chunk_size_in_tokens, + "documents": documents, + "vector_db_id": vector_db_id, + }, + rag_tool_insert_params.RagToolInsertParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + def query( + self, + *, + content: InterleavedContent, + vector_db_ids: List[str], + query_config: QueryConfigParam | NotGiven = NOT_GIVEN, + x_llama_stack_client_version: str | NotGiven = NOT_GIVEN, + x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> QueryResult: + """ + Query the RAG system for context; typically invoked by the agent + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = { + **strip_not_given( + { + "X-LlamaStack-Client-Version": x_llama_stack_client_version, + "X-LlamaStack-Provider-Data": x_llama_stack_provider_data, + } + ), + **(extra_headers or {}), + } + return self._post( + "/v1/tool-runtime/rag-tool/query", + body=maybe_transform( + { + "content": content, + "vector_db_ids": vector_db_ids, + "query_config": query_config, + }, + rag_tool_query_params.RagToolQueryParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=QueryResult, + ) + + +class AsyncRagToolResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncRagToolResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + """ + return AsyncRagToolResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncRagToolResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + """ + return AsyncRagToolResourceWithStreamingResponse(self) + + async def insert( + self, + *, + chunk_size_in_tokens: int, + documents: Iterable[DocumentParam], + vector_db_id: str, + x_llama_stack_client_version: str | NotGiven = NOT_GIVEN, + x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """ + Index documents so they can be used by the RAG system + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + extra_headers = { + **strip_not_given( + { + "X-LlamaStack-Client-Version": x_llama_stack_client_version, + "X-LlamaStack-Provider-Data": x_llama_stack_provider_data, + } + ), + **(extra_headers or {}), + } + return await self._post( + "/v1/tool-runtime/rag-tool/insert", + body=await async_maybe_transform( + { + "chunk_size_in_tokens": chunk_size_in_tokens, + "documents": documents, + "vector_db_id": vector_db_id, + }, + rag_tool_insert_params.RagToolInsertParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + async def query( + self, + *, + content: InterleavedContent, + vector_db_ids: List[str], + query_config: QueryConfigParam | NotGiven = NOT_GIVEN, + x_llama_stack_client_version: str | NotGiven = NOT_GIVEN, + x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> QueryResult: + """ + Query the RAG system for context; typically invoked by the agent + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = { + **strip_not_given( + { + "X-LlamaStack-Client-Version": x_llama_stack_client_version, + "X-LlamaStack-Provider-Data": x_llama_stack_provider_data, + } + ), + **(extra_headers or {}), + } + return await self._post( + "/v1/tool-runtime/rag-tool/query", + body=await async_maybe_transform( + { + "content": content, + "vector_db_ids": vector_db_ids, + "query_config": query_config, + }, + rag_tool_query_params.RagToolQueryParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=QueryResult, + ) + + +class RagToolResourceWithRawResponse: + def __init__(self, rag_tool: RagToolResource) -> None: + self._rag_tool = rag_tool + + self.insert = to_raw_response_wrapper( + rag_tool.insert, + ) + self.query = to_raw_response_wrapper( + rag_tool.query, + ) + + +class AsyncRagToolResourceWithRawResponse: + def __init__(self, rag_tool: AsyncRagToolResource) -> None: + self._rag_tool = rag_tool + + self.insert = async_to_raw_response_wrapper( + rag_tool.insert, + ) + self.query = async_to_raw_response_wrapper( + rag_tool.query, + ) + + +class RagToolResourceWithStreamingResponse: + def __init__(self, rag_tool: RagToolResource) -> None: + self._rag_tool = rag_tool + + self.insert = to_streamed_response_wrapper( + rag_tool.insert, + ) + self.query = to_streamed_response_wrapper( + rag_tool.query, + ) + + +class AsyncRagToolResourceWithStreamingResponse: + def __init__(self, rag_tool: AsyncRagToolResource) -> None: + self._rag_tool = rag_tool + + self.insert = async_to_streamed_response_wrapper( + rag_tool.insert, + ) + self.query = async_to_streamed_response_wrapper( + rag_tool.query, + ) diff --git a/src/llama_stack_client/resources/tool_runtime/tool_runtime.py b/src/llama_stack_client/resources/tool_runtime/tool_runtime.py new file mode 100644 index 00000000..cf2a7a45 --- /dev/null +++ b/src/llama_stack_client/resources/tool_runtime/tool_runtime.py @@ -0,0 +1,358 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
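The `rag_tool` resource above exposes the two RAG endpoints, `/v1/tool-runtime/rag-tool/insert` and `/v1/tool-runtime/rag-tool/query`. A short usage sketch against the sync client; the document field names are assumptions based on the `document_param` module, which is not reproduced in this diff:

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:8321")  # illustrative URL

    # Index a document into an existing vector DB (field names assumed).
    client.tool_runtime.rag_tool.insert(
        documents=[
            {
                "document_id": "doc-1",                            # assumed field
                "content": "Llama Stack ships a Python client SDK.",
                "metadata": {"source": "readme"},                  # assumed field
            }
        ],
        vector_db_id="my-docs",
        chunk_size_in_tokens=512,
    )

    # Query for context; InterleavedContent accepts a plain string here.
    result = client.tool_runtime.rag_tool.query(
        content="What does Llama Stack ship?",
        vector_db_ids=["my-docs"],
    )
    print(result)  # QueryResult
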
+ +from __future__ import annotations + +from typing import Dict, Union, Iterable + +import httpx + +from ...types import tool_runtime_list_tools_params, tool_runtime_invoke_tool_params +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import ( + maybe_transform, + strip_not_given, + async_maybe_transform, +) +from .rag_tool import ( + RagToolResource, + AsyncRagToolResource, + RagToolResourceWithRawResponse, + AsyncRagToolResourceWithRawResponse, + RagToolResourceWithStreamingResponse, + AsyncRagToolResourceWithStreamingResponse, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from ..._base_client import make_request_options +from ...types.tool_def import ToolDef +from ..._decoders.jsonl import JSONLDecoder, AsyncJSONLDecoder +from ...types.shared_params.url import URL +from ...types.tool_invocation_result import ToolInvocationResult + +__all__ = ["ToolRuntimeResource", "AsyncToolRuntimeResource"] + + +class ToolRuntimeResource(SyncAPIResource): + @cached_property + def rag_tool(self) -> RagToolResource: + return RagToolResource(self._client) + + @cached_property + def with_raw_response(self) -> ToolRuntimeResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + """ + return ToolRuntimeResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ToolRuntimeResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + """ + return ToolRuntimeResourceWithStreamingResponse(self) + + def invoke_tool( + self, + *, + kwargs: Dict[str, Union[bool, float, str, Iterable[object], object, None]], + tool_name: str, + x_llama_stack_client_version: str | NotGiven = NOT_GIVEN, + x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ToolInvocationResult: + """ + Run a tool with the given arguments + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = { + **strip_not_given( + { + "X-LlamaStack-Client-Version": x_llama_stack_client_version, + "X-LlamaStack-Provider-Data": x_llama_stack_provider_data, + } + ), + **(extra_headers or {}), + } + return self._post( + "/v1/tool-runtime/invoke", + body=maybe_transform( + { + "kwargs": kwargs, + "tool_name": tool_name, + }, + tool_runtime_invoke_tool_params.ToolRuntimeInvokeToolParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ToolInvocationResult, + ) + + def list_tools( + self, + *, + mcp_endpoint: URL | NotGiven = NOT_GIVEN, + tool_group_id: str | NotGiven = NOT_GIVEN, + x_llama_stack_client_version: str | NotGiven = NOT_GIVEN, + x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> JSONLDecoder[ToolDef]: + """ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"Accept": "application/jsonl", **(extra_headers or {})} + extra_headers = { + **strip_not_given( + { + "X-LlamaStack-Client-Version": x_llama_stack_client_version, + "X-LlamaStack-Provider-Data": x_llama_stack_provider_data, + } + ), + **(extra_headers or {}), + } + return self._get( + "/v1/tool-runtime/list-tools", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "mcp_endpoint": mcp_endpoint, + "tool_group_id": tool_group_id, + }, + tool_runtime_list_tools_params.ToolRuntimeListToolsParams, + ), + ), + cast_to=JSONLDecoder[ToolDef], + stream=True, + ) + + +class AsyncToolRuntimeResource(AsyncAPIResource): + @cached_property + def rag_tool(self) -> AsyncRagToolResource: + return AsyncRagToolResource(self._client) + + @cached_property + def with_raw_response(self) -> AsyncToolRuntimeResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + """ + return AsyncToolRuntimeResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncToolRuntimeResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + """ + return AsyncToolRuntimeResourceWithStreamingResponse(self) + + async def invoke_tool( + self, + *, + kwargs: Dict[str, Union[bool, float, str, Iterable[object], object, None]], + tool_name: str, + x_llama_stack_client_version: str | NotGiven = NOT_GIVEN, + x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ToolInvocationResult: + """ + Run a tool with the given arguments + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = { + **strip_not_given( + { + "X-LlamaStack-Client-Version": x_llama_stack_client_version, + "X-LlamaStack-Provider-Data": x_llama_stack_provider_data, + } + ), + **(extra_headers or {}), + } + return await self._post( + "/v1/tool-runtime/invoke", + body=await async_maybe_transform( + { + "kwargs": kwargs, + "tool_name": tool_name, + }, + tool_runtime_invoke_tool_params.ToolRuntimeInvokeToolParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ToolInvocationResult, + ) + + async def list_tools( + self, + *, + mcp_endpoint: URL | NotGiven = NOT_GIVEN, + tool_group_id: str | NotGiven = NOT_GIVEN, + x_llama_stack_client_version: str | NotGiven = NOT_GIVEN, + x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncJSONLDecoder[ToolDef]: + """ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"Accept": "application/jsonl", **(extra_headers or {})} + extra_headers = { + **strip_not_given( + { + "X-LlamaStack-Client-Version": x_llama_stack_client_version, + "X-LlamaStack-Provider-Data": x_llama_stack_provider_data, + } + ), + **(extra_headers or {}), + } + return await self._get( + "/v1/tool-runtime/list-tools", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform( + { + "mcp_endpoint": mcp_endpoint, + "tool_group_id": tool_group_id, + }, + tool_runtime_list_tools_params.ToolRuntimeListToolsParams, + ), + ), + cast_to=AsyncJSONLDecoder[ToolDef], + stream=True, + ) + + +class ToolRuntimeResourceWithRawResponse: + def __init__(self, tool_runtime: ToolRuntimeResource) -> None: + self._tool_runtime = tool_runtime + + self.invoke_tool = to_raw_response_wrapper( + tool_runtime.invoke_tool, + ) + self.list_tools = to_raw_response_wrapper( + tool_runtime.list_tools, + ) + + @cached_property + def rag_tool(self) -> RagToolResourceWithRawResponse: + return RagToolResourceWithRawResponse(self._tool_runtime.rag_tool) + + +class AsyncToolRuntimeResourceWithRawResponse: + def __init__(self, tool_runtime: AsyncToolRuntimeResource) -> None: + self._tool_runtime = tool_runtime + + self.invoke_tool = async_to_raw_response_wrapper( + tool_runtime.invoke_tool, + ) + self.list_tools = async_to_raw_response_wrapper( + tool_runtime.list_tools, + ) + + @cached_property + def rag_tool(self) -> AsyncRagToolResourceWithRawResponse: + return AsyncRagToolResourceWithRawResponse(self._tool_runtime.rag_tool) + + +class ToolRuntimeResourceWithStreamingResponse: + def __init__(self, tool_runtime: ToolRuntimeResource) -> None: + self._tool_runtime = tool_runtime + + self.invoke_tool = to_streamed_response_wrapper( + tool_runtime.invoke_tool, + ) + self.list_tools = to_streamed_response_wrapper( + tool_runtime.list_tools, + ) + + @cached_property + def rag_tool(self) -> RagToolResourceWithStreamingResponse: + return RagToolResourceWithStreamingResponse(self._tool_runtime.rag_tool) + + +class AsyncToolRuntimeResourceWithStreamingResponse: + def __init__(self, tool_runtime: AsyncToolRuntimeResource) -> None: + self._tool_runtime = tool_runtime + + self.invoke_tool = async_to_streamed_response_wrapper( + tool_runtime.invoke_tool, + ) + self.list_tools = async_to_streamed_response_wrapper( + tool_runtime.list_tools, + ) + + @cached_property + def rag_tool(self) -> AsyncRagToolResourceWithStreamingResponse: + return AsyncRagToolResourceWithStreamingResponse(self._tool_runtime.rag_tool) diff --git a/src/llama_stack_client/resources/toolgroups.py b/src/llama_stack_client/resources/toolgroups.py index 5d65f9ef..b318963a 100644 --- a/src/llama_stack_client/resources/toolgroups.py +++ b/src/llama_stack_client/resources/toolgroups.py @@ -34,7 +34,7 @@ class ToolgroupsResource(SyncAPIResource): @cached_property def with_raw_response(self) -> ToolgroupsResourceWithRawResponse: """ - This property can be used as a prefix for any 
HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -242,7 +242,7 @@ class AsyncToolgroupsResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncToolgroupsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/tools.py b/src/llama_stack_client/resources/tools.py index b91c00c2..23176ba8 100644 --- a/src/llama_stack_client/resources/tools.py +++ b/src/llama_stack_client/resources/tools.py @@ -33,7 +33,7 @@ class ToolsResource(SyncAPIResource): @cached_property def with_raw_response(self) -> ToolsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers @@ -143,7 +143,7 @@ class AsyncToolsResource(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncToolsResourceWithRawResponse: """ - This property can be used as a prefix for any HTTP method call to return the + This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers diff --git a/src/llama_stack_client/resources/vector_dbs.py b/src/llama_stack_client/resources/vector_dbs.py new file mode 100644 index 00000000..175edda8 --- /dev/null +++ b/src/llama_stack_client/resources/vector_dbs.py @@ -0,0 +1,508 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Type, Optional, cast + +import httpx + +from ..types import vector_db_register_params +from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven +from .._utils import ( + maybe_transform, + strip_not_given, + async_maybe_transform, +) +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from .._wrappers import DataWrapper +from .._base_client import make_request_options +from ..types.vector_db_list_response import VectorDBListResponse +from ..types.vector_db_register_response import VectorDBRegisterResponse +from ..types.vector_db_retrieve_response import VectorDBRetrieveResponse + +__all__ = ["VectorDBsResource", "AsyncVectorDBsResource"] + + +class VectorDBsResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> VectorDBsResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + """ + return VectorDBsResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> VectorDBsResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + """ + return VectorDBsResourceWithStreamingResponse(self) + + def retrieve( + self, + vector_db_id: str, + *, + x_llama_stack_client_version: str | NotGiven = NOT_GIVEN, + x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Optional[VectorDBRetrieveResponse]: + """ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_db_id: + raise ValueError(f"Expected a non-empty value for `vector_db_id` but received {vector_db_id!r}") + extra_headers = { + **strip_not_given( + { + "X-LlamaStack-Client-Version": x_llama_stack_client_version, + "X-LlamaStack-Provider-Data": x_llama_stack_provider_data, + } + ), + **(extra_headers or {}), + } + return self._get( + f"/v1/vector-dbs/{vector_db_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorDBRetrieveResponse, + ) + + def list( + self, + *, + x_llama_stack_client_version: str | NotGiven = NOT_GIVEN, + x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorDBListResponse: + """ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = { + **strip_not_given( + { + "X-LlamaStack-Client-Version": x_llama_stack_client_version, + "X-LlamaStack-Provider-Data": x_llama_stack_provider_data, + } + ), + **(extra_headers or {}), + } + return self._get( + "/v1/vector-dbs", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + post_parser=DataWrapper[VectorDBListResponse]._unwrapper, + ), + cast_to=cast(Type[VectorDBListResponse], DataWrapper[VectorDBListResponse]), + ) + + def register( + self, + *, + embedding_model: str, + vector_db_id: str, + embedding_dimension: int | NotGiven = NOT_GIVEN, + provider_id: str | NotGiven = NOT_GIVEN, + provider_vector_db_id: str | NotGiven = NOT_GIVEN, + x_llama_stack_client_version: str | NotGiven = NOT_GIVEN, + x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorDBRegisterResponse: + """ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = { + **strip_not_given( + { + "X-LlamaStack-Client-Version": x_llama_stack_client_version, + "X-LlamaStack-Provider-Data": x_llama_stack_provider_data, + } + ), + **(extra_headers or {}), + } + return self._post( + "/v1/vector-dbs", + body=maybe_transform( + { + "embedding_model": embedding_model, + "vector_db_id": vector_db_id, + "embedding_dimension": embedding_dimension, + "provider_id": provider_id, + "provider_vector_db_id": provider_vector_db_id, + }, + vector_db_register_params.VectorDBRegisterParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorDBRegisterResponse, + ) + + def unregister( + self, + vector_db_id: str, + *, + x_llama_stack_client_version: str | NotGiven = NOT_GIVEN, + x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_db_id: + raise ValueError(f"Expected a non-empty value for `vector_db_id` but received {vector_db_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + extra_headers = { + **strip_not_given( + { + "X-LlamaStack-Client-Version": x_llama_stack_client_version, + "X-LlamaStack-Provider-Data": x_llama_stack_provider_data, + } + ), + **(extra_headers or {}), + } + return self._delete( + f"/v1/vector-dbs/{vector_db_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class AsyncVectorDBsResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncVectorDBsResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + """ + return AsyncVectorDBsResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncVectorDBsResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + """ + return AsyncVectorDBsResourceWithStreamingResponse(self) + + async def retrieve( + self, + vector_db_id: str, + *, + x_llama_stack_client_version: str | NotGiven = NOT_GIVEN, + x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Optional[VectorDBRetrieveResponse]: + """ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_db_id: + raise ValueError(f"Expected a non-empty value for `vector_db_id` but received {vector_db_id!r}") + extra_headers = { + **strip_not_given( + { + "X-LlamaStack-Client-Version": x_llama_stack_client_version, + "X-LlamaStack-Provider-Data": x_llama_stack_provider_data, + } + ), + **(extra_headers or {}), + } + return await self._get( + f"/v1/vector-dbs/{vector_db_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorDBRetrieveResponse, + ) + + async def list( + self, + *, + x_llama_stack_client_version: str | NotGiven = NOT_GIVEN, + x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorDBListResponse: + """ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = { + **strip_not_given( + { + "X-LlamaStack-Client-Version": x_llama_stack_client_version, + "X-LlamaStack-Provider-Data": x_llama_stack_provider_data, + } + ), + **(extra_headers or {}), + } + return await self._get( + "/v1/vector-dbs", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + post_parser=DataWrapper[VectorDBListResponse]._unwrapper, + ), + cast_to=cast(Type[VectorDBListResponse], DataWrapper[VectorDBListResponse]), + ) + + async def register( + self, + *, + embedding_model: str, + vector_db_id: str, + embedding_dimension: int | NotGiven = NOT_GIVEN, + provider_id: str | NotGiven = NOT_GIVEN, + provider_vector_db_id: str | NotGiven = NOT_GIVEN, + x_llama_stack_client_version: str | NotGiven = NOT_GIVEN, + x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorDBRegisterResponse: + """ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = { + **strip_not_given( + { + "X-LlamaStack-Client-Version": x_llama_stack_client_version, + "X-LlamaStack-Provider-Data": x_llama_stack_provider_data, + } + ), + **(extra_headers or {}), + } + return await self._post( + "/v1/vector-dbs", + body=await async_maybe_transform( + { + "embedding_model": embedding_model, + "vector_db_id": vector_db_id, + "embedding_dimension": embedding_dimension, + "provider_id": provider_id, + "provider_vector_db_id": provider_vector_db_id, + }, + vector_db_register_params.VectorDBRegisterParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorDBRegisterResponse, + ) + + async def unregister( + self, + vector_db_id: str, + *, + x_llama_stack_client_version: str | NotGiven = NOT_GIVEN, + x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_db_id: + raise ValueError(f"Expected a non-empty value for `vector_db_id` but received {vector_db_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + extra_headers = { + **strip_not_given( + { + "X-LlamaStack-Client-Version": x_llama_stack_client_version, + "X-LlamaStack-Provider-Data": x_llama_stack_provider_data, + } + ), + **(extra_headers or {}), + } + return await self._delete( + f"/v1/vector-dbs/{vector_db_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class VectorDBsResourceWithRawResponse: + def __init__(self, vector_dbs: VectorDBsResource) -> None: + self._vector_dbs = vector_dbs + + self.retrieve = to_raw_response_wrapper( + vector_dbs.retrieve, + ) + self.list = to_raw_response_wrapper( + vector_dbs.list, + ) + self.register = to_raw_response_wrapper( + vector_dbs.register, + ) + self.unregister = to_raw_response_wrapper( + vector_dbs.unregister, + ) + + +class AsyncVectorDBsResourceWithRawResponse: + def __init__(self, vector_dbs: AsyncVectorDBsResource) -> None: + self._vector_dbs = vector_dbs + + self.retrieve = async_to_raw_response_wrapper( + vector_dbs.retrieve, + ) + self.list = async_to_raw_response_wrapper( + vector_dbs.list, + ) + self.register = async_to_raw_response_wrapper( + vector_dbs.register, + ) + self.unregister = async_to_raw_response_wrapper( + vector_dbs.unregister, + ) + + +class 
VectorDBsResourceWithStreamingResponse: + def __init__(self, vector_dbs: VectorDBsResource) -> None: + self._vector_dbs = vector_dbs + + self.retrieve = to_streamed_response_wrapper( + vector_dbs.retrieve, + ) + self.list = to_streamed_response_wrapper( + vector_dbs.list, + ) + self.register = to_streamed_response_wrapper( + vector_dbs.register, + ) + self.unregister = to_streamed_response_wrapper( + vector_dbs.unregister, + ) + + +class AsyncVectorDBsResourceWithStreamingResponse: + def __init__(self, vector_dbs: AsyncVectorDBsResource) -> None: + self._vector_dbs = vector_dbs + + self.retrieve = async_to_streamed_response_wrapper( + vector_dbs.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + vector_dbs.list, + ) + self.register = async_to_streamed_response_wrapper( + vector_dbs.register, + ) + self.unregister = async_to_streamed_response_wrapper( + vector_dbs.unregister, + ) diff --git a/src/llama_stack_client/resources/vector_io.py b/src/llama_stack_client/resources/vector_io.py new file mode 100644 index 00000000..bc96c5cb --- /dev/null +++ b/src/llama_stack_client/resources/vector_io.py @@ -0,0 +1,320 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Union, Iterable + +import httpx + +from ..types import vector_io_query_params, vector_io_insert_params +from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven +from .._utils import ( + maybe_transform, + strip_not_given, + async_maybe_transform, +) +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from .._base_client import make_request_options +from ..types.query_chunks_response import QueryChunksResponse +from ..types.shared_params.interleaved_content import InterleavedContent + +__all__ = ["VectorIoResource", "AsyncVectorIoResource"] + + +class VectorIoResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> VectorIoResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + """ + return VectorIoResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> VectorIoResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + """ + return VectorIoResourceWithStreamingResponse(self) + + def insert( + self, + *, + chunks: Iterable[vector_io_insert_params.Chunk], + vector_db_id: str, + ttl_seconds: int | NotGiven = NOT_GIVEN, + x_llama_stack_client_version: str | NotGiven = NOT_GIVEN, + x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + extra_headers = { + **strip_not_given( + { + "X-LlamaStack-Client-Version": x_llama_stack_client_version, + "X-LlamaStack-Provider-Data": x_llama_stack_provider_data, + } + ), + **(extra_headers or {}), + } + return self._post( + "/v1/vector-io/insert", + body=maybe_transform( + { + "chunks": chunks, + "vector_db_id": vector_db_id, + "ttl_seconds": ttl_seconds, + }, + vector_io_insert_params.VectorIoInsertParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + def query( + self, + *, + query: InterleavedContent, + vector_db_id: str, + params: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + x_llama_stack_client_version: str | NotGiven = NOT_GIVEN, + x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> QueryChunksResponse: + """ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = { + **strip_not_given( + { + "X-LlamaStack-Client-Version": x_llama_stack_client_version, + "X-LlamaStack-Provider-Data": x_llama_stack_provider_data, + } + ), + **(extra_headers or {}), + } + return self._post( + "/v1/vector-io/query", + body=maybe_transform( + { + "query": query, + "vector_db_id": vector_db_id, + "params": params, + }, + vector_io_query_params.VectorIoQueryParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=QueryChunksResponse, + ) + + +class AsyncVectorIoResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncVectorIoResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + """ + return AsyncVectorIoResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncVectorIoResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + """ + return AsyncVectorIoResourceWithStreamingResponse(self) + + async def insert( + self, + *, + chunks: Iterable[vector_io_insert_params.Chunk], + vector_db_id: str, + ttl_seconds: int | NotGiven = NOT_GIVEN, + x_llama_stack_client_version: str | NotGiven = NOT_GIVEN, + x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + extra_headers = { + **strip_not_given( + { + "X-LlamaStack-Client-Version": x_llama_stack_client_version, + "X-LlamaStack-Provider-Data": x_llama_stack_provider_data, + } + ), + **(extra_headers or {}), + } + return await self._post( + "/v1/vector-io/insert", + body=await async_maybe_transform( + { + "chunks": chunks, + "vector_db_id": vector_db_id, + "ttl_seconds": ttl_seconds, + }, + vector_io_insert_params.VectorIoInsertParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + async def query( + self, + *, + query: InterleavedContent, + vector_db_id: str, + params: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, + x_llama_stack_client_version: str | NotGiven = NOT_GIVEN, + x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> QueryChunksResponse: + """ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = { + **strip_not_given( + { + "X-LlamaStack-Client-Version": x_llama_stack_client_version, + "X-LlamaStack-Provider-Data": x_llama_stack_provider_data, + } + ), + **(extra_headers or {}), + } + return await self._post( + "/v1/vector-io/query", + body=await async_maybe_transform( + { + "query": query, + "vector_db_id": vector_db_id, + "params": params, + }, + vector_io_query_params.VectorIoQueryParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=QueryChunksResponse, + ) + + +class VectorIoResourceWithRawResponse: + def __init__(self, vector_io: VectorIoResource) -> None: + self._vector_io = vector_io + + self.insert = to_raw_response_wrapper( + vector_io.insert, + ) + self.query = to_raw_response_wrapper( + vector_io.query, + ) + + +class AsyncVectorIoResourceWithRawResponse: + def __init__(self, vector_io: AsyncVectorIoResource) -> None: + self._vector_io = vector_io + + self.insert = async_to_raw_response_wrapper( + vector_io.insert, + ) + self.query = async_to_raw_response_wrapper( + vector_io.query, + ) + + +class VectorIoResourceWithStreamingResponse: + def __init__(self, vector_io: VectorIoResource) -> None: + self._vector_io = vector_io + + self.insert = to_streamed_response_wrapper( + vector_io.insert, + ) + self.query = to_streamed_response_wrapper( + vector_io.query, + ) + + +class AsyncVectorIoResourceWithStreamingResponse: + def __init__(self, vector_io: AsyncVectorIoResource) -> None: + self._vector_io = vector_io + + self.insert = async_to_streamed_response_wrapper( + vector_io.insert, + ) + self.query = async_to_streamed_response_wrapper( + vector_io.query, + ) diff --git a/src/llama_stack_client/types/__init__.py b/src/llama_stack_client/types/__init__.py index 39fd5855..ecd60a21 100644 --- a/src/llama_stack_client/types/__init__.py +++ b/src/llama_stack_client/types/__init__.py @@ -47,7 +47,6 @@ from .completion_response import CompletionResponse as CompletionResponse from .embeddings_response import EmbeddingsResponse as EmbeddingsResponse from .list_tools_response import ListToolsResponse as ListToolsResponse -from .memory_query_params import MemoryQueryParams as MemoryQueryParams from .model_list_response import ModelListResponse as ModelListResponse from .route_list_response import RouteListResponse as RouteListResponse from .run_shield_response import RunShieldResponse as RunShieldResponse @@ -55,7 +54,6 @@ from .eval_run_eval_params import EvalRunEvalParams as EvalRunEvalParams from .list_models_response import ListModelsResponse as ListModelsResponse from .list_routes_response import ListRoutesResponse as ListRoutesResponse -from .memory_insert_params import MemoryInsertParams as MemoryInsertParams from .query_spans_response import QuerySpansResponse as QuerySpansResponse from .scoring_score_params import ScoringScoreParams as ScoringScoreParams from .shield_list_response import ShieldListResponse as ShieldListResponse @@ -65,37 +63,39 @@ from .memory_retrieval_step import 
MemoryRetrievalStep as MemoryRetrievalStep from .model_register_params import ModelRegisterParams as ModelRegisterParams from .paginated_rows_result import PaginatedRowsResult as PaginatedRowsResult +from .query_chunks_response import QueryChunksResponse as QueryChunksResponse from .list_datasets_response import ListDatasetsResponse as ListDatasetsResponse from .provider_list_response import ProviderListResponse as ProviderListResponse from .scoring_score_response import ScoringScoreResponse as ScoringScoreResponse from .shield_register_params import ShieldRegisterParams as ShieldRegisterParams from .tool_invocation_result import ToolInvocationResult as ToolInvocationResult +from .vector_io_query_params import VectorIoQueryParams as VectorIoQueryParams from .dataset_register_params import DatasetRegisterParams as DatasetRegisterParams from .eval_task_list_response import EvalTaskListResponse as EvalTaskListResponse from .list_providers_response import ListProvidersResponse as ListProvidersResponse from .toolgroup_list_response import ToolgroupListResponse as ToolgroupListResponse +from .vector_db_list_response import VectorDBListResponse as VectorDBListResponse +from .vector_io_insert_params import VectorIoInsertParams as VectorIoInsertParams from .list_eval_tasks_response import ListEvalTasksResponse as ListEvalTasksResponse -from .query_documents_response import QueryDocumentsResponse as QueryDocumentsResponse +from .list_vector_dbs_response import ListVectorDBsResponse as ListVectorDBsResponse from .safety_run_shield_params import SafetyRunShieldParams as SafetyRunShieldParams from .dataset_retrieve_response import DatasetRetrieveResponse as DatasetRetrieveResponse from .eval_evaluate_rows_params import EvalEvaluateRowsParams as EvalEvaluateRowsParams from .eval_task_register_params import EvalTaskRegisterParams as EvalTaskRegisterParams from .list_tool_groups_response import ListToolGroupsResponse as ListToolGroupsResponse -from .memory_bank_list_response import MemoryBankListResponse as MemoryBankListResponse from .toolgroup_register_params import ToolgroupRegisterParams as ToolgroupRegisterParams -from .list_memory_banks_response import ListMemoryBanksResponse as ListMemoryBanksResponse +from .vector_db_register_params import VectorDBRegisterParams as VectorDBRegisterParams from .scoring_score_batch_params import ScoringScoreBatchParams as ScoringScoreBatchParams from .telemetry_log_event_params import TelemetryLogEventParams as TelemetryLogEventParams from .inference_completion_params import InferenceCompletionParams as InferenceCompletionParams from .inference_embeddings_params import InferenceEmbeddingsParams as InferenceEmbeddingsParams -from .memory_bank_register_params import MemoryBankRegisterParams as MemoryBankRegisterParams from .telemetry_get_span_response import TelemetryGetSpanResponse as TelemetryGetSpanResponse +from .vector_db_register_response import VectorDBRegisterResponse as VectorDBRegisterResponse +from .vector_db_retrieve_response import VectorDBRetrieveResponse as VectorDBRetrieveResponse from .datasetio_append_rows_params import DatasetioAppendRowsParams as DatasetioAppendRowsParams from .scoring_score_batch_response import ScoringScoreBatchResponse as ScoringScoreBatchResponse from .telemetry_query_spans_params import TelemetryQuerySpansParams as TelemetryQuerySpansParams from .inference_completion_response import InferenceCompletionResponse as InferenceCompletionResponse -from .memory_bank_register_response import MemoryBankRegisterResponse as 
MemoryBankRegisterResponse -from .memory_bank_retrieve_response import MemoryBankRetrieveResponse as MemoryBankRetrieveResponse from .telemetry_query_traces_params import TelemetryQueryTracesParams as TelemetryQueryTracesParams from .scoring_function_list_response import ScoringFunctionListResponse as ScoringFunctionListResponse from .telemetry_get_span_tree_params import TelemetryGetSpanTreeParams as TelemetryGetSpanTreeParams diff --git a/src/llama_stack_client/types/agents/session.py b/src/llama_stack_client/types/agents/session.py index b8c2c305..707c4cbf 100644 --- a/src/llama_stack_client/types/agents/session.py +++ b/src/llama_stack_client/types/agents/session.py @@ -1,81 +1,12 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import List, Union, Optional +from typing import List from datetime import datetime -from typing_extensions import Literal, TypeAlias from .turn import Turn from ..._models import BaseModel -__all__ = [ - "Session", - "MemoryBank", - "MemoryBankVectorMemoryBank", - "MemoryBankKeyValueMemoryBank", - "MemoryBankKeywordMemoryBank", - "MemoryBankGraphMemoryBank", -] - - -class MemoryBankVectorMemoryBank(BaseModel): - chunk_size_in_tokens: int - - embedding_model: str - - identifier: str - - memory_bank_type: Literal["vector"] - - provider_id: str - - provider_resource_id: str - - type: Literal["memory_bank"] - - embedding_dimension: Optional[int] = None - - overlap_size_in_tokens: Optional[int] = None - - -class MemoryBankKeyValueMemoryBank(BaseModel): - identifier: str - - memory_bank_type: Literal["keyvalue"] - - provider_id: str - - provider_resource_id: str - - type: Literal["memory_bank"] - - -class MemoryBankKeywordMemoryBank(BaseModel): - identifier: str - - memory_bank_type: Literal["keyword"] - - provider_id: str - - provider_resource_id: str - - type: Literal["memory_bank"] - - -class MemoryBankGraphMemoryBank(BaseModel): - identifier: str - - memory_bank_type: Literal["graph"] - - provider_id: str - - provider_resource_id: str - - type: Literal["memory_bank"] - - -MemoryBank: TypeAlias = Union[ - MemoryBankVectorMemoryBank, MemoryBankKeyValueMemoryBank, MemoryBankKeywordMemoryBank, MemoryBankGraphMemoryBank -] +__all__ = ["Session"] class Session(BaseModel): @@ -86,5 +17,3 @@ class Session(BaseModel): started_at: datetime turns: List[Turn] - - memory_bank: Optional[MemoryBank] = None diff --git a/src/llama_stack_client/types/list_vector_dbs_response.py b/src/llama_stack_client/types/list_vector_dbs_response.py new file mode 100644 index 00000000..1b293a6a --- /dev/null +++ b/src/llama_stack_client/types/list_vector_dbs_response.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["ListVectorDBsResponse", "Data"] + + +class Data(BaseModel): + embedding_dimension: int + + embedding_model: str + + identifier: str + + provider_id: str + + provider_resource_id: str + + type: Literal["vector_db"] + + +class ListVectorDBsResponse(BaseModel): + data: List[Data] diff --git a/src/llama_stack_client/types/memory_retrieval_step.py b/src/llama_stack_client/types/memory_retrieval_step.py index 3bd59563..2d5840c6 100644 --- a/src/llama_stack_client/types/memory_retrieval_step.py +++ b/src/llama_stack_client/types/memory_retrieval_step.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
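# --- Illustrative usage sketch (annotation, not patch content) ---------------------
# ListVectorDBsResponse above is the wire shape; through the vector_dbs resource added
# earlier in this patch, list() unwraps the `data` envelope into a plain item list.
# The embedding model name and dimension below are assumptions for the example only.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")

registered = client.vector_dbs.register(
    vector_db_id="my-vector-db",
    embedding_model="all-MiniLM-L6-v2",
    embedding_dimension=384,
)
print(registered.identifier, registered.embedding_dimension)

# Each element is a vector DB entry with identifier / embedding_model / provider fields.
for db in client.vector_dbs.list():
    print(db.identifier, db.embedding_model)

client.vector_dbs.unregister("my-vector-db")
# ------------------------------------------------------------------------------------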
-from typing import List, Optional +from typing import Optional from datetime import datetime from typing_extensions import Literal @@ -13,14 +13,14 @@ class MemoryRetrievalStep(BaseModel): inserted_context: InterleavedContent - memory_bank_ids: List[str] - step_id: str step_type: Literal["memory_retrieval"] turn_id: str + vector_db_ids: str + completed_at: Optional[datetime] = None started_at: Optional[datetime] = None diff --git a/src/llama_stack_client/types/query_chunks_response.py b/src/llama_stack_client/types/query_chunks_response.py new file mode 100644 index 00000000..4fb11316 --- /dev/null +++ b/src/llama_stack_client/types/query_chunks_response.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union + +from .._models import BaseModel +from .shared.interleaved_content import InterleavedContent + +__all__ = ["QueryChunksResponse", "Chunk"] + + +class Chunk(BaseModel): + content: InterleavedContent + + metadata: Dict[str, Union[bool, float, str, List[object], object, None]] + + +class QueryChunksResponse(BaseModel): + chunks: List[Chunk] + + scores: List[float] diff --git a/src/llama_stack_client/types/tool_runtime/__init__.py b/src/llama_stack_client/types/tool_runtime/__init__.py new file mode 100644 index 00000000..27283e7a --- /dev/null +++ b/src/llama_stack_client/types/tool_runtime/__init__.py @@ -0,0 +1,9 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .query_result import QueryResult as QueryResult +from .document_param import DocumentParam as DocumentParam +from .query_config_param import QueryConfigParam as QueryConfigParam +from .rag_tool_query_params import RagToolQueryParams as RagToolQueryParams +from .rag_tool_insert_params import RagToolInsertParams as RagToolInsertParams diff --git a/src/llama_stack_client/types/tool_runtime/document_param.py b/src/llama_stack_client/types/tool_runtime/document_param.py new file mode 100644 index 00000000..7df99c11 --- /dev/null +++ b/src/llama_stack_client/types/tool_runtime/document_param.py @@ -0,0 +1,38 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Dict, Union, Iterable +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from ..shared_params.url import URL +from ..shared_params.interleaved_content_item import InterleavedContentItem + +__all__ = ["DocumentParam", "Content", "ContentImageContentItem", "ContentTextContentItem"] + + +class ContentImageContentItem(TypedDict, total=False): + type: Required[Literal["image"]] + + data: str + + url: URL + + +class ContentTextContentItem(TypedDict, total=False): + text: Required[str] + + type: Required[Literal["text"]] + + +Content: TypeAlias = Union[str, ContentImageContentItem, ContentTextContentItem, Iterable[InterleavedContentItem], URL] + + +class DocumentParam(TypedDict, total=False): + content: Required[Content] + + document_id: Required[str] + + metadata: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + + mime_type: str diff --git a/src/llama_stack_client/types/tool_runtime/query_config_param.py b/src/llama_stack_client/types/tool_runtime/query_config_param.py new file mode 100644 index 00000000..6b106e50 --- /dev/null +++ b/src/llama_stack_client/types/tool_runtime/query_config_param.py @@ -0,0 +1,40 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +__all__ = [ + "QueryConfigParam", + "QueryGeneratorConfig", + "QueryGeneratorConfigDefaultRagQueryGeneratorConfig", + "QueryGeneratorConfigLlmragQueryGeneratorConfig", +] + + +class QueryGeneratorConfigDefaultRagQueryGeneratorConfig(TypedDict, total=False): + separator: Required[str] + + type: Required[Literal["default"]] + + +class QueryGeneratorConfigLlmragQueryGeneratorConfig(TypedDict, total=False): + model: Required[str] + + template: Required[str] + + type: Required[Literal["llm"]] + + +QueryGeneratorConfig: TypeAlias = Union[ + QueryGeneratorConfigDefaultRagQueryGeneratorConfig, QueryGeneratorConfigLlmragQueryGeneratorConfig +] + + +class QueryConfigParam(TypedDict, total=False): + max_chunks: Required[int] + + max_tokens_in_context: Required[int] + + query_generator_config: Required[QueryGeneratorConfig] diff --git a/src/llama_stack_client/types/tool_runtime/query_result.py b/src/llama_stack_client/types/tool_runtime/query_result.py new file mode 100644 index 00000000..4486763b --- /dev/null +++ b/src/llama_stack_client/types/tool_runtime/query_result.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..._models import BaseModel +from ..shared.interleaved_content import InterleavedContent + +__all__ = ["QueryResult"] + + +class QueryResult(BaseModel): + content: Optional[InterleavedContent] = None diff --git a/src/llama_stack_client/types/tool_runtime/rag_tool_insert_params.py b/src/llama_stack_client/types/tool_runtime/rag_tool_insert_params.py new file mode 100644 index 00000000..f9955884 --- /dev/null +++ b/src/llama_stack_client/types/tool_runtime/rag_tool_insert_params.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
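# --- Illustrative usage sketch (annotation, not patch content) ---------------------
# QueryConfigParam and DocumentParam are plain TypedDicts, so callers can build them as
# literal dicts. The numeric limits and separator below are assumptions chosen only to
# show the required keys for the "default" query generator.
from llama_stack_client.types.tool_runtime import QueryConfigParam

query_config: QueryConfigParam = {
    "max_chunks": 5,
    "max_tokens_in_context": 1024,
    "query_generator_config": {
        "type": "default",
        "separator": " ",
    },
}
# This dict can be passed directly as rag_tool.query(..., query_config=query_config).
# ------------------------------------------------------------------------------------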
+ +from __future__ import annotations + +from typing import Iterable +from typing_extensions import Required, Annotated, TypedDict + +from ..._utils import PropertyInfo +from .document_param import DocumentParam + +__all__ = ["RagToolInsertParams"] + + +class RagToolInsertParams(TypedDict, total=False): + chunk_size_in_tokens: Required[int] + + documents: Required[Iterable[DocumentParam]] + + vector_db_id: Required[str] + + x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")] + + x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")] diff --git a/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py b/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py new file mode 100644 index 00000000..08086d81 --- /dev/null +++ b/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List +from typing_extensions import Required, Annotated, TypedDict + +from ..._utils import PropertyInfo +from .query_config_param import QueryConfigParam +from ..shared_params.interleaved_content import InterleavedContent + +__all__ = ["RagToolQueryParams"] + + +class RagToolQueryParams(TypedDict, total=False): + content: Required[InterleavedContent] + + vector_db_ids: Required[List[str]] + + query_config: QueryConfigParam + + x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")] + + x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")] diff --git a/src/llama_stack_client/types/tool_runtime_invoke_tool_params.py b/src/llama_stack_client/types/tool_runtime_invoke_tool_params.py index 78267b40..76354f7e 100644 --- a/src/llama_stack_client/types/tool_runtime_invoke_tool_params.py +++ b/src/llama_stack_client/types/tool_runtime_invoke_tool_params.py @@ -11,7 +11,7 @@ class ToolRuntimeInvokeToolParams(TypedDict, total=False): - args: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] + kwargs: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] tool_name: Required[str] diff --git a/src/llama_stack_client/types/vector_db_list_response.py b/src/llama_stack_client/types/vector_db_list_response.py new file mode 100644 index 00000000..0a110e2b --- /dev/null +++ b/src/llama_stack_client/types/vector_db_list_response.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List +from typing_extensions import Literal, TypeAlias + +from .._models import BaseModel + +__all__ = ["VectorDBListResponse", "VectorDBListResponseItem"] + + +class VectorDBListResponseItem(BaseModel): + embedding_dimension: int + + embedding_model: str + + identifier: str + + provider_id: str + + provider_resource_id: str + + type: Literal["vector_db"] + + +VectorDBListResponse: TypeAlias = List[VectorDBListResponseItem] diff --git a/src/llama_stack_client/types/vector_db_register_params.py b/src/llama_stack_client/types/vector_db_register_params.py new file mode 100644 index 00000000..09dc7eba --- /dev/null +++ b/src/llama_stack_client/types/vector_db_register_params.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
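# --- Illustrative usage sketch (annotation, not patch content) ---------------------
# The invoke-tool body now carries tool arguments under `kwargs` instead of `args`,
# matching the renamed field in ToolRuntimeInvokeToolParams above. The tool name and
# argument values here are assumptions for the example only.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")

result = client.tool_runtime.invoke_tool(
    tool_name="web_search",
    kwargs={"query": "llama stack rag tool"},
)
print(result)
# ------------------------------------------------------------------------------------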
+ +from __future__ import annotations + +from typing_extensions import Required, Annotated, TypedDict + +from .._utils import PropertyInfo + +__all__ = ["VectorDBRegisterParams"] + + +class VectorDBRegisterParams(TypedDict, total=False): + embedding_model: Required[str] + + vector_db_id: Required[str] + + embedding_dimension: int + + provider_id: str + + provider_vector_db_id: str + + x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")] + + x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")] diff --git a/src/llama_stack_client/types/vector_db_register_response.py b/src/llama_stack_client/types/vector_db_register_response.py new file mode 100644 index 00000000..cc4c201a --- /dev/null +++ b/src/llama_stack_client/types/vector_db_register_response.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["VectorDBRegisterResponse"] + + +class VectorDBRegisterResponse(BaseModel): + embedding_dimension: int + + embedding_model: str + + identifier: str + + provider_id: str + + provider_resource_id: str + + type: Literal["vector_db"] diff --git a/src/llama_stack_client/types/vector_db_retrieve_response.py b/src/llama_stack_client/types/vector_db_retrieve_response.py new file mode 100644 index 00000000..3bea2236 --- /dev/null +++ b/src/llama_stack_client/types/vector_db_retrieve_response.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["VectorDBRetrieveResponse"] + + +class VectorDBRetrieveResponse(BaseModel): + embedding_dimension: int + + embedding_model: str + + identifier: str + + provider_id: str + + provider_resource_id: str + + type: Literal["vector_db"] diff --git a/src/llama_stack_client/types/vector_io_insert_params.py b/src/llama_stack_client/types/vector_io_insert_params.py new file mode 100644 index 00000000..9570bb57 --- /dev/null +++ b/src/llama_stack_client/types/vector_io_insert_params.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Union, Iterable +from typing_extensions import Required, Annotated, TypedDict + +from .._utils import PropertyInfo +from .shared_params.interleaved_content import InterleavedContent + +__all__ = ["VectorIoInsertParams", "Chunk"] + + +class VectorIoInsertParams(TypedDict, total=False): + chunks: Required[Iterable[Chunk]] + + vector_db_id: Required[str] + + ttl_seconds: int + + x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")] + + x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")] + + +class Chunk(TypedDict, total=False): + content: Required[InterleavedContent] + + metadata: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] diff --git a/src/llama_stack_client/types/vector_io_query_params.py b/src/llama_stack_client/types/vector_io_query_params.py new file mode 100644 index 00000000..c626509b --- /dev/null +++ b/src/llama_stack_client/types/vector_io_query_params.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Dict, Union, Iterable +from typing_extensions import Required, Annotated, TypedDict + +from .._utils import PropertyInfo +from .shared_params.interleaved_content import InterleavedContent + +__all__ = ["VectorIoQueryParams"] + + +class VectorIoQueryParams(TypedDict, total=False): + query: Required[InterleavedContent] + + vector_db_id: Required[str] + + params: Dict[str, Union[bool, float, str, Iterable[object], object, None]] + + x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")] + + x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")] diff --git a/tests/api_resources/test_tool_runtime.py b/tests/api_resources/test_tool_runtime.py index 26019f95..a415cac1 100644 --- a/tests/api_resources/test_tool_runtime.py +++ b/tests/api_resources/test_tool_runtime.py @@ -13,6 +13,7 @@ ToolDef, ToolInvocationResult, ) +from llama_stack_client._decoders.jsonl import JSONLDecoder, AsyncJSONLDecoder base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -23,7 +24,7 @@ class TestToolRuntime: @parametrize def test_method_invoke_tool(self, client: LlamaStackClient) -> None: tool_runtime = client.tool_runtime.invoke_tool( - args={"foo": True}, + kwargs={"foo": True}, tool_name="tool_name", ) assert_matches_type(ToolInvocationResult, tool_runtime, path=["response"]) @@ -31,7 +32,7 @@ def test_method_invoke_tool(self, client: LlamaStackClient) -> None: @parametrize def test_method_invoke_tool_with_all_params(self, client: LlamaStackClient) -> None: tool_runtime = client.tool_runtime.invoke_tool( - args={"foo": True}, + kwargs={"foo": True}, tool_name="tool_name", x_llama_stack_client_version="X-LlamaStack-Client-Version", x_llama_stack_provider_data="X-LlamaStack-Provider-Data", @@ -41,7 +42,7 @@ def test_method_invoke_tool_with_all_params(self, client: LlamaStackClient) -> N @parametrize def test_raw_response_invoke_tool(self, client: LlamaStackClient) -> None: response = client.tool_runtime.with_raw_response.invoke_tool( - args={"foo": True}, + kwargs={"foo": True}, tool_name="tool_name", ) @@ -53,7 +54,7 @@ def test_raw_response_invoke_tool(self, client: LlamaStackClient) -> None: @parametrize def test_streaming_response_invoke_tool(self, client: LlamaStackClient) -> None: with client.tool_runtime.with_streaming_response.invoke_tool( - args={"foo": True}, + kwargs={"foo": True}, tool_name="tool_name", ) as response: assert not response.is_closed @@ -64,17 +65,13 @@ def test_streaming_response_invoke_tool(self, client: LlamaStackClient) -> None: assert cast(Any, response.is_closed) is True - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type application/jsonl, Prism mock server will fail" - ) + @pytest.mark.skip(reason="Prism doesn't support JSONL responses yet") @parametrize def test_method_list_tools(self, client: LlamaStackClient) -> None: tool_runtime = client.tool_runtime.list_tools() - assert_matches_type(ToolDef, tool_runtime, path=["response"]) + assert_matches_type(JSONLDecoder[ToolDef], tool_runtime, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type application/jsonl, Prism mock server will fail" - ) + @pytest.mark.skip(reason="Prism doesn't support JSONL responses yet") @parametrize def test_method_list_tools_with_all_params(self, client: LlamaStackClient) -> None: tool_runtime = client.tool_runtime.list_tools( @@ -83,11 +80,9 @@ def 
test_method_list_tools_with_all_params(self, client: LlamaStackClient) -> No x_llama_stack_client_version="X-LlamaStack-Client-Version", x_llama_stack_provider_data="X-LlamaStack-Provider-Data", ) - assert_matches_type(ToolDef, tool_runtime, path=["response"]) + assert_matches_type(JSONLDecoder[ToolDef], tool_runtime, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type application/jsonl, Prism mock server will fail" - ) + @pytest.mark.skip(reason="Prism doesn't support JSONL responses yet") @parametrize def test_raw_response_list_tools(self, client: LlamaStackClient) -> None: response = client.tool_runtime.with_raw_response.list_tools() @@ -95,11 +90,9 @@ def test_raw_response_list_tools(self, client: LlamaStackClient) -> None: assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" tool_runtime = response.parse() - assert_matches_type(ToolDef, tool_runtime, path=["response"]) + assert_matches_type(JSONLDecoder[ToolDef], tool_runtime, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type application/jsonl, Prism mock server will fail" - ) + @pytest.mark.skip(reason="Prism doesn't support JSONL responses yet") @parametrize def test_streaming_response_list_tools(self, client: LlamaStackClient) -> None: with client.tool_runtime.with_streaming_response.list_tools() as response: @@ -107,7 +100,7 @@ def test_streaming_response_list_tools(self, client: LlamaStackClient) -> None: assert response.http_request.headers.get("X-Stainless-Lang") == "python" tool_runtime = response.parse() - assert_matches_type(ToolDef, tool_runtime, path=["response"]) + assert_matches_type(JSONLDecoder[ToolDef], tool_runtime, path=["response"]) assert cast(Any, response.is_closed) is True @@ -118,7 +111,7 @@ class TestAsyncToolRuntime: @parametrize async def test_method_invoke_tool(self, async_client: AsyncLlamaStackClient) -> None: tool_runtime = await async_client.tool_runtime.invoke_tool( - args={"foo": True}, + kwargs={"foo": True}, tool_name="tool_name", ) assert_matches_type(ToolInvocationResult, tool_runtime, path=["response"]) @@ -126,7 +119,7 @@ async def test_method_invoke_tool(self, async_client: AsyncLlamaStackClient) -> @parametrize async def test_method_invoke_tool_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: tool_runtime = await async_client.tool_runtime.invoke_tool( - args={"foo": True}, + kwargs={"foo": True}, tool_name="tool_name", x_llama_stack_client_version="X-LlamaStack-Client-Version", x_llama_stack_provider_data="X-LlamaStack-Provider-Data", @@ -136,7 +129,7 @@ async def test_method_invoke_tool_with_all_params(self, async_client: AsyncLlama @parametrize async def test_raw_response_invoke_tool(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.tool_runtime.with_raw_response.invoke_tool( - args={"foo": True}, + kwargs={"foo": True}, tool_name="tool_name", ) @@ -148,7 +141,7 @@ async def test_raw_response_invoke_tool(self, async_client: AsyncLlamaStackClien @parametrize async def test_streaming_response_invoke_tool(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.tool_runtime.with_streaming_response.invoke_tool( - args={"foo": True}, + kwargs={"foo": True}, tool_name="tool_name", ) as response: assert not response.is_closed @@ -159,17 +152,13 @@ async def test_streaming_response_invoke_tool(self, async_client: AsyncLlamaStac assert cast(Any, response.is_closed) 
is True - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type application/jsonl, Prism mock server will fail" - ) + @pytest.mark.skip(reason="Prism doesn't support JSONL responses yet") @parametrize async def test_method_list_tools(self, async_client: AsyncLlamaStackClient) -> None: tool_runtime = await async_client.tool_runtime.list_tools() - assert_matches_type(ToolDef, tool_runtime, path=["response"]) + assert_matches_type(AsyncJSONLDecoder[ToolDef], tool_runtime, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type application/jsonl, Prism mock server will fail" - ) + @pytest.mark.skip(reason="Prism doesn't support JSONL responses yet") @parametrize async def test_method_list_tools_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: tool_runtime = await async_client.tool_runtime.list_tools( @@ -178,11 +167,9 @@ async def test_method_list_tools_with_all_params(self, async_client: AsyncLlamaS x_llama_stack_client_version="X-LlamaStack-Client-Version", x_llama_stack_provider_data="X-LlamaStack-Provider-Data", ) - assert_matches_type(ToolDef, tool_runtime, path=["response"]) + assert_matches_type(AsyncJSONLDecoder[ToolDef], tool_runtime, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type application/jsonl, Prism mock server will fail" - ) + @pytest.mark.skip(reason="Prism doesn't support JSONL responses yet") @parametrize async def test_raw_response_list_tools(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.tool_runtime.with_raw_response.list_tools() @@ -190,11 +177,9 @@ async def test_raw_response_list_tools(self, async_client: AsyncLlamaStackClient assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" tool_runtime = await response.parse() - assert_matches_type(ToolDef, tool_runtime, path=["response"]) + assert_matches_type(AsyncJSONLDecoder[ToolDef], tool_runtime, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type application/jsonl, Prism mock server will fail" - ) + @pytest.mark.skip(reason="Prism doesn't support JSONL responses yet") @parametrize async def test_streaming_response_list_tools(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.tool_runtime.with_streaming_response.list_tools() as response: @@ -202,6 +187,6 @@ async def test_streaming_response_list_tools(self, async_client: AsyncLlamaStack assert response.http_request.headers.get("X-Stainless-Lang") == "python" tool_runtime = await response.parse() - assert_matches_type(ToolDef, tool_runtime, path=["response"]) + assert_matches_type(AsyncJSONLDecoder[ToolDef], tool_runtime, path=["response"]) assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_vector_dbs.py b/tests/api_resources/test_vector_dbs.py new file mode 100644 index 00000000..68891d0f --- /dev/null +++ b/tests/api_resources/test_vector_dbs.py @@ -0,0 +1,374 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import os +from typing import Any, Optional, cast + +import pytest + +from tests.utils import assert_matches_type +from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient +from llama_stack_client.types import ( + VectorDBListResponse, + VectorDBRegisterResponse, + VectorDBRetrieveResponse, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestVectorDBs: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_retrieve(self, client: LlamaStackClient) -> None: + vector_db = client.vector_dbs.retrieve( + vector_db_id="vector_db_id", + ) + assert_matches_type(Optional[VectorDBRetrieveResponse], vector_db, path=["response"]) + + @parametrize + def test_method_retrieve_with_all_params(self, client: LlamaStackClient) -> None: + vector_db = client.vector_dbs.retrieve( + vector_db_id="vector_db_id", + x_llama_stack_client_version="X-LlamaStack-Client-Version", + x_llama_stack_provider_data="X-LlamaStack-Provider-Data", + ) + assert_matches_type(Optional[VectorDBRetrieveResponse], vector_db, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: + response = client.vector_dbs.with_raw_response.retrieve( + vector_db_id="vector_db_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_db = response.parse() + assert_matches_type(Optional[VectorDBRetrieveResponse], vector_db, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: + with client.vector_dbs.with_streaming_response.retrieve( + vector_db_id="vector_db_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_db = response.parse() + assert_matches_type(Optional[VectorDBRetrieveResponse], vector_db, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: LlamaStackClient) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_db_id` but received ''"): + client.vector_dbs.with_raw_response.retrieve( + vector_db_id="", + ) + + @parametrize + def test_method_list(self, client: LlamaStackClient) -> None: + vector_db = client.vector_dbs.list() + assert_matches_type(VectorDBListResponse, vector_db, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: LlamaStackClient) -> None: + vector_db = client.vector_dbs.list( + x_llama_stack_client_version="X-LlamaStack-Client-Version", + x_llama_stack_provider_data="X-LlamaStack-Provider-Data", + ) + assert_matches_type(VectorDBListResponse, vector_db, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: LlamaStackClient) -> None: + response = client.vector_dbs.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_db = response.parse() + assert_matches_type(VectorDBListResponse, vector_db, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: LlamaStackClient) -> None: + with client.vector_dbs.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_db = response.parse() + 
assert_matches_type(VectorDBListResponse, vector_db, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_register(self, client: LlamaStackClient) -> None: + vector_db = client.vector_dbs.register( + embedding_model="embedding_model", + vector_db_id="vector_db_id", + ) + assert_matches_type(VectorDBRegisterResponse, vector_db, path=["response"]) + + @parametrize + def test_method_register_with_all_params(self, client: LlamaStackClient) -> None: + vector_db = client.vector_dbs.register( + embedding_model="embedding_model", + vector_db_id="vector_db_id", + embedding_dimension=0, + provider_id="provider_id", + provider_vector_db_id="provider_vector_db_id", + x_llama_stack_client_version="X-LlamaStack-Client-Version", + x_llama_stack_provider_data="X-LlamaStack-Provider-Data", + ) + assert_matches_type(VectorDBRegisterResponse, vector_db, path=["response"]) + + @parametrize + def test_raw_response_register(self, client: LlamaStackClient) -> None: + response = client.vector_dbs.with_raw_response.register( + embedding_model="embedding_model", + vector_db_id="vector_db_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_db = response.parse() + assert_matches_type(VectorDBRegisterResponse, vector_db, path=["response"]) + + @parametrize + def test_streaming_response_register(self, client: LlamaStackClient) -> None: + with client.vector_dbs.with_streaming_response.register( + embedding_model="embedding_model", + vector_db_id="vector_db_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_db = response.parse() + assert_matches_type(VectorDBRegisterResponse, vector_db, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_unregister(self, client: LlamaStackClient) -> None: + vector_db = client.vector_dbs.unregister( + vector_db_id="vector_db_id", + ) + assert vector_db is None + + @parametrize + def test_method_unregister_with_all_params(self, client: LlamaStackClient) -> None: + vector_db = client.vector_dbs.unregister( + vector_db_id="vector_db_id", + x_llama_stack_client_version="X-LlamaStack-Client-Version", + x_llama_stack_provider_data="X-LlamaStack-Provider-Data", + ) + assert vector_db is None + + @parametrize + def test_raw_response_unregister(self, client: LlamaStackClient) -> None: + response = client.vector_dbs.with_raw_response.unregister( + vector_db_id="vector_db_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_db = response.parse() + assert vector_db is None + + @parametrize + def test_streaming_response_unregister(self, client: LlamaStackClient) -> None: + with client.vector_dbs.with_streaming_response.unregister( + vector_db_id="vector_db_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_db = response.parse() + assert vector_db is None + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_unregister(self, client: LlamaStackClient) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_db_id` but received ''"): + client.vector_dbs.with_raw_response.unregister( + vector_db_id="", + ) + + +class TestAsyncVectorDBs: + parametrize = pytest.mark.parametrize("async_client", [False, True], 
indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: + vector_db = await async_client.vector_dbs.retrieve( + vector_db_id="vector_db_id", + ) + assert_matches_type(Optional[VectorDBRetrieveResponse], vector_db, path=["response"]) + + @parametrize + async def test_method_retrieve_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: + vector_db = await async_client.vector_dbs.retrieve( + vector_db_id="vector_db_id", + x_llama_stack_client_version="X-LlamaStack-Client-Version", + x_llama_stack_provider_data="X-LlamaStack-Provider-Data", + ) + assert_matches_type(Optional[VectorDBRetrieveResponse], vector_db, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: + response = await async_client.vector_dbs.with_raw_response.retrieve( + vector_db_id="vector_db_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_db = await response.parse() + assert_matches_type(Optional[VectorDBRetrieveResponse], vector_db, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: + async with async_client.vector_dbs.with_streaming_response.retrieve( + vector_db_id="vector_db_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_db = await response.parse() + assert_matches_type(Optional[VectorDBRetrieveResponse], vector_db, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_db_id` but received ''"): + await async_client.vector_dbs.with_raw_response.retrieve( + vector_db_id="", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: + vector_db = await async_client.vector_dbs.list() + assert_matches_type(VectorDBListResponse, vector_db, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: + vector_db = await async_client.vector_dbs.list( + x_llama_stack_client_version="X-LlamaStack-Client-Version", + x_llama_stack_provider_data="X-LlamaStack-Provider-Data", + ) + assert_matches_type(VectorDBListResponse, vector_db, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None: + response = await async_client.vector_dbs.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_db = await response.parse() + assert_matches_type(VectorDBListResponse, vector_db, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None: + async with async_client.vector_dbs.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_db = await response.parse() + assert_matches_type(VectorDBListResponse, vector_db, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_register(self, async_client: AsyncLlamaStackClient) -> None: + 
vector_db = await async_client.vector_dbs.register( + embedding_model="embedding_model", + vector_db_id="vector_db_id", + ) + assert_matches_type(VectorDBRegisterResponse, vector_db, path=["response"]) + + @parametrize + async def test_method_register_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: + vector_db = await async_client.vector_dbs.register( + embedding_model="embedding_model", + vector_db_id="vector_db_id", + embedding_dimension=0, + provider_id="provider_id", + provider_vector_db_id="provider_vector_db_id", + x_llama_stack_client_version="X-LlamaStack-Client-Version", + x_llama_stack_provider_data="X-LlamaStack-Provider-Data", + ) + assert_matches_type(VectorDBRegisterResponse, vector_db, path=["response"]) + + @parametrize + async def test_raw_response_register(self, async_client: AsyncLlamaStackClient) -> None: + response = await async_client.vector_dbs.with_raw_response.register( + embedding_model="embedding_model", + vector_db_id="vector_db_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_db = await response.parse() + assert_matches_type(VectorDBRegisterResponse, vector_db, path=["response"]) + + @parametrize + async def test_streaming_response_register(self, async_client: AsyncLlamaStackClient) -> None: + async with async_client.vector_dbs.with_streaming_response.register( + embedding_model="embedding_model", + vector_db_id="vector_db_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_db = await response.parse() + assert_matches_type(VectorDBRegisterResponse, vector_db, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_unregister(self, async_client: AsyncLlamaStackClient) -> None: + vector_db = await async_client.vector_dbs.unregister( + vector_db_id="vector_db_id", + ) + assert vector_db is None + + @parametrize + async def test_method_unregister_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: + vector_db = await async_client.vector_dbs.unregister( + vector_db_id="vector_db_id", + x_llama_stack_client_version="X-LlamaStack-Client-Version", + x_llama_stack_provider_data="X-LlamaStack-Provider-Data", + ) + assert vector_db is None + + @parametrize + async def test_raw_response_unregister(self, async_client: AsyncLlamaStackClient) -> None: + response = await async_client.vector_dbs.with_raw_response.unregister( + vector_db_id="vector_db_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_db = await response.parse() + assert vector_db is None + + @parametrize + async def test_streaming_response_unregister(self, async_client: AsyncLlamaStackClient) -> None: + async with async_client.vector_dbs.with_streaming_response.unregister( + vector_db_id="vector_db_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_db = await response.parse() + assert vector_db is None + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_unregister(self, async_client: AsyncLlamaStackClient) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_db_id` but received ''"): + await async_client.vector_dbs.with_raw_response.unregister( + vector_db_id="", + ) diff --git 
a/tests/api_resources/test_vector_io.py b/tests/api_resources/test_vector_io.py new file mode 100644 index 00000000..25562be6 --- /dev/null +++ b/tests/api_resources/test_vector_io.py @@ -0,0 +1,242 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from tests.utils import assert_matches_type +from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient +from llama_stack_client.types import QueryChunksResponse + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestVectorIo: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_insert(self, client: LlamaStackClient) -> None: + vector_io = client.vector_io.insert( + chunks=[ + { + "content": "string", + "metadata": {"foo": True}, + } + ], + vector_db_id="vector_db_id", + ) + assert vector_io is None + + @parametrize + def test_method_insert_with_all_params(self, client: LlamaStackClient) -> None: + vector_io = client.vector_io.insert( + chunks=[ + { + "content": "string", + "metadata": {"foo": True}, + } + ], + vector_db_id="vector_db_id", + ttl_seconds=0, + x_llama_stack_client_version="X-LlamaStack-Client-Version", + x_llama_stack_provider_data="X-LlamaStack-Provider-Data", + ) + assert vector_io is None + + @parametrize + def test_raw_response_insert(self, client: LlamaStackClient) -> None: + response = client.vector_io.with_raw_response.insert( + chunks=[ + { + "content": "string", + "metadata": {"foo": True}, + } + ], + vector_db_id="vector_db_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_io = response.parse() + assert vector_io is None + + @parametrize + def test_streaming_response_insert(self, client: LlamaStackClient) -> None: + with client.vector_io.with_streaming_response.insert( + chunks=[ + { + "content": "string", + "metadata": {"foo": True}, + } + ], + vector_db_id="vector_db_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_io = response.parse() + assert vector_io is None + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_query(self, client: LlamaStackClient) -> None: + vector_io = client.vector_io.query( + query="string", + vector_db_id="vector_db_id", + ) + assert_matches_type(QueryChunksResponse, vector_io, path=["response"]) + + @parametrize + def test_method_query_with_all_params(self, client: LlamaStackClient) -> None: + vector_io = client.vector_io.query( + query="string", + vector_db_id="vector_db_id", + params={"foo": True}, + x_llama_stack_client_version="X-LlamaStack-Client-Version", + x_llama_stack_provider_data="X-LlamaStack-Provider-Data", + ) + assert_matches_type(QueryChunksResponse, vector_io, path=["response"]) + + @parametrize + def test_raw_response_query(self, client: LlamaStackClient) -> None: + response = client.vector_io.with_raw_response.query( + query="string", + vector_db_id="vector_db_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_io = response.parse() + assert_matches_type(QueryChunksResponse, vector_io, path=["response"]) + + @parametrize + def test_streaming_response_query(self, client: LlamaStackClient) -> None: + with 
client.vector_io.with_streaming_response.query( + query="string", + vector_db_id="vector_db_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_io = response.parse() + assert_matches_type(QueryChunksResponse, vector_io, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncVectorIo: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_insert(self, async_client: AsyncLlamaStackClient) -> None: + vector_io = await async_client.vector_io.insert( + chunks=[ + { + "content": "string", + "metadata": {"foo": True}, + } + ], + vector_db_id="vector_db_id", + ) + assert vector_io is None + + @parametrize + async def test_method_insert_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: + vector_io = await async_client.vector_io.insert( + chunks=[ + { + "content": "string", + "metadata": {"foo": True}, + } + ], + vector_db_id="vector_db_id", + ttl_seconds=0, + x_llama_stack_client_version="X-LlamaStack-Client-Version", + x_llama_stack_provider_data="X-LlamaStack-Provider-Data", + ) + assert vector_io is None + + @parametrize + async def test_raw_response_insert(self, async_client: AsyncLlamaStackClient) -> None: + response = await async_client.vector_io.with_raw_response.insert( + chunks=[ + { + "content": "string", + "metadata": {"foo": True}, + } + ], + vector_db_id="vector_db_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_io = await response.parse() + assert vector_io is None + + @parametrize + async def test_streaming_response_insert(self, async_client: AsyncLlamaStackClient) -> None: + async with async_client.vector_io.with_streaming_response.insert( + chunks=[ + { + "content": "string", + "metadata": {"foo": True}, + } + ], + vector_db_id="vector_db_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_io = await response.parse() + assert vector_io is None + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_query(self, async_client: AsyncLlamaStackClient) -> None: + vector_io = await async_client.vector_io.query( + query="string", + vector_db_id="vector_db_id", + ) + assert_matches_type(QueryChunksResponse, vector_io, path=["response"]) + + @parametrize + async def test_method_query_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: + vector_io = await async_client.vector_io.query( + query="string", + vector_db_id="vector_db_id", + params={"foo": True}, + x_llama_stack_client_version="X-LlamaStack-Client-Version", + x_llama_stack_provider_data="X-LlamaStack-Provider-Data", + ) + assert_matches_type(QueryChunksResponse, vector_io, path=["response"]) + + @parametrize + async def test_raw_response_query(self, async_client: AsyncLlamaStackClient) -> None: + response = await async_client.vector_io.with_raw_response.query( + query="string", + vector_db_id="vector_db_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_io = await response.parse() + assert_matches_type(QueryChunksResponse, vector_io, path=["response"]) + + @parametrize + async def test_streaming_response_query(self, async_client: AsyncLlamaStackClient) -> None: + async with 
async_client.vector_io.with_streaming_response.query( + query="string", + vector_db_id="vector_db_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_io = await response.parse() + assert_matches_type(QueryChunksResponse, vector_io, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/tool_runtime/__init__.py b/tests/api_resources/tool_runtime/__init__.py new file mode 100644 index 00000000..fd8019a9 --- /dev/null +++ b/tests/api_resources/tool_runtime/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/tool_runtime/test_rag_tool.py b/tests/api_resources/tool_runtime/test_rag_tool.py new file mode 100644 index 00000000..075b046e --- /dev/null +++ b/tests/api_resources/tool_runtime/test_rag_tool.py @@ -0,0 +1,274 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from tests.utils import assert_matches_type +from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient +from llama_stack_client.types.tool_runtime import ( + QueryResult, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestRagTool: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_insert(self, client: LlamaStackClient) -> None: + rag_tool = client.tool_runtime.rag_tool.insert( + chunk_size_in_tokens=0, + documents=[ + { + "content": "string", + "document_id": "document_id", + "metadata": {"foo": True}, + } + ], + vector_db_id="vector_db_id", + ) + assert rag_tool is None + + @parametrize + def test_method_insert_with_all_params(self, client: LlamaStackClient) -> None: + rag_tool = client.tool_runtime.rag_tool.insert( + chunk_size_in_tokens=0, + documents=[ + { + "content": "string", + "document_id": "document_id", + "metadata": {"foo": True}, + "mime_type": "mime_type", + } + ], + vector_db_id="vector_db_id", + x_llama_stack_client_version="X-LlamaStack-Client-Version", + x_llama_stack_provider_data="X-LlamaStack-Provider-Data", + ) + assert rag_tool is None + + @parametrize + def test_raw_response_insert(self, client: LlamaStackClient) -> None: + response = client.tool_runtime.rag_tool.with_raw_response.insert( + chunk_size_in_tokens=0, + documents=[ + { + "content": "string", + "document_id": "document_id", + "metadata": {"foo": True}, + } + ], + vector_db_id="vector_db_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + rag_tool = response.parse() + assert rag_tool is None + + @parametrize + def test_streaming_response_insert(self, client: LlamaStackClient) -> None: + with client.tool_runtime.rag_tool.with_streaming_response.insert( + chunk_size_in_tokens=0, + documents=[ + { + "content": "string", + "document_id": "document_id", + "metadata": {"foo": True}, + } + ], + vector_db_id="vector_db_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + rag_tool = response.parse() + assert rag_tool is None + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_query(self, client: LlamaStackClient) -> None: + rag_tool = client.tool_runtime.rag_tool.query( + 
content="string", + vector_db_ids=["string"], + ) + assert_matches_type(QueryResult, rag_tool, path=["response"]) + + @parametrize + def test_method_query_with_all_params(self, client: LlamaStackClient) -> None: + rag_tool = client.tool_runtime.rag_tool.query( + content="string", + vector_db_ids=["string"], + query_config={ + "max_chunks": 0, + "max_tokens_in_context": 0, + "query_generator_config": { + "separator": "separator", + "type": "default", + }, + }, + x_llama_stack_client_version="X-LlamaStack-Client-Version", + x_llama_stack_provider_data="X-LlamaStack-Provider-Data", + ) + assert_matches_type(QueryResult, rag_tool, path=["response"]) + + @parametrize + def test_raw_response_query(self, client: LlamaStackClient) -> None: + response = client.tool_runtime.rag_tool.with_raw_response.query( + content="string", + vector_db_ids=["string"], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + rag_tool = response.parse() + assert_matches_type(QueryResult, rag_tool, path=["response"]) + + @parametrize + def test_streaming_response_query(self, client: LlamaStackClient) -> None: + with client.tool_runtime.rag_tool.with_streaming_response.query( + content="string", + vector_db_ids=["string"], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + rag_tool = response.parse() + assert_matches_type(QueryResult, rag_tool, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncRagTool: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_insert(self, async_client: AsyncLlamaStackClient) -> None: + rag_tool = await async_client.tool_runtime.rag_tool.insert( + chunk_size_in_tokens=0, + documents=[ + { + "content": "string", + "document_id": "document_id", + "metadata": {"foo": True}, + } + ], + vector_db_id="vector_db_id", + ) + assert rag_tool is None + + @parametrize + async def test_method_insert_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: + rag_tool = await async_client.tool_runtime.rag_tool.insert( + chunk_size_in_tokens=0, + documents=[ + { + "content": "string", + "document_id": "document_id", + "metadata": {"foo": True}, + "mime_type": "mime_type", + } + ], + vector_db_id="vector_db_id", + x_llama_stack_client_version="X-LlamaStack-Client-Version", + x_llama_stack_provider_data="X-LlamaStack-Provider-Data", + ) + assert rag_tool is None + + @parametrize + async def test_raw_response_insert(self, async_client: AsyncLlamaStackClient) -> None: + response = await async_client.tool_runtime.rag_tool.with_raw_response.insert( + chunk_size_in_tokens=0, + documents=[ + { + "content": "string", + "document_id": "document_id", + "metadata": {"foo": True}, + } + ], + vector_db_id="vector_db_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + rag_tool = await response.parse() + assert rag_tool is None + + @parametrize + async def test_streaming_response_insert(self, async_client: AsyncLlamaStackClient) -> None: + async with async_client.tool_runtime.rag_tool.with_streaming_response.insert( + chunk_size_in_tokens=0, + documents=[ + { + "content": "string", + "document_id": "document_id", + "metadata": {"foo": True}, + } + ], + vector_db_id="vector_db_id", + ) as response: + assert not response.is_closed + assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" + + rag_tool = await response.parse() + assert rag_tool is None + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_query(self, async_client: AsyncLlamaStackClient) -> None: + rag_tool = await async_client.tool_runtime.rag_tool.query( + content="string", + vector_db_ids=["string"], + ) + assert_matches_type(QueryResult, rag_tool, path=["response"]) + + @parametrize + async def test_method_query_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: + rag_tool = await async_client.tool_runtime.rag_tool.query( + content="string", + vector_db_ids=["string"], + query_config={ + "max_chunks": 0, + "max_tokens_in_context": 0, + "query_generator_config": { + "separator": "separator", + "type": "default", + }, + }, + x_llama_stack_client_version="X-LlamaStack-Client-Version", + x_llama_stack_provider_data="X-LlamaStack-Provider-Data", + ) + assert_matches_type(QueryResult, rag_tool, path=["response"]) + + @parametrize + async def test_raw_response_query(self, async_client: AsyncLlamaStackClient) -> None: + response = await async_client.tool_runtime.rag_tool.with_raw_response.query( + content="string", + vector_db_ids=["string"], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + rag_tool = await response.parse() + assert_matches_type(QueryResult, rag_tool, path=["response"]) + + @parametrize + async def test_streaming_response_query(self, async_client: AsyncLlamaStackClient) -> None: + async with async_client.tool_runtime.rag_tool.with_streaming_response.query( + content="string", + vector_db_ids=["string"], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + rag_tool = await response.parse() + assert_matches_type(QueryResult, rag_tool, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/test_client.py b/tests/test_client.py index 3fb2935f..38796e2c 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -6,6 +6,7 @@ import os import sys import json +import time import asyncio import inspect import subprocess @@ -1642,10 +1643,20 @@ async def test_main() -> None: [sys.executable, "-c", test_code], text=True, ) as process: - try: - process.wait(2) - if process.returncode: - raise AssertionError("calling get_platform using asyncify resulted in a non-zero exit code") - except subprocess.TimeoutExpired as e: - process.kill() - raise AssertionError("calling get_platform using asyncify resulted in a hung process") from e + timeout = 10 # seconds + + start_time = time.monotonic() + while True: + return_code = process.poll() + if return_code is not None: + if return_code != 0: + raise AssertionError("calling get_platform using asyncify resulted in a non-zero exit code") + + # success + break + + if time.monotonic() - start_time > timeout: + process.kill() + raise AssertionError("calling get_platform using asyncify resulted in a hung process") + + time.sleep(0.1)