diff --git a/src/llama_stack_client/_client.py b/src/llama_stack_client/_client.py index f9f22967..e2e2c1e5 100644 --- a/src/llama_stack_client/_client.py +++ b/src/llama_stack_client/_client.py @@ -33,6 +33,7 @@ datasets, inference, providers, + responses, telemetry, vector_io, benchmarks, @@ -71,6 +72,7 @@ class LlamaStackClient(SyncAPIClient): toolgroups: toolgroups.ToolgroupsResource tools: tools.ToolsResource tool_runtime: tool_runtime.ToolRuntimeResource + responses: responses.ResponsesResource agents: agents.AgentsResource datasets: datasets.DatasetsResource eval: eval.EvalResource @@ -153,6 +155,7 @@ def __init__( self.toolgroups = toolgroups.ToolgroupsResource(self) self.tools = tools.ToolsResource(self) self.tool_runtime = tool_runtime.ToolRuntimeResource(self) + self.responses = responses.ResponsesResource(self) self.agents = agents.AgentsResource(self) self.datasets = datasets.DatasetsResource(self) self.eval = eval.EvalResource(self) @@ -287,6 +290,7 @@ class AsyncLlamaStackClient(AsyncAPIClient): toolgroups: toolgroups.AsyncToolgroupsResource tools: tools.AsyncToolsResource tool_runtime: tool_runtime.AsyncToolRuntimeResource + responses: responses.AsyncResponsesResource agents: agents.AsyncAgentsResource datasets: datasets.AsyncDatasetsResource eval: eval.AsyncEvalResource @@ -369,6 +373,7 @@ def __init__( self.toolgroups = toolgroups.AsyncToolgroupsResource(self) self.tools = tools.AsyncToolsResource(self) self.tool_runtime = tool_runtime.AsyncToolRuntimeResource(self) + self.responses = responses.AsyncResponsesResource(self) self.agents = agents.AsyncAgentsResource(self) self.datasets = datasets.AsyncDatasetsResource(self) self.eval = eval.AsyncEvalResource(self) @@ -504,6 +509,7 @@ def __init__(self, client: LlamaStackClient) -> None: self.toolgroups = toolgroups.ToolgroupsResourceWithRawResponse(client.toolgroups) self.tools = tools.ToolsResourceWithRawResponse(client.tools) self.tool_runtime = tool_runtime.ToolRuntimeResourceWithRawResponse(client.tool_runtime) + self.responses = responses.ResponsesResourceWithRawResponse(client.responses) self.agents = agents.AgentsResourceWithRawResponse(client.agents) self.datasets = datasets.DatasetsResourceWithRawResponse(client.datasets) self.eval = eval.EvalResourceWithRawResponse(client.eval) @@ -533,6 +539,7 @@ def __init__(self, client: AsyncLlamaStackClient) -> None: self.toolgroups = toolgroups.AsyncToolgroupsResourceWithRawResponse(client.toolgroups) self.tools = tools.AsyncToolsResourceWithRawResponse(client.tools) self.tool_runtime = tool_runtime.AsyncToolRuntimeResourceWithRawResponse(client.tool_runtime) + self.responses = responses.AsyncResponsesResourceWithRawResponse(client.responses) self.agents = agents.AsyncAgentsResourceWithRawResponse(client.agents) self.datasets = datasets.AsyncDatasetsResourceWithRawResponse(client.datasets) self.eval = eval.AsyncEvalResourceWithRawResponse(client.eval) @@ -564,6 +571,7 @@ def __init__(self, client: LlamaStackClient) -> None: self.toolgroups = toolgroups.ToolgroupsResourceWithStreamingResponse(client.toolgroups) self.tools = tools.ToolsResourceWithStreamingResponse(client.tools) self.tool_runtime = tool_runtime.ToolRuntimeResourceWithStreamingResponse(client.tool_runtime) + self.responses = responses.ResponsesResourceWithStreamingResponse(client.responses) self.agents = agents.AgentsResourceWithStreamingResponse(client.agents) self.datasets = datasets.DatasetsResourceWithStreamingResponse(client.datasets) self.eval = eval.EvalResourceWithStreamingResponse(client.eval) @@ -595,6 
+603,7 @@ def __init__(self, client: AsyncLlamaStackClient) -> None: self.toolgroups = toolgroups.AsyncToolgroupsResourceWithStreamingResponse(client.toolgroups) self.tools = tools.AsyncToolsResourceWithStreamingResponse(client.tools) self.tool_runtime = tool_runtime.AsyncToolRuntimeResourceWithStreamingResponse(client.tool_runtime) + self.responses = responses.AsyncResponsesResourceWithStreamingResponse(client.responses) self.agents = agents.AsyncAgentsResourceWithStreamingResponse(client.agents) self.datasets = datasets.AsyncDatasetsResourceWithStreamingResponse(client.datasets) self.eval = eval.AsyncEvalResourceWithStreamingResponse(client.eval) diff --git a/src/llama_stack_client/resources/__init__.py b/src/llama_stack_client/resources/__init__.py index ff5b3260..e1c150fc 100644 --- a/src/llama_stack_client/resources/__init__.py +++ b/src/llama_stack_client/resources/__init__.py @@ -104,6 +104,14 @@ ProvidersResourceWithStreamingResponse, AsyncProvidersResourceWithStreamingResponse, ) +from .responses import ( + ResponsesResource, + AsyncResponsesResource, + ResponsesResourceWithRawResponse, + AsyncResponsesResourceWithRawResponse, + ResponsesResourceWithStreamingResponse, + AsyncResponsesResourceWithStreamingResponse, +) from .telemetry import ( TelemetryResource, AsyncTelemetryResource, @@ -204,6 +212,12 @@ "AsyncToolRuntimeResourceWithRawResponse", "ToolRuntimeResourceWithStreamingResponse", "AsyncToolRuntimeResourceWithStreamingResponse", + "ResponsesResource", + "AsyncResponsesResource", + "ResponsesResourceWithRawResponse", + "AsyncResponsesResourceWithRawResponse", + "ResponsesResourceWithStreamingResponse", + "AsyncResponsesResourceWithStreamingResponse", "AgentsResource", "AsyncAgentsResource", "AgentsResourceWithRawResponse", diff --git a/src/llama_stack_client/resources/responses.py b/src/llama_stack_client/resources/responses.py new file mode 100644 index 00000000..5e7bd4c3 --- /dev/null +++ b/src/llama_stack_client/resources/responses.py @@ -0,0 +1,496 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, overload + +import httpx + +from ..types import response_create_params +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from .._utils import required_args, maybe_transform, async_maybe_transform +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from .._streaming import Stream, AsyncStream +from .._base_client import make_request_options +from ..types.response_object import ResponseObject +from ..types.response_object_stream import ResponseObjectStream + +__all__ = ["ResponsesResource", "AsyncResponsesResource"] + + +class ResponsesResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> ResponsesResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + """ + return ResponsesResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ResponsesResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + """ + return ResponsesResourceWithStreamingResponse(self) + + @overload + def create( + self, + *, + input: Union[str, Iterable[response_create_params.InputUnionMember1]], + model: str, + previous_response_id: str | NotGiven = NOT_GIVEN, + store: bool | NotGiven = NOT_GIVEN, + stream: Literal[False] | NotGiven = NOT_GIVEN, + tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ResponseObject: + """ + Create a new OpenAI response. + + Args: + input: Input message(s) to create the response. + + model: The underlying LLM used for completions. + + previous_response_id: (Optional) if specified, the new response will be a continuation of the previous + response. This can be used to easily fork-off new responses from existing + responses. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + *, + input: Union[str, Iterable[response_create_params.InputUnionMember1]], + model: str, + stream: Literal[True], + previous_response_id: str | NotGiven = NOT_GIVEN, + store: bool | NotGiven = NOT_GIVEN, + tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[ResponseObjectStream]: + """ + Create a new OpenAI response. + + Args: + input: Input message(s) to create the response. + + model: The underlying LLM used for completions. + + previous_response_id: (Optional) if specified, the new response will be a continuation of the previous + response. This can be used to easily fork-off new responses from existing + responses. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
+ + @overload + def create( + self, + *, + input: Union[str, Iterable[response_create_params.InputUnionMember1]], + model: str, + stream: bool, + previous_response_id: str | NotGiven = NOT_GIVEN, + store: bool | NotGiven = NOT_GIVEN, + tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ResponseObject | Stream[ResponseObjectStream]: + """ + Create a new OpenAI response. + + Args: + input: Input message(s) to create the response. + + model: The underlying LLM used for completions. + + previous_response_id: (Optional) if specified, the new response will be a continuation of the previous + response. This can be used to easily fork-off new responses from existing + responses. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["input", "model"], ["input", "model", "stream"]) + def create( + self, + *, + input: Union[str, Iterable[response_create_params.InputUnionMember1]], + model: str, + previous_response_id: str | NotGiven = NOT_GIVEN, + store: bool | NotGiven = NOT_GIVEN, + stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, + tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ResponseObject | Stream[ResponseObjectStream]: + return self._post( + "/v1/openai/v1/responses", + body=maybe_transform( + { + "input": input, + "model": model, + "previous_response_id": previous_response_id, + "store": store, + "stream": stream, + "tools": tools, + }, + response_create_params.ResponseCreateParamsStreaming + if stream + else response_create_params.ResponseCreateParamsNonStreaming, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ResponseObject, + stream=stream or False, + stream_cls=Stream[ResponseObjectStream], + ) + + def retrieve( + self, + id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ResponseObject: + """ + Retrieve an OpenAI response by its ID. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not id: + raise ValueError(f"Expected a non-empty value for `id` but received {id!r}") + return self._get( + f"/v1/openai/v1/responses/{id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ResponseObject, + ) + + +class AsyncResponsesResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncResponsesResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#accessing-raw-response-data-eg-headers + """ + return AsyncResponsesResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncResponsesResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-python#with_streaming_response + """ + return AsyncResponsesResourceWithStreamingResponse(self) + + @overload + async def create( + self, + *, + input: Union[str, Iterable[response_create_params.InputUnionMember1]], + model: str, + previous_response_id: str | NotGiven = NOT_GIVEN, + store: bool | NotGiven = NOT_GIVEN, + stream: Literal[False] | NotGiven = NOT_GIVEN, + tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ResponseObject: + """ + Create a new OpenAI response. + + Args: + input: Input message(s) to create the response. + + model: The underlying LLM used for completions. + + previous_response_id: (Optional) if specified, the new response will be a continuation of the previous + response. This can be used to easily fork-off new responses from existing + responses. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + input: Union[str, Iterable[response_create_params.InputUnionMember1]], + model: str, + stream: Literal[True], + previous_response_id: str | NotGiven = NOT_GIVEN, + store: bool | NotGiven = NOT_GIVEN, + tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[ResponseObjectStream]: + """ + Create a new OpenAI response. + + Args: + input: Input message(s) to create the response. + + model: The underlying LLM used for completions. + + previous_response_id: (Optional) if specified, the new response will be a continuation of the previous + response. This can be used to easily fork-off new responses from existing + responses. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + input: Union[str, Iterable[response_create_params.InputUnionMember1]], + model: str, + stream: bool, + previous_response_id: str | NotGiven = NOT_GIVEN, + store: bool | NotGiven = NOT_GIVEN, + tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ResponseObject | AsyncStream[ResponseObjectStream]: + """ + Create a new OpenAI response. + + Args: + input: Input message(s) to create the response. + + model: The underlying LLM used for completions. + + previous_response_id: (Optional) if specified, the new response will be a continuation of the previous + response. This can be used to easily fork-off new responses from existing + responses. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["input", "model"], ["input", "model", "stream"]) + async def create( + self, + *, + input: Union[str, Iterable[response_create_params.InputUnionMember1]], + model: str, + previous_response_id: str | NotGiven = NOT_GIVEN, + store: bool | NotGiven = NOT_GIVEN, + stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, + tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ResponseObject | AsyncStream[ResponseObjectStream]: + return await self._post( + "/v1/openai/v1/responses", + body=await async_maybe_transform( + { + "input": input, + "model": model, + "previous_response_id": previous_response_id, + "store": store, + "stream": stream, + "tools": tools, + }, + response_create_params.ResponseCreateParamsStreaming + if stream + else response_create_params.ResponseCreateParamsNonStreaming, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ResponseObject, + stream=stream or False, + stream_cls=AsyncStream[ResponseObjectStream], + ) + + async def retrieve( + self, + id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ResponseObject: + """ + Retrieve an OpenAI response by its ID. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not id: + raise ValueError(f"Expected a non-empty value for `id` but received {id!r}") + return await self._get( + f"/v1/openai/v1/responses/{id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ResponseObject, + ) + + +class ResponsesResourceWithRawResponse: + def __init__(self, responses: ResponsesResource) -> None: + self._responses = responses + + self.create = to_raw_response_wrapper( + responses.create, + ) + self.retrieve = to_raw_response_wrapper( + responses.retrieve, + ) + + +class AsyncResponsesResourceWithRawResponse: + def __init__(self, responses: AsyncResponsesResource) -> None: + self._responses = responses + + self.create = async_to_raw_response_wrapper( + responses.create, + ) + self.retrieve = async_to_raw_response_wrapper( + responses.retrieve, + ) + + +class ResponsesResourceWithStreamingResponse: + def __init__(self, responses: ResponsesResource) -> None: + self._responses = responses + + self.create = to_streamed_response_wrapper( + responses.create, + ) + self.retrieve = to_streamed_response_wrapper( + responses.retrieve, + ) + + +class AsyncResponsesResourceWithStreamingResponse: + def __init__(self, responses: AsyncResponsesResource) -> None: + self._responses = responses + + self.create = async_to_streamed_response_wrapper( + responses.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + responses.retrieve, + ) diff --git a/src/llama_stack_client/types/__init__.py b/src/llama_stack_client/types/__init__.py index 3db3080c..815f95a1 100644 --- a/src/llama_stack_client/types/__init__.py +++ b/src/llama_stack_client/types/__init__.py @@ -45,6 +45,7 @@ from .tool_response import ToolResponse as ToolResponse from .inference_step import InferenceStep as InferenceStep from .tool_def_param import ToolDefParam as ToolDefParam +from .response_object import 
ResponseObject as ResponseObject from .token_log_probs import TokenLogProbs as TokenLogProbs from .shield_call_step import ShieldCallStep as ShieldCallStep from .span_with_status import SpanWithStatus as SpanWithStatus @@ -81,6 +82,8 @@ from .benchmark_config_param import BenchmarkConfigParam as BenchmarkConfigParam from .list_datasets_response import ListDatasetsResponse as ListDatasetsResponse from .provider_list_response import ProviderListResponse as ProviderListResponse +from .response_create_params import ResponseCreateParams as ResponseCreateParams +from .response_object_stream import ResponseObjectStream as ResponseObjectStream from .scoring_score_response import ScoringScoreResponse as ScoringScoreResponse from .shield_register_params import ShieldRegisterParams as ShieldRegisterParams from .tool_invocation_result import ToolInvocationResult as ToolInvocationResult diff --git a/src/llama_stack_client/types/response_create_params.py b/src/llama_stack_client/types/response_create_params.py new file mode 100644 index 00000000..5e5072f5 --- /dev/null +++ b/src/llama_stack_client/types/response_create_params.py @@ -0,0 +1,81 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +__all__ = [ + "ResponseCreateParamsBase", + "InputUnionMember1", + "InputUnionMember1ContentUnionMember1", + "InputUnionMember1ContentUnionMember1OpenAIResponseInputMessageContentText", + "InputUnionMember1ContentUnionMember1OpenAIResponseInputMessageContentImage", + "Tool", + "ResponseCreateParamsNonStreaming", + "ResponseCreateParamsStreaming", +] + + +class ResponseCreateParamsBase(TypedDict, total=False): + input: Required[Union[str, Iterable[InputUnionMember1]]] + """Input message(s) to create the response.""" + + model: Required[str] + """The underlying LLM used for completions.""" + + previous_response_id: str + """ + (Optional) if specified, the new response will be a continuation of the previous + response. This can be used to easily fork-off new responses from existing + responses. 
+ """ + + store: bool + + tools: Iterable[Tool] + + +class InputUnionMember1ContentUnionMember1OpenAIResponseInputMessageContentText(TypedDict, total=False): + text: Required[str] + + type: Required[Literal["input_text"]] + + +class InputUnionMember1ContentUnionMember1OpenAIResponseInputMessageContentImage(TypedDict, total=False): + detail: Required[Literal["low", "high", "auto"]] + + type: Required[Literal["input_image"]] + + image_url: str + + +InputUnionMember1ContentUnionMember1: TypeAlias = Union[ + InputUnionMember1ContentUnionMember1OpenAIResponseInputMessageContentText, + InputUnionMember1ContentUnionMember1OpenAIResponseInputMessageContentImage, +] + + +class InputUnionMember1(TypedDict, total=False): + content: Required[Union[str, Iterable[InputUnionMember1ContentUnionMember1]]] + + role: Required[Literal["system", "developer", "user", "assistant"]] + + type: Literal["message"] + + +class Tool(TypedDict, total=False): + type: Required[Literal["web_search", "web_search_preview_2025_03_11"]] + + search_context_size: str + + +class ResponseCreateParamsNonStreaming(ResponseCreateParamsBase, total=False): + stream: Literal[False] + + +class ResponseCreateParamsStreaming(ResponseCreateParamsBase): + stream: Required[Literal[True]] + + +ResponseCreateParams = Union[ResponseCreateParamsNonStreaming, ResponseCreateParamsStreaming] diff --git a/src/llama_stack_client/types/response_object.py b/src/llama_stack_client/types/response_object.py new file mode 100644 index 00000000..6e1161e2 --- /dev/null +++ b/src/llama_stack_client/types/response_object.py @@ -0,0 +1,82 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from .._utils import PropertyInfo +from .._models import BaseModel + +__all__ = [ + "ResponseObject", + "Output", + "OutputOpenAIResponseOutputMessage", + "OutputOpenAIResponseOutputMessageContent", + "OutputOpenAIResponseOutputMessageWebSearchToolCall", + "Error", +] + + +class OutputOpenAIResponseOutputMessageContent(BaseModel): + text: str + + type: Literal["output_text"] + + +class OutputOpenAIResponseOutputMessage(BaseModel): + id: str + + content: List[OutputOpenAIResponseOutputMessageContent] + + role: Literal["assistant"] + + status: str + + type: Literal["message"] + + +class OutputOpenAIResponseOutputMessageWebSearchToolCall(BaseModel): + id: str + + status: str + + type: Literal["web_search_call"] + + +Output: TypeAlias = Annotated[ + Union[OutputOpenAIResponseOutputMessage, OutputOpenAIResponseOutputMessageWebSearchToolCall], + PropertyInfo(discriminator="type"), +] + + +class Error(BaseModel): + code: str + + message: str + + +class ResponseObject(BaseModel): + id: str + + created_at: int + + model: str + + object: Literal["response"] + + output: List[Output] + + parallel_tool_calls: bool + + status: str + + error: Optional[Error] = None + + previous_response_id: Optional[str] = None + + temperature: Optional[float] = None + + top_p: Optional[float] = None + + truncation: Optional[str] = None + + user: Optional[str] = None diff --git a/src/llama_stack_client/types/response_object_stream.py b/src/llama_stack_client/types/response_object_stream.py new file mode 100644 index 00000000..023b8f9d --- /dev/null +++ b/src/llama_stack_client/types/response_object_stream.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Union +from typing_extensions import Literal, Annotated, TypeAlias + +from .._utils import PropertyInfo +from .._models import BaseModel +from .response_object import ResponseObject + +__all__ = [ + "ResponseObjectStream", + "OpenAIResponseObjectStreamResponseCreated", + "OpenAIResponseObjectStreamResponseCompleted", +] + + +class OpenAIResponseObjectStreamResponseCreated(BaseModel): + response: ResponseObject + + type: Literal["response.created"] + + +class OpenAIResponseObjectStreamResponseCompleted(BaseModel): + response: ResponseObject + + type: Literal["response.completed"] + + +ResponseObjectStream: TypeAlias = Annotated[ + Union[OpenAIResponseObjectStreamResponseCreated, OpenAIResponseObjectStreamResponseCompleted], + PropertyInfo(discriminator="type"), +] diff --git a/tests/api_resources/test_responses.py b/tests/api_resources/test_responses.py new file mode 100644 index 00000000..0a2a0929 --- /dev/null +++ b/tests/api_resources/test_responses.py @@ -0,0 +1,306 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from tests.utils import assert_matches_type +from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient +from llama_stack_client.types import ResponseObject + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestResponses: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create_overload_1(self, client: LlamaStackClient) -> None: + response = client.responses.create( + input="string", + model="model", + ) + assert_matches_type(ResponseObject, response, path=["response"]) + + @parametrize + def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient) -> None: + response = client.responses.create( + input="string", + model="model", + previous_response_id="previous_response_id", + store=True, + stream=False, + tools=[ + { + "type": "web_search", + "search_context_size": "search_context_size", + } + ], + ) + assert_matches_type(ResponseObject, response, path=["response"]) + + @parametrize + def test_raw_response_create_overload_1(self, client: LlamaStackClient) -> None: + http_response = client.responses.with_raw_response.create( + input="string", + model="model", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert_matches_type(ResponseObject, response, path=["response"]) + + @parametrize + def test_streaming_response_create_overload_1(self, client: LlamaStackClient) -> None: + with client.responses.with_streaming_response.create( + input="string", + model="model", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = http_response.parse() + assert_matches_type(ResponseObject, response, path=["response"]) + + assert cast(Any, http_response.is_closed) is True + + @parametrize + def test_method_create_overload_2(self, client: LlamaStackClient) -> None: + response_stream = client.responses.create( + input="string", + model="model", + stream=True, + ) + response_stream.response.close() + + @parametrize + def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient) -> None: + response_stream = client.responses.create( + 
input="string", + model="model", + stream=True, + previous_response_id="previous_response_id", + store=True, + tools=[ + { + "type": "web_search", + "search_context_size": "search_context_size", + } + ], + ) + response_stream.response.close() + + @parametrize + def test_raw_response_create_overload_2(self, client: LlamaStackClient) -> None: + response = client.responses.with_raw_response.create( + input="string", + model="model", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + stream.close() + + @parametrize + def test_streaming_response_create_overload_2(self, client: LlamaStackClient) -> None: + with client.responses.with_streaming_response.create( + input="string", + model="model", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = response.parse() + stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_retrieve(self, client: LlamaStackClient) -> None: + response = client.responses.retrieve( + "id", + ) + assert_matches_type(ResponseObject, response, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: + http_response = client.responses.with_raw_response.retrieve( + "id", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert_matches_type(ResponseObject, response, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: + with client.responses.with_streaming_response.retrieve( + "id", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = http_response.parse() + assert_matches_type(ResponseObject, response, path=["response"]) + + assert cast(Any, http_response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: LlamaStackClient) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `id` but received ''"): + client.responses.with_raw_response.retrieve( + "", + ) + + +class TestAsyncResponses: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: + response = await async_client.responses.create( + input="string", + model="model", + ) + assert_matches_type(ResponseObject, response, path=["response"]) + + @parametrize + async def test_method_create_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None: + response = await async_client.responses.create( + input="string", + model="model", + previous_response_id="previous_response_id", + store=True, + stream=False, + tools=[ + { + "type": "web_search", + "search_context_size": "search_context_size", + } + ], + ) + assert_matches_type(ResponseObject, response, path=["response"]) + + @parametrize + async def test_raw_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: + http_response = await async_client.responses.with_raw_response.create( + input="string", + model="model", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = await 
http_response.parse() + assert_matches_type(ResponseObject, response, path=["response"]) + + @parametrize + async def test_streaming_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: + async with async_client.responses.with_streaming_response.create( + input="string", + model="model", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = await http_response.parse() + assert_matches_type(ResponseObject, response, path=["response"]) + + assert cast(Any, http_response.is_closed) is True + + @parametrize + async def test_method_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: + response_stream = await async_client.responses.create( + input="string", + model="model", + stream=True, + ) + await response_stream.response.aclose() + + @parametrize + async def test_method_create_with_all_params_overload_2(self, async_client: AsyncLlamaStackClient) -> None: + response_stream = await async_client.responses.create( + input="string", + model="model", + stream=True, + previous_response_id="previous_response_id", + store=True, + tools=[ + { + "type": "web_search", + "search_context_size": "search_context_size", + } + ], + ) + await response_stream.response.aclose() + + @parametrize + async def test_raw_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: + response = await async_client.responses.with_raw_response.create( + input="string", + model="model", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = await response.parse() + await stream.close() + + @parametrize + async def test_streaming_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: + async with async_client.responses.with_streaming_response.create( + input="string", + model="model", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = await response.parse() + await stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: + response = await async_client.responses.retrieve( + "id", + ) + assert_matches_type(ResponseObject, response, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: + http_response = await async_client.responses.with_raw_response.retrieve( + "id", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = await http_response.parse() + assert_matches_type(ResponseObject, response, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: + async with async_client.responses.with_streaming_response.retrieve( + "id", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = await http_response.parse() + assert_matches_type(ResponseObject, response, path=["response"]) + + assert cast(Any, http_response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `id` but received ''"): + await 
async_client.responses.with_raw_response.retrieve( + "", + )
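
Usage sketch (reviewer note, not part of the patch): a minimal example of the new `responses` resource, based on the generated method signatures and the tests above. The base URL and model id are assumptions; point them at whatever your Llama Stack deployment actually serves.

```python
from llama_stack_client import LlamaStackClient

# Assumed local Llama Stack server; adjust base_url for your deployment.
client = LlamaStackClient(base_url="http://localhost:8321")

# Non-streaming call: returns a ResponseObject.
response = client.responses.create(
    input="What is the capital of France?",
    model="meta-llama/Llama-3.3-70B-Instruct",  # hypothetical model id
)
print(response.id, response.status)
for item in response.output:
    if item.type == "message":  # the other output variant is "web_search_call"
        for part in item.content:
            print(part.text)

# Streaming call: returns Stream[ResponseObjectStream]; each event carries a
# "response.created" or "response.completed" type plus a ResponseObject snapshot.
for event in client.responses.create(
    input="Tell me a short joke.",
    model="meta-llama/Llama-3.3-70B-Instruct",  # hypothetical model id
    stream=True,
):
    print(event.type)

# A stored response can be fetched again by id.
retrieved = client.responses.retrieve(response.id)
print(retrieved.model)
```

The async client mirrors this surface: `await async_client.responses.create(...)` returns an `AsyncStream[ResponseObjectStream]` when `stream=True`, and the `with_raw_response` / `with_streaming_response` wrappers are generated for both clients.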