diff --git a/LICENSE b/LICENSE index 9af3db1b..c781a0e2 100644 --- a/LICENSE +++ b/LICENSE @@ -1,201 +1,22 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2024 Llama Stack Client - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +MIT License + +Copyright (c) Meta Platforms, Inc. and affiliates + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/src/llama_stack_client/resources/agents/turn.py b/src/llama_stack_client/resources/agents/turn.py index 8f2e9989..272ea4d9 100644 --- a/src/llama_stack_client/resources/agents/turn.py +++ b/src/llama_stack_client/resources/agents/turn.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Any, List, Iterable, cast +from typing import List, Iterable from typing_extensions import Literal, overload import httpx @@ -25,7 +25,7 @@ from ..._base_client import make_request_options from ...types.agents import turn_create_params from ...types.agents.turn import Turn -from ...types.agents.turn_create_response import TurnCreateResponse +from ...types.agents.agent_turn_response_stream_chunk import AgentTurnResponseStreamChunk __all__ = ["TurnResource", "AsyncTurnResource"] @@ -66,7 +66,7 @@ def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> TurnCreateResponse: + ) -> Turn: """ Args: extra_headers: Send extra headers @@ -95,7 +95,7 @@ def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Stream[TurnCreateResponse]: + ) -> Stream[AgentTurnResponseStreamChunk]: """ Args: extra_headers: Send extra headers @@ -124,7 +124,7 @@ def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> TurnCreateResponse | Stream[TurnCreateResponse]: + ) -> Turn | Stream[AgentTurnResponseStreamChunk]: """ Args: extra_headers: Send extra headers @@ -153,34 +153,28 @@ def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> TurnCreateResponse | Stream[TurnCreateResponse]: + ) -> Turn | Stream[AgentTurnResponseStreamChunk]: if not agent_id: raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}") if not session_id: 
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}") - extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})} - return cast( - TurnCreateResponse, - self._post( - f"/v1/agents/{agent_id}/session/{session_id}/turn", - body=maybe_transform( - { - "messages": messages, - "documents": documents, - "stream": stream, - "toolgroups": toolgroups, - }, - turn_create_params.TurnCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=cast( - Any, TurnCreateResponse - ), # Union types cannot be passed in as arguments in the type system - stream=stream or False, - stream_cls=Stream[TurnCreateResponse], + return self._post( + f"/v1/agents/{agent_id}/session/{session_id}/turn", + body=maybe_transform( + { + "messages": messages, + "documents": documents, + "stream": stream, + "toolgroups": toolgroups, + }, + turn_create_params.TurnCreateParams, ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Turn, + stream=stream or False, + stream_cls=Stream[AgentTurnResponseStreamChunk], ) def retrieve( @@ -257,7 +251,7 @@ async def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> TurnCreateResponse: + ) -> Turn: """ Args: extra_headers: Send extra headers @@ -286,7 +280,7 @@ async def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[TurnCreateResponse]: + ) -> AsyncStream[AgentTurnResponseStreamChunk]: """ Args: extra_headers: Send extra headers @@ -315,7 +309,7 @@ async def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> TurnCreateResponse | AsyncStream[TurnCreateResponse]: + ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]: """ Args: extra_headers: Send extra headers @@ -344,34 +338,28 @@ async def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> TurnCreateResponse | AsyncStream[TurnCreateResponse]: + ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]: if not agent_id: raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}") if not session_id: raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}") - extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})} - return cast( - TurnCreateResponse, - await self._post( - f"/v1/agents/{agent_id}/session/{session_id}/turn", - body=await async_maybe_transform( - { - "messages": messages, - "documents": documents, - "stream": stream, - "toolgroups": toolgroups, - }, - turn_create_params.TurnCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=cast( - Any, TurnCreateResponse - ), # Union types cannot be passed in as arguments in the type system - stream=stream or False, - stream_cls=AsyncStream[TurnCreateResponse], + return await self._post( + f"/v1/agents/{agent_id}/session/{session_id}/turn", + body=await async_maybe_transform( + { + "messages": messages, + "documents": documents, + "stream": stream, + "toolgroups": toolgroups, + }, + 
turn_create_params.TurnCreateParams, ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Turn, + stream=stream or False, + stream_cls=AsyncStream[AgentTurnResponseStreamChunk], ) async def retrieve( diff --git a/src/llama_stack_client/resources/inference.py b/src/llama_stack_client/resources/inference.py index ce88b3b8..8971a921 100644 --- a/src/llama_stack_client/resources/inference.py +++ b/src/llama_stack_client/resources/inference.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Any, List, Iterable, cast +from typing import List, Iterable from typing_extensions import Literal, overload import httpx @@ -28,13 +28,14 @@ ) from .._streaming import Stream, AsyncStream from .._base_client import make_request_options +from ..types.completion_response import CompletionResponse from ..types.embeddings_response import EmbeddingsResponse from ..types.shared_params.message import Message -from ..types.inference_completion_response import InferenceCompletionResponse from ..types.shared_params.response_format import ResponseFormat from ..types.shared_params.sampling_params import SamplingParams +from ..types.shared.chat_completion_response import ChatCompletionResponse from ..types.shared_params.interleaved_content import InterleavedContent -from ..types.inference_chat_completion_response import InferenceChatCompletionResponse +from ..types.chat_completion_response_stream_chunk import ChatCompletionResponseStreamChunk __all__ = ["InferenceResource", "AsyncInferenceResource"] @@ -78,7 +79,7 @@ def chat_completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceChatCompletionResponse: + ) -> ChatCompletionResponse: """ Generate a chat completion for the given messages using the specified model. @@ -142,7 +143,7 @@ def chat_completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Stream[InferenceChatCompletionResponse]: + ) -> Stream[ChatCompletionResponseStreamChunk]: """ Generate a chat completion for the given messages using the specified model. @@ -206,7 +207,7 @@ def chat_completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceChatCompletionResponse | Stream[InferenceChatCompletionResponse]: + ) -> ChatCompletionResponse | Stream[ChatCompletionResponseStreamChunk]: """ Generate a chat completion for the given messages using the specified model. 
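For orientation, a minimal synchronous usage sketch of the renamed types follows. It is illustrative rather than part of the change; the base URL, model id, and message text are placeholders, and a reachable Llama Stack server is assumed.

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:5000")  # placeholder server address

    # Without stream=True, chat_completion now returns the shared ChatCompletionResponse model.
    response = client.inference.chat_completion(
        messages=[{"role": "user", "content": "Hello"}],
        model_id="model_id",  # placeholder model id
    )
    print(response.completion_message)

    # With stream=True, the same method returns Stream[ChatCompletionResponseStreamChunk] instead.
    for chunk in client.inference.chat_completion(
        messages=[{"role": "user", "content": "Hello"}],
        model_id="model_id",
        stream=True,
    ):
        print(chunk.event.event_type, chunk.event.delta)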
@@ -270,35 +271,29 @@ def chat_completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceChatCompletionResponse | Stream[InferenceChatCompletionResponse]: - extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})} - return cast( - InferenceChatCompletionResponse, - self._post( - "/v1/inference/chat-completion", - body=maybe_transform( - { - "messages": messages, - "model_id": model_id, - "logprobs": logprobs, - "response_format": response_format, - "sampling_params": sampling_params, - "stream": stream, - "tool_choice": tool_choice, - "tool_prompt_format": tool_prompt_format, - "tools": tools, - }, - inference_chat_completion_params.InferenceChatCompletionParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=cast( - Any, InferenceChatCompletionResponse - ), # Union types cannot be passed in as arguments in the type system - stream=stream or False, - stream_cls=Stream[InferenceChatCompletionResponse], + ) -> ChatCompletionResponse | Stream[ChatCompletionResponseStreamChunk]: + return self._post( + "/v1/inference/chat-completion", + body=maybe_transform( + { + "messages": messages, + "model_id": model_id, + "logprobs": logprobs, + "response_format": response_format, + "sampling_params": sampling_params, + "stream": stream, + "tool_choice": tool_choice, + "tool_prompt_format": tool_prompt_format, + "tools": tools, + }, + inference_chat_completion_params.InferenceChatCompletionParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), + cast_to=ChatCompletionResponse, + stream=stream or False, + stream_cls=Stream[ChatCompletionResponseStreamChunk], ) @overload @@ -317,7 +312,7 @@ def completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceCompletionResponse: + ) -> CompletionResponse: """ Generate a completion for the given content using the specified model. @@ -363,7 +358,7 @@ def completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Stream[InferenceCompletionResponse]: + ) -> Stream[CompletionResponse]: """ Generate a completion for the given content using the specified model. @@ -409,7 +404,7 @@ def completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceCompletionResponse | Stream[InferenceCompletionResponse]: + ) -> CompletionResponse | Stream[CompletionResponse]: """ Generate a completion for the given content using the specified model. 
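The completion overloads follow the same pattern: CompletionResponse when not streaming, Stream[CompletionResponse] when stream=True. A short sketch under the same assumptions; the fields of CompletionResponse are not shown in this diff, so the attribute read below is an assumption.

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:5000")  # placeholder server address

    # Non-streaming completion now returns CompletionResponse.
    completion = client.inference.completion(
        content="string",     # placeholder prompt
        model_id="model_id",  # placeholder model id
    )
    print(completion.content)  # `content` is assumed; CompletionResponse fields are not defined in this diff

    # With stream=True the same call returns Stream[CompletionResponse].
    for chunk in client.inference.completion(
        content="string",
        model_id="model_id",
        stream=True,
    ):
        print(chunk)  # chunk field layout is likewise not shown in this diff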
@@ -455,32 +450,26 @@ def completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceCompletionResponse | Stream[InferenceCompletionResponse]: - extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})} - return cast( - InferenceCompletionResponse, - self._post( - "/v1/inference/completion", - body=maybe_transform( - { - "content": content, - "model_id": model_id, - "logprobs": logprobs, - "response_format": response_format, - "sampling_params": sampling_params, - "stream": stream, - }, - inference_completion_params.InferenceCompletionParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=cast( - Any, InferenceCompletionResponse - ), # Union types cannot be passed in as arguments in the type system - stream=stream or False, - stream_cls=Stream[InferenceCompletionResponse], + ) -> CompletionResponse | Stream[CompletionResponse]: + return self._post( + "/v1/inference/completion", + body=maybe_transform( + { + "content": content, + "model_id": model_id, + "logprobs": logprobs, + "response_format": response_format, + "sampling_params": sampling_params, + "stream": stream, + }, + inference_completion_params.InferenceCompletionParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), + cast_to=CompletionResponse, + stream=stream or False, + stream_cls=Stream[CompletionResponse], ) def embeddings( @@ -569,7 +558,7 @@ async def chat_completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceChatCompletionResponse: + ) -> ChatCompletionResponse: """ Generate a chat completion for the given messages using the specified model. @@ -633,7 +622,7 @@ async def chat_completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[InferenceChatCompletionResponse]: + ) -> AsyncStream[ChatCompletionResponseStreamChunk]: """ Generate a chat completion for the given messages using the specified model. @@ -697,7 +686,7 @@ async def chat_completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceChatCompletionResponse | AsyncStream[InferenceChatCompletionResponse]: + ) -> ChatCompletionResponse | AsyncStream[ChatCompletionResponseStreamChunk]: """ Generate a chat completion for the given messages using the specified model. 
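The async resource mirrors the sync one: ChatCompletionResponse for plain calls, AsyncStream[ChatCompletionResponseStreamChunk] when stream=True, and the same renaming applies to the async completion overloads below. A sketch with the same placeholder values:

    import asyncio

    from llama_stack_client import AsyncLlamaStackClient

    async def main() -> None:
        client = AsyncLlamaStackClient(base_url="http://localhost:5000")  # placeholder server address
        stream = await client.inference.chat_completion(
            messages=[{"role": "user", "content": "Hello"}],
            model_id="model_id",  # placeholder model id
            stream=True,
        )
        # AsyncStream is consumed with `async for`; each chunk carries one Event with a ContentDelta.
        async for chunk in stream:
            print(chunk.event.event_type, chunk.event.delta)

    asyncio.run(main())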
@@ -761,35 +750,29 @@ async def chat_completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceChatCompletionResponse | AsyncStream[InferenceChatCompletionResponse]: - extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})} - return cast( - InferenceChatCompletionResponse, - await self._post( - "/v1/inference/chat-completion", - body=await async_maybe_transform( - { - "messages": messages, - "model_id": model_id, - "logprobs": logprobs, - "response_format": response_format, - "sampling_params": sampling_params, - "stream": stream, - "tool_choice": tool_choice, - "tool_prompt_format": tool_prompt_format, - "tools": tools, - }, - inference_chat_completion_params.InferenceChatCompletionParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=cast( - Any, InferenceChatCompletionResponse - ), # Union types cannot be passed in as arguments in the type system - stream=stream or False, - stream_cls=AsyncStream[InferenceChatCompletionResponse], + ) -> ChatCompletionResponse | AsyncStream[ChatCompletionResponseStreamChunk]: + return await self._post( + "/v1/inference/chat-completion", + body=await async_maybe_transform( + { + "messages": messages, + "model_id": model_id, + "logprobs": logprobs, + "response_format": response_format, + "sampling_params": sampling_params, + "stream": stream, + "tool_choice": tool_choice, + "tool_prompt_format": tool_prompt_format, + "tools": tools, + }, + inference_chat_completion_params.InferenceChatCompletionParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), + cast_to=ChatCompletionResponse, + stream=stream or False, + stream_cls=AsyncStream[ChatCompletionResponseStreamChunk], ) @overload @@ -808,7 +791,7 @@ async def completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceCompletionResponse: + ) -> CompletionResponse: """ Generate a completion for the given content using the specified model. @@ -854,7 +837,7 @@ async def completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[InferenceCompletionResponse]: + ) -> AsyncStream[CompletionResponse]: """ Generate a completion for the given content using the specified model. @@ -900,7 +883,7 @@ async def completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceCompletionResponse | AsyncStream[InferenceCompletionResponse]: + ) -> CompletionResponse | AsyncStream[CompletionResponse]: """ Generate a completion for the given content using the specified model. 
@@ -946,32 +929,26 @@ async def completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceCompletionResponse | AsyncStream[InferenceCompletionResponse]: - extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})} - return cast( - InferenceCompletionResponse, - await self._post( - "/v1/inference/completion", - body=await async_maybe_transform( - { - "content": content, - "model_id": model_id, - "logprobs": logprobs, - "response_format": response_format, - "sampling_params": sampling_params, - "stream": stream, - }, - inference_completion_params.InferenceCompletionParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=cast( - Any, InferenceCompletionResponse - ), # Union types cannot be passed in as arguments in the type system - stream=stream or False, - stream_cls=AsyncStream[InferenceCompletionResponse], + ) -> CompletionResponse | AsyncStream[CompletionResponse]: + return await self._post( + "/v1/inference/completion", + body=await async_maybe_transform( + { + "content": content, + "model_id": model_id, + "logprobs": logprobs, + "response_format": response_format, + "sampling_params": sampling_params, + "stream": stream, + }, + inference_completion_params.InferenceCompletionParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), + cast_to=CompletionResponse, + stream=stream or False, + stream_cls=AsyncStream[CompletionResponse], ) async def embeddings( diff --git a/src/llama_stack_client/types/__init__.py b/src/llama_stack_client/types/__init__.py index 987af04e..45824a74 100644 --- a/src/llama_stack_client/types/__init__.py +++ b/src/llama_stack_client/types/__init__.py @@ -29,6 +29,7 @@ ToolParamDefinition as ToolParamDefinition, ToolResponseMessage as ToolResponseMessage, QueryGeneratorConfig as QueryGeneratorConfig, + ChatCompletionResponse as ChatCompletionResponse, InterleavedContentItem as InterleavedContentItem, ) from .shield import Shield as Shield @@ -109,7 +110,6 @@ from .datasetio_append_rows_params import DatasetioAppendRowsParams as DatasetioAppendRowsParams from .scoring_score_batch_response import ScoringScoreBatchResponse as ScoringScoreBatchResponse from .telemetry_query_spans_params import TelemetryQuerySpansParams as TelemetryQuerySpansParams -from .inference_completion_response import InferenceCompletionResponse as InferenceCompletionResponse from .telemetry_query_traces_params import TelemetryQueryTracesParams as TelemetryQueryTracesParams from .scoring_function_list_response import ScoringFunctionListResponse as ScoringFunctionListResponse from .telemetry_get_span_tree_params import TelemetryGetSpanTreeParams as TelemetryGetSpanTreeParams @@ -123,9 +123,11 @@ from .scoring_function_register_params import ScoringFunctionRegisterParams as ScoringFunctionRegisterParams from .telemetry_get_span_tree_response import TelemetryGetSpanTreeResponse as TelemetryGetSpanTreeResponse from .batch_inference_completion_params import BatchInferenceCompletionParams as BatchInferenceCompletionParams -from .inference_chat_completion_response import InferenceChatCompletionResponse as InferenceChatCompletionResponse from .synthetic_data_generation_response import SyntheticDataGenerationResponse as SyntheticDataGenerationResponse from .datasetio_get_rows_paginated_params import 
DatasetioGetRowsPaginatedParams as DatasetioGetRowsPaginatedParams +from .chat_completion_response_stream_chunk import ( + ChatCompletionResponseStreamChunk as ChatCompletionResponseStreamChunk, +) from .batch_inference_chat_completion_params import ( BatchInferenceChatCompletionParams as BatchInferenceChatCompletionParams, ) diff --git a/src/llama_stack_client/types/agents/__init__.py b/src/llama_stack_client/types/agents/__init__.py index 5ee899c2..be21f291 100644 --- a/src/llama_stack_client/types/agents/__init__.py +++ b/src/llama_stack_client/types/agents/__init__.py @@ -6,9 +6,9 @@ from .session import Session as Session from .turn_create_params import TurnCreateParams as TurnCreateParams from .turn_response_event import TurnResponseEvent as TurnResponseEvent -from .turn_create_response import TurnCreateResponse as TurnCreateResponse from .session_create_params import SessionCreateParams as SessionCreateParams from .step_retrieve_response import StepRetrieveResponse as StepRetrieveResponse from .session_create_response import SessionCreateResponse as SessionCreateResponse from .session_retrieve_params import SessionRetrieveParams as SessionRetrieveParams from .turn_response_event_payload import TurnResponseEventPayload as TurnResponseEventPayload +from .agent_turn_response_stream_chunk import AgentTurnResponseStreamChunk as AgentTurnResponseStreamChunk diff --git a/src/llama_stack_client/types/agents/agent_turn_response_stream_chunk.py b/src/llama_stack_client/types/agents/agent_turn_response_stream_chunk.py new file mode 100644 index 00000000..bda45d88 --- /dev/null +++ b/src/llama_stack_client/types/agents/agent_turn_response_stream_chunk.py @@ -0,0 +1,11 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + +from ..._models import BaseModel +from .turn_response_event import TurnResponseEvent + +__all__ = ["AgentTurnResponseStreamChunk"] + + +class AgentTurnResponseStreamChunk(BaseModel): + event: TurnResponseEvent diff --git a/src/llama_stack_client/types/agents/turn.py b/src/llama_stack_client/types/agents/turn.py index 610da1a9..c34efa08 100644 --- a/src/llama_stack_client/types/agents/turn.py +++ b/src/llama_stack_client/types/agents/turn.py @@ -32,20 +32,29 @@ class OutputAttachmentContentImageContentItemImage(BaseModel): data: Optional[str] = None + """base64 encoded image data as string""" url: Optional[URL] = None + """A URL of the image or data URL in the format of data:image/{type};base64,{data}. + + Note that URL could have length limits. + """ class OutputAttachmentContentImageContentItem(BaseModel): image: OutputAttachmentContentImageContentItemImage + """Image as a base64 encoded string or an URL""" type: Literal["image"] + """Discriminator type of the content item. Always "image" """ class OutputAttachmentContentTextContentItem(BaseModel): text: str + """Text content""" type: Literal["text"] + """Discriminator type of the content item. 
Always "text" """ OutputAttachmentContent: TypeAlias = Union[ diff --git a/src/llama_stack_client/types/agents/turn_create_params.py b/src/llama_stack_client/types/agents/turn_create_params.py index f8586b6d..23bfc6a3 100644 --- a/src/llama_stack_client/types/agents/turn_create_params.py +++ b/src/llama_stack_client/types/agents/turn_create_params.py @@ -40,20 +40,29 @@ class TurnCreateParamsBase(TypedDict, total=False): class DocumentContentImageContentItemImage(TypedDict, total=False): data: str + """base64 encoded image data as string""" url: URL + """A URL of the image or data URL in the format of data:image/{type};base64,{data}. + + Note that URL could have length limits. + """ class DocumentContentImageContentItem(TypedDict, total=False): image: Required[DocumentContentImageContentItemImage] + """Image as a base64 encoded string or an URL""" type: Required[Literal["image"]] + """Discriminator type of the content item. Always "image" """ class DocumentContentTextContentItem(TypedDict, total=False): text: Required[str] + """Text content""" type: Required[Literal["text"]] + """Discriminator type of the content item. Always "text" """ DocumentContent: TypeAlias = Union[ diff --git a/src/llama_stack_client/types/batch_inference_chat_completion_response.py b/src/llama_stack_client/types/batch_inference_chat_completion_response.py index b7472b59..218b1275 100644 --- a/src/llama_stack_client/types/batch_inference_chat_completion_response.py +++ b/src/llama_stack_client/types/batch_inference_chat_completion_response.py @@ -1,21 +1,12 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import List, Optional +from typing import List from .._models import BaseModel -from .token_log_probs import TokenLogProbs -from .shared.completion_message import CompletionMessage +from .shared.chat_completion_response import ChatCompletionResponse -__all__ = ["BatchInferenceChatCompletionResponse", "Batch"] - - -class Batch(BaseModel): - completion_message: CompletionMessage - """The complete response message""" - - logprobs: Optional[List[TokenLogProbs]] = None - """Optional log probabilities for generated tokens""" +__all__ = ["BatchInferenceChatCompletionResponse"] class BatchInferenceChatCompletionResponse(BaseModel): - batch: List[Batch] + batch: List[ChatCompletionResponse] diff --git a/src/llama_stack_client/types/chat_completion_response_stream_chunk.py b/src/llama_stack_client/types/chat_completion_response_stream_chunk.py new file mode 100644 index 00000000..99916add --- /dev/null +++ b/src/llama_stack_client/types/chat_completion_response_stream_chunk.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from .._models import BaseModel +from .token_log_probs import TokenLogProbs +from .shared.content_delta import ContentDelta + +__all__ = ["ChatCompletionResponseStreamChunk", "Event"] + + +class Event(BaseModel): + delta: ContentDelta + """Content generated since last event. + + This can be one or more tokens, or a tool call. 
+ """ + + event_type: Literal["start", "complete", "progress"] + """Type of the event""" + + logprobs: Optional[List[TokenLogProbs]] = None + """Optional log probabilities for generated tokens""" + + stop_reason: Optional[Literal["end_of_turn", "end_of_message", "out_of_tokens"]] = None + """Optional reason why generation stopped, if complete""" + + +class ChatCompletionResponseStreamChunk(BaseModel): + event: Event + """The event containing the new content""" diff --git a/src/llama_stack_client/types/shared/__init__.py b/src/llama_stack_client/types/shared/__init__.py index cc0a1460..075a664d 100644 --- a/src/llama_stack_client/types/shared/__init__.py +++ b/src/llama_stack_client/types/shared/__init__.py @@ -22,4 +22,5 @@ from .tool_param_definition import ToolParamDefinition as ToolParamDefinition from .tool_response_message import ToolResponseMessage as ToolResponseMessage from .query_generator_config import QueryGeneratorConfig as QueryGeneratorConfig +from .chat_completion_response import ChatCompletionResponse as ChatCompletionResponse from .interleaved_content_item import InterleavedContentItem as InterleavedContentItem diff --git a/src/llama_stack_client/types/shared/chat_completion_response.py b/src/llama_stack_client/types/shared/chat_completion_response.py new file mode 100644 index 00000000..e8c5071e --- /dev/null +++ b/src/llama_stack_client/types/shared/chat_completion_response.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional + +from ..._models import BaseModel +from ..token_log_probs import TokenLogProbs +from .completion_message import CompletionMessage + +__all__ = ["ChatCompletionResponse"] + + +class ChatCompletionResponse(BaseModel): + completion_message: CompletionMessage + """The complete response message""" + + logprobs: Optional[List[TokenLogProbs]] = None + """Optional log probabilities for generated tokens""" diff --git a/src/llama_stack_client/types/shared/document.py b/src/llama_stack_client/types/shared/document.py index e88960b4..c0ac8b9d 100644 --- a/src/llama_stack_client/types/shared/document.py +++ b/src/llama_stack_client/types/shared/document.py @@ -12,20 +12,29 @@ class ContentImageContentItemImage(BaseModel): data: Optional[str] = None + """base64 encoded image data as string""" url: Optional[URL] = None + """A URL of the image or data URL in the format of data:image/{type};base64,{data}. + + Note that URL could have length limits. + """ class ContentImageContentItem(BaseModel): image: ContentImageContentItemImage + """Image as a base64 encoded string or an URL""" type: Literal["image"] + """Discriminator type of the content item. Always "image" """ class ContentTextContentItem(BaseModel): text: str + """Text content""" type: Literal["text"] + """Discriminator type of the content item. Always "text" """ Content: TypeAlias = Union[str, ContentImageContentItem, ContentTextContentItem, List[InterleavedContentItem], URL] diff --git a/src/llama_stack_client/types/shared/interleaved_content.py b/src/llama_stack_client/types/shared/interleaved_content.py index 3a15e3cb..02a9b43e 100644 --- a/src/llama_stack_client/types/shared/interleaved_content.py +++ b/src/llama_stack_client/types/shared/interleaved_content.py @@ -12,20 +12,29 @@ class ImageContentItemImage(BaseModel): data: Optional[str] = None + """base64 encoded image data as string""" url: Optional[URL] = None + """A URL of the image or data URL in the format of data:image/{type};base64,{data}. 
+ + Note that URL could have length limits. + """ class ImageContentItem(BaseModel): image: ImageContentItemImage + """Image as a base64 encoded string or an URL""" type: Literal["image"] + """Discriminator type of the content item. Always "image" """ class TextContentItem(BaseModel): text: str + """Text content""" type: Literal["text"] + """Discriminator type of the content item. Always "text" """ InterleavedContent: TypeAlias = Union[str, ImageContentItem, TextContentItem, List[InterleavedContentItem]] diff --git a/src/llama_stack_client/types/shared/interleaved_content_item.py b/src/llama_stack_client/types/shared/interleaved_content_item.py index 30a14ee3..c7030b1c 100644 --- a/src/llama_stack_client/types/shared/interleaved_content_item.py +++ b/src/llama_stack_client/types/shared/interleaved_content_item.py @@ -12,20 +12,29 @@ class ImageContentItemImage(BaseModel): data: Optional[str] = None + """base64 encoded image data as string""" url: Optional[URL] = None + """A URL of the image or data URL in the format of data:image/{type};base64,{data}. + + Note that URL could have length limits. + """ class ImageContentItem(BaseModel): image: ImageContentItemImage + """Image as a base64 encoded string or an URL""" type: Literal["image"] + """Discriminator type of the content item. Always "image" """ class TextContentItem(BaseModel): text: str + """Text content""" type: Literal["text"] + """Discriminator type of the content item. Always "text" """ InterleavedContentItem: TypeAlias = Annotated[ diff --git a/src/llama_stack_client/types/shared_params/document.py b/src/llama_stack_client/types/shared_params/document.py index 9fb05ab2..1b5d5f09 100644 --- a/src/llama_stack_client/types/shared_params/document.py +++ b/src/llama_stack_client/types/shared_params/document.py @@ -13,20 +13,29 @@ class ContentImageContentItemImage(TypedDict, total=False): data: str + """base64 encoded image data as string""" url: URL + """A URL of the image or data URL in the format of data:image/{type};base64,{data}. + + Note that URL could have length limits. + """ class ContentImageContentItem(TypedDict, total=False): image: Required[ContentImageContentItemImage] + """Image as a base64 encoded string or an URL""" type: Required[Literal["image"]] + """Discriminator type of the content item. Always "image" """ class ContentTextContentItem(TypedDict, total=False): text: Required[str] + """Text content""" type: Required[Literal["text"]] + """Discriminator type of the content item. Always "text" """ Content: TypeAlias = Union[str, ContentImageContentItem, ContentTextContentItem, Iterable[InterleavedContentItem], URL] diff --git a/src/llama_stack_client/types/shared_params/interleaved_content.py b/src/llama_stack_client/types/shared_params/interleaved_content.py index fcdfa7d7..8d5605fb 100644 --- a/src/llama_stack_client/types/shared_params/interleaved_content.py +++ b/src/llama_stack_client/types/shared_params/interleaved_content.py @@ -13,20 +13,29 @@ class ImageContentItemImage(TypedDict, total=False): data: str + """base64 encoded image data as string""" url: URL + """A URL of the image or data URL in the format of data:image/{type};base64,{data}. + + Note that URL could have length limits. + """ class ImageContentItem(TypedDict, total=False): image: Required[ImageContentItemImage] + """Image as a base64 encoded string or an URL""" type: Required[Literal["image"]] + """Discriminator type of the content item. 
Always "image" """ class TextContentItem(TypedDict, total=False): text: Required[str] + """Text content""" type: Required[Literal["text"]] + """Discriminator type of the content item. Always "text" """ InterleavedContent: TypeAlias = Union[str, ImageContentItem, TextContentItem, Iterable[InterleavedContentItem]] diff --git a/src/llama_stack_client/types/shared_params/interleaved_content_item.py b/src/llama_stack_client/types/shared_params/interleaved_content_item.py index 8a5da06f..acb7e6f1 100644 --- a/src/llama_stack_client/types/shared_params/interleaved_content_item.py +++ b/src/llama_stack_client/types/shared_params/interleaved_content_item.py @@ -12,20 +12,29 @@ class ImageContentItemImage(TypedDict, total=False): data: str + """base64 encoded image data as string""" url: URL + """A URL of the image or data URL in the format of data:image/{type};base64,{data}. + + Note that URL could have length limits. + """ class ImageContentItem(TypedDict, total=False): image: Required[ImageContentItemImage] + """Image as a base64 encoded string or an URL""" type: Required[Literal["image"]] + """Discriminator type of the content item. Always "image" """ class TextContentItem(TypedDict, total=False): text: Required[str] + """Text content""" type: Required[Literal["text"]] + """Discriminator type of the content item. Always "text" """ InterleavedContentItem: TypeAlias = Union[ImageContentItem, TextContentItem] diff --git a/tests/api_resources/agents/test_turn.py b/tests/api_resources/agents/test_turn.py index c6e7138e..b4bc87b3 100644 --- a/tests/api_resources/agents/test_turn.py +++ b/tests/api_resources/agents/test_turn.py @@ -9,7 +9,7 @@ from tests.utils import assert_matches_type from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types.agents import Turn, TurnCreateResponse +from llama_stack_client.types.agents import Turn base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -17,9 +17,6 @@ class TestTurn: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_create_overload_1(self, client: LlamaStackClient) -> None: turn = client.agents.turn.create( @@ -32,11 +29,8 @@ def test_method_create_overload_1(self, client: LlamaStackClient) -> None: } ], ) - assert_matches_type(TurnCreateResponse, turn, path=["response"]) + assert_matches_type(Turn, turn, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient) -> None: turn = client.agents.turn.create( @@ -58,11 +52,8 @@ def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient stream=False, toolgroups=["string"], ) - assert_matches_type(TurnCreateResponse, turn, path=["response"]) + assert_matches_type(Turn, turn, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_raw_response_create_overload_1(self, client: LlamaStackClient) -> None: response = client.agents.turn.with_raw_response.create( @@ -79,11 +70,8 @@ def test_raw_response_create_overload_1(self, client: LlamaStackClient) -> None: assert response.is_closed is True 
assert response.http_request.headers.get("X-Stainless-Lang") == "python" turn = response.parse() - assert_matches_type(TurnCreateResponse, turn, path=["response"]) + assert_matches_type(Turn, turn, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_streaming_response_create_overload_1(self, client: LlamaStackClient) -> None: with client.agents.turn.with_streaming_response.create( @@ -100,13 +88,10 @@ def test_streaming_response_create_overload_1(self, client: LlamaStackClient) -> assert response.http_request.headers.get("X-Stainless-Lang") == "python" turn = response.parse() - assert_matches_type(TurnCreateResponse, turn, path=["response"]) + assert_matches_type(Turn, turn, path=["response"]) assert cast(Any, response.is_closed) is True - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_path_params_create_overload_1(self, client: LlamaStackClient) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): @@ -133,9 +118,6 @@ def test_path_params_create_overload_1(self, client: LlamaStackClient) -> None: ], ) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_create_overload_2(self, client: LlamaStackClient) -> None: turn_stream = client.agents.turn.create( @@ -151,9 +133,6 @@ def test_method_create_overload_2(self, client: LlamaStackClient) -> None: ) turn_stream.response.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient) -> None: turn_stream = client.agents.turn.create( @@ -177,9 +156,6 @@ def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient ) turn_stream.response.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_raw_response_create_overload_2(self, client: LlamaStackClient) -> None: response = client.agents.turn.with_raw_response.create( @@ -198,9 +174,6 @@ def test_raw_response_create_overload_2(self, client: LlamaStackClient) -> None: stream = response.parse() stream.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_streaming_response_create_overload_2(self, client: LlamaStackClient) -> None: with client.agents.turn.with_streaming_response.create( @@ -222,9 +195,6 @@ def test_streaming_response_create_overload_2(self, client: LlamaStackClient) -> assert cast(Any, response.is_closed) is True - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_path_params_create_overload_2(self, client: LlamaStackClient) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): @@ -317,9 +287,6 @@ def test_path_params_retrieve(self, client: LlamaStackClient) -> None: class TestAsyncTurn: parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", 
"strict"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: turn = await async_client.agents.turn.create( @@ -332,11 +299,8 @@ async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClien } ], ) - assert_matches_type(TurnCreateResponse, turn, path=["response"]) + assert_matches_type(Turn, turn, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_method_create_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None: turn = await async_client.agents.turn.create( @@ -358,11 +322,8 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn stream=False, toolgroups=["string"], ) - assert_matches_type(TurnCreateResponse, turn, path=["response"]) + assert_matches_type(Turn, turn, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_raw_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.agents.turn.with_raw_response.create( @@ -379,11 +340,8 @@ async def test_raw_response_create_overload_1(self, async_client: AsyncLlamaStac assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" turn = await response.parse() - assert_matches_type(TurnCreateResponse, turn, path=["response"]) + assert_matches_type(Turn, turn, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_streaming_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.agents.turn.with_streaming_response.create( @@ -400,13 +358,10 @@ async def test_streaming_response_create_overload_1(self, async_client: AsyncLla assert response.http_request.headers.get("X-Stainless-Lang") == "python" turn = await response.parse() - assert_matches_type(TurnCreateResponse, turn, path=["response"]) + assert_matches_type(Turn, turn, path=["response"]) assert cast(Any, response.is_closed) is True - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_path_params_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): @@ -433,9 +388,6 @@ async def test_path_params_create_overload_1(self, async_client: AsyncLlamaStack ], ) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_method_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: turn_stream = await async_client.agents.turn.create( @@ -451,9 +403,6 @@ async def test_method_create_overload_2(self, async_client: AsyncLlamaStackClien ) await turn_stream.response.aclose() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def 
test_method_create_with_all_params_overload_2(self, async_client: AsyncLlamaStackClient) -> None: turn_stream = await async_client.agents.turn.create( @@ -477,9 +426,6 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn ) await turn_stream.response.aclose() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_raw_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.agents.turn.with_raw_response.create( @@ -498,9 +444,6 @@ async def test_raw_response_create_overload_2(self, async_client: AsyncLlamaStac stream = await response.parse() await stream.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_streaming_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.agents.turn.with_streaming_response.create( @@ -522,9 +465,6 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncLla assert cast(Any, response.is_closed) is True - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_path_params_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): diff --git a/tests/api_resources/test_inference.py b/tests/api_resources/test_inference.py index ab03db1c..64c912d2 100644 --- a/tests/api_resources/test_inference.py +++ b/tests/api_resources/test_inference.py @@ -10,10 +10,10 @@ from tests.utils import assert_matches_type from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient from llama_stack_client.types import ( + CompletionResponse, EmbeddingsResponse, - InferenceCompletionResponse, - InferenceChatCompletionResponse, ) +from llama_stack_client.types.shared import ChatCompletionResponse base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -21,9 +21,6 @@ class TestInference: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_chat_completion_overload_1(self, client: LlamaStackClient) -> None: inference = client.inference.chat_completion( @@ -35,11 +32,8 @@ def test_method_chat_completion_overload_1(self, client: LlamaStackClient) -> No ], model_id="model_id", ) - assert_matches_type(InferenceChatCompletionResponse, inference, path=["response"]) + assert_matches_type(ChatCompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_chat_completion_with_all_params_overload_1(self, client: LlamaStackClient) -> None: inference = client.inference.chat_completion( @@ -79,11 +73,8 @@ def test_method_chat_completion_with_all_params_overload_1(self, client: LlamaSt } ], ) - assert_matches_type(InferenceChatCompletionResponse, inference, path=["response"]) + assert_matches_type(ChatCompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good 
way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_raw_response_chat_completion_overload_1(self, client: LlamaStackClient) -> None: response = client.inference.with_raw_response.chat_completion( @@ -99,11 +90,8 @@ def test_raw_response_chat_completion_overload_1(self, client: LlamaStackClient) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" inference = response.parse() - assert_matches_type(InferenceChatCompletionResponse, inference, path=["response"]) + assert_matches_type(ChatCompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_streaming_response_chat_completion_overload_1(self, client: LlamaStackClient) -> None: with client.inference.with_streaming_response.chat_completion( @@ -119,13 +107,10 @@ def test_streaming_response_chat_completion_overload_1(self, client: LlamaStackC assert response.http_request.headers.get("X-Stainless-Lang") == "python" inference = response.parse() - assert_matches_type(InferenceChatCompletionResponse, inference, path=["response"]) + assert_matches_type(ChatCompletionResponse, inference, path=["response"]) assert cast(Any, response.is_closed) is True - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_chat_completion_overload_2(self, client: LlamaStackClient) -> None: inference_stream = client.inference.chat_completion( @@ -140,9 +125,6 @@ def test_method_chat_completion_overload_2(self, client: LlamaStackClient) -> No ) inference_stream.response.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_chat_completion_with_all_params_overload_2(self, client: LlamaStackClient) -> None: inference_stream = client.inference.chat_completion( @@ -184,9 +166,6 @@ def test_method_chat_completion_with_all_params_overload_2(self, client: LlamaSt ) inference_stream.response.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_raw_response_chat_completion_overload_2(self, client: LlamaStackClient) -> None: response = client.inference.with_raw_response.chat_completion( @@ -204,9 +183,6 @@ def test_raw_response_chat_completion_overload_2(self, client: LlamaStackClient) stream = response.parse() stream.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_streaming_response_chat_completion_overload_2(self, client: LlamaStackClient) -> None: with client.inference.with_streaming_response.chat_completion( @@ -227,20 +203,14 @@ def test_streaming_response_chat_completion_overload_2(self, client: LlamaStackC assert cast(Any, response.is_closed) is True - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_completion_overload_1(self, client: LlamaStackClient) -> None: inference = client.inference.completion( content="string", model_id="model_id", ) - assert_matches_type(InferenceCompletionResponse, inference, 
path=["response"]) + assert_matches_type(CompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_completion_with_all_params_overload_1(self, client: LlamaStackClient) -> None: inference = client.inference.completion( @@ -258,11 +228,8 @@ def test_method_completion_with_all_params_overload_1(self, client: LlamaStackCl }, stream=False, ) - assert_matches_type(InferenceCompletionResponse, inference, path=["response"]) + assert_matches_type(CompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_raw_response_completion_overload_1(self, client: LlamaStackClient) -> None: response = client.inference.with_raw_response.completion( @@ -273,11 +240,8 @@ def test_raw_response_completion_overload_1(self, client: LlamaStackClient) -> N assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" inference = response.parse() - assert_matches_type(InferenceCompletionResponse, inference, path=["response"]) + assert_matches_type(CompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_streaming_response_completion_overload_1(self, client: LlamaStackClient) -> None: with client.inference.with_streaming_response.completion( @@ -288,13 +252,10 @@ def test_streaming_response_completion_overload_1(self, client: LlamaStackClient assert response.http_request.headers.get("X-Stainless-Lang") == "python" inference = response.parse() - assert_matches_type(InferenceCompletionResponse, inference, path=["response"]) + assert_matches_type(CompletionResponse, inference, path=["response"]) assert cast(Any, response.is_closed) is True - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_completion_overload_2(self, client: LlamaStackClient) -> None: inference_stream = client.inference.completion( @@ -304,9 +265,6 @@ def test_method_completion_overload_2(self, client: LlamaStackClient) -> None: ) inference_stream.response.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_completion_with_all_params_overload_2(self, client: LlamaStackClient) -> None: inference_stream = client.inference.completion( @@ -326,9 +284,6 @@ def test_method_completion_with_all_params_overload_2(self, client: LlamaStackCl ) inference_stream.response.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_raw_response_completion_overload_2(self, client: LlamaStackClient) -> None: response = client.inference.with_raw_response.completion( @@ -341,9 +296,6 @@ def test_raw_response_completion_overload_2(self, client: LlamaStackClient) -> N stream = response.parse() stream.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_streaming_response_completion_overload_2(self, 
client: LlamaStackClient) -> None: with client.inference.with_streaming_response.completion( @@ -397,9 +349,6 @@ def test_streaming_response_embeddings(self, client: LlamaStackClient) -> None: class TestAsyncInference: parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_method_chat_completion_overload_1(self, async_client: AsyncLlamaStackClient) -> None: inference = await async_client.inference.chat_completion( @@ -411,11 +360,8 @@ async def test_method_chat_completion_overload_1(self, async_client: AsyncLlamaS ], model_id="model_id", ) - assert_matches_type(InferenceChatCompletionResponse, inference, path=["response"]) + assert_matches_type(ChatCompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_method_chat_completion_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None: inference = await async_client.inference.chat_completion( @@ -455,11 +401,8 @@ async def test_method_chat_completion_with_all_params_overload_1(self, async_cli } ], ) - assert_matches_type(InferenceChatCompletionResponse, inference, path=["response"]) + assert_matches_type(ChatCompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_raw_response_chat_completion_overload_1(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.inference.with_raw_response.chat_completion( @@ -475,11 +418,8 @@ async def test_raw_response_chat_completion_overload_1(self, async_client: Async assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" inference = await response.parse() - assert_matches_type(InferenceChatCompletionResponse, inference, path=["response"]) + assert_matches_type(ChatCompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_streaming_response_chat_completion_overload_1(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.inference.with_streaming_response.chat_completion( @@ -495,13 +435,10 @@ async def test_streaming_response_chat_completion_overload_1(self, async_client: assert response.http_request.headers.get("X-Stainless-Lang") == "python" inference = await response.parse() - assert_matches_type(InferenceChatCompletionResponse, inference, path=["response"]) + assert_matches_type(ChatCompletionResponse, inference, path=["response"]) assert cast(Any, response.is_closed) is True - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_method_chat_completion_overload_2(self, async_client: AsyncLlamaStackClient) -> None: inference_stream = await async_client.inference.chat_completion( @@ -516,9 +453,6 @@ async def test_method_chat_completion_overload_2(self, async_client: AsyncLlamaS ) await inference_stream.response.aclose() - @pytest.mark.skip( - reason="currently no good way to test 
endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_method_chat_completion_with_all_params_overload_2(self, async_client: AsyncLlamaStackClient) -> None: inference_stream = await async_client.inference.chat_completion( @@ -560,9 +494,6 @@ async def test_method_chat_completion_with_all_params_overload_2(self, async_cli ) await inference_stream.response.aclose() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_raw_response_chat_completion_overload_2(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.inference.with_raw_response.chat_completion( @@ -580,9 +511,6 @@ async def test_raw_response_chat_completion_overload_2(self, async_client: Async stream = await response.parse() await stream.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_streaming_response_chat_completion_overload_2(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.inference.with_streaming_response.chat_completion( @@ -603,20 +531,14 @@ async def test_streaming_response_chat_completion_overload_2(self, async_client: assert cast(Any, response.is_closed) is True - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_method_completion_overload_1(self, async_client: AsyncLlamaStackClient) -> None: inference = await async_client.inference.completion( content="string", model_id="model_id", ) - assert_matches_type(InferenceCompletionResponse, inference, path=["response"]) + assert_matches_type(CompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_method_completion_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None: inference = await async_client.inference.completion( @@ -634,11 +556,8 @@ async def test_method_completion_with_all_params_overload_1(self, async_client: }, stream=False, ) - assert_matches_type(InferenceCompletionResponse, inference, path=["response"]) + assert_matches_type(CompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_raw_response_completion_overload_1(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.inference.with_raw_response.completion( @@ -649,11 +568,8 @@ async def test_raw_response_completion_overload_1(self, async_client: AsyncLlama assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" inference = await response.parse() - assert_matches_type(InferenceCompletionResponse, inference, path=["response"]) + assert_matches_type(CompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_streaming_response_completion_overload_1(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.inference.with_streaming_response.completion( @@ -664,13 
+580,10 @@ async def test_streaming_response_completion_overload_1(self, async_client: Asyn assert response.http_request.headers.get("X-Stainless-Lang") == "python" inference = await response.parse() - assert_matches_type(InferenceCompletionResponse, inference, path=["response"]) + assert_matches_type(CompletionResponse, inference, path=["response"]) assert cast(Any, response.is_closed) is True - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_method_completion_overload_2(self, async_client: AsyncLlamaStackClient) -> None: inference_stream = await async_client.inference.completion( @@ -680,9 +593,6 @@ async def test_method_completion_overload_2(self, async_client: AsyncLlamaStackC ) await inference_stream.response.aclose() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_method_completion_with_all_params_overload_2(self, async_client: AsyncLlamaStackClient) -> None: inference_stream = await async_client.inference.completion( @@ -702,9 +612,6 @@ async def test_method_completion_with_all_params_overload_2(self, async_client: ) await inference_stream.response.aclose() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_raw_response_completion_overload_2(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.inference.with_raw_response.completion( @@ -717,9 +624,6 @@ async def test_raw_response_completion_overload_2(self, async_client: AsyncLlama stream = await response.parse() await stream.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_streaming_response_completion_overload_2(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.inference.with_streaming_response.completion(