diff --git a/LICENSE b/LICENSE index 9af3db1b..c781a0e2 100644 --- a/LICENSE +++ b/LICENSE @@ -1,201 +1,22 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2024 Llama Stack Client - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +MIT License + +Copyright (c) Meta Platforms, Inc. and affiliates + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/src/llama_stack_client/resources/agents/turn.py b/src/llama_stack_client/resources/agents/turn.py index 8f2e9989..272ea4d9 100644 --- a/src/llama_stack_client/resources/agents/turn.py +++ b/src/llama_stack_client/resources/agents/turn.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Any, List, Iterable, cast +from typing import List, Iterable from typing_extensions import Literal, overload import httpx @@ -25,7 +25,7 @@ from ..._base_client import make_request_options from ...types.agents import turn_create_params from ...types.agents.turn import Turn -from ...types.agents.turn_create_response import TurnCreateResponse +from ...types.agents.agent_turn_response_stream_chunk import AgentTurnResponseStreamChunk __all__ = ["TurnResource", "AsyncTurnResource"] @@ -66,7 +66,7 @@ def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> TurnCreateResponse: + ) -> Turn: """ Args: extra_headers: Send extra headers @@ -95,7 +95,7 @@ def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Stream[TurnCreateResponse]: + ) -> Stream[AgentTurnResponseStreamChunk]: """ Args: extra_headers: Send extra headers @@ -124,7 +124,7 @@ def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> TurnCreateResponse | Stream[TurnCreateResponse]: + ) -> Turn | Stream[AgentTurnResponseStreamChunk]: """ Args: extra_headers: Send extra headers @@ -153,34 +153,28 @@ def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> TurnCreateResponse | Stream[TurnCreateResponse]: + ) -> Turn | Stream[AgentTurnResponseStreamChunk]: if not agent_id: raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}") if not session_id: 
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}") - extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})} - return cast( - TurnCreateResponse, - self._post( - f"/v1/agents/{agent_id}/session/{session_id}/turn", - body=maybe_transform( - { - "messages": messages, - "documents": documents, - "stream": stream, - "toolgroups": toolgroups, - }, - turn_create_params.TurnCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=cast( - Any, TurnCreateResponse - ), # Union types cannot be passed in as arguments in the type system - stream=stream or False, - stream_cls=Stream[TurnCreateResponse], + return self._post( + f"/v1/agents/{agent_id}/session/{session_id}/turn", + body=maybe_transform( + { + "messages": messages, + "documents": documents, + "stream": stream, + "toolgroups": toolgroups, + }, + turn_create_params.TurnCreateParams, ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Turn, + stream=stream or False, + stream_cls=Stream[AgentTurnResponseStreamChunk], ) def retrieve( @@ -257,7 +251,7 @@ async def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> TurnCreateResponse: + ) -> Turn: """ Args: extra_headers: Send extra headers @@ -286,7 +280,7 @@ async def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[TurnCreateResponse]: + ) -> AsyncStream[AgentTurnResponseStreamChunk]: """ Args: extra_headers: Send extra headers @@ -315,7 +309,7 @@ async def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> TurnCreateResponse | AsyncStream[TurnCreateResponse]: + ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]: """ Args: extra_headers: Send extra headers @@ -344,34 +338,28 @@ async def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> TurnCreateResponse | AsyncStream[TurnCreateResponse]: + ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]: if not agent_id: raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}") if not session_id: raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}") - extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})} - return cast( - TurnCreateResponse, - await self._post( - f"/v1/agents/{agent_id}/session/{session_id}/turn", - body=await async_maybe_transform( - { - "messages": messages, - "documents": documents, - "stream": stream, - "toolgroups": toolgroups, - }, - turn_create_params.TurnCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=cast( - Any, TurnCreateResponse - ), # Union types cannot be passed in as arguments in the type system - stream=stream or False, - stream_cls=AsyncStream[TurnCreateResponse], + return await self._post( + f"/v1/agents/{agent_id}/session/{session_id}/turn", + body=await async_maybe_transform( + { + "messages": messages, + "documents": documents, + "stream": stream, + "toolgroups": toolgroups, + }, + 
turn_create_params.TurnCreateParams, ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Turn, + stream=stream or False, + stream_cls=AsyncStream[AgentTurnResponseStreamChunk], ) async def retrieve( diff --git a/src/llama_stack_client/resources/inference.py b/src/llama_stack_client/resources/inference.py index ce88b3b8..8971a921 100644 --- a/src/llama_stack_client/resources/inference.py +++ b/src/llama_stack_client/resources/inference.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Any, List, Iterable, cast +from typing import List, Iterable from typing_extensions import Literal, overload import httpx @@ -28,13 +28,14 @@ ) from .._streaming import Stream, AsyncStream from .._base_client import make_request_options +from ..types.completion_response import CompletionResponse from ..types.embeddings_response import EmbeddingsResponse from ..types.shared_params.message import Message -from ..types.inference_completion_response import InferenceCompletionResponse from ..types.shared_params.response_format import ResponseFormat from ..types.shared_params.sampling_params import SamplingParams +from ..types.shared.chat_completion_response import ChatCompletionResponse from ..types.shared_params.interleaved_content import InterleavedContent -from ..types.inference_chat_completion_response import InferenceChatCompletionResponse +from ..types.chat_completion_response_stream_chunk import ChatCompletionResponseStreamChunk __all__ = ["InferenceResource", "AsyncInferenceResource"] @@ -78,7 +79,7 @@ def chat_completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceChatCompletionResponse: + ) -> ChatCompletionResponse: """ Generate a chat completion for the given messages using the specified model. @@ -142,7 +143,7 @@ def chat_completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Stream[InferenceChatCompletionResponse]: + ) -> Stream[ChatCompletionResponseStreamChunk]: """ Generate a chat completion for the given messages using the specified model. @@ -206,7 +207,7 @@ def chat_completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceChatCompletionResponse | Stream[InferenceChatCompletionResponse]: + ) -> ChatCompletionResponse | Stream[ChatCompletionResponseStreamChunk]: """ Generate a chat completion for the given messages using the specified model. 
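For orientation, a minimal synchronous usage sketch of the renamed types follows. It is illustrative rather than part of the change; the base URL, model id, and message text are placeholders, and a reachable Llama Stack server is assumed.

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:5000")  # placeholder server address

    # Without stream=True, chat_completion now returns the shared ChatCompletionResponse model.
    response = client.inference.chat_completion(
        messages=[{"role": "user", "content": "Hello"}],
        model_id="model_id",  # placeholder model id
    )
    print(response.completion_message)

    # With stream=True, the same method returns Stream[ChatCompletionResponseStreamChunk] instead.
    for chunk in client.inference.chat_completion(
        messages=[{"role": "user", "content": "Hello"}],
        model_id="model_id",
        stream=True,
    ):
        print(chunk.event.event_type, chunk.event.delta)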
@@ -270,35 +271,29 @@ def chat_completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceChatCompletionResponse | Stream[InferenceChatCompletionResponse]: - extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})} - return cast( - InferenceChatCompletionResponse, - self._post( - "/v1/inference/chat-completion", - body=maybe_transform( - { - "messages": messages, - "model_id": model_id, - "logprobs": logprobs, - "response_format": response_format, - "sampling_params": sampling_params, - "stream": stream, - "tool_choice": tool_choice, - "tool_prompt_format": tool_prompt_format, - "tools": tools, - }, - inference_chat_completion_params.InferenceChatCompletionParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=cast( - Any, InferenceChatCompletionResponse - ), # Union types cannot be passed in as arguments in the type system - stream=stream or False, - stream_cls=Stream[InferenceChatCompletionResponse], + ) -> ChatCompletionResponse | Stream[ChatCompletionResponseStreamChunk]: + return self._post( + "/v1/inference/chat-completion", + body=maybe_transform( + { + "messages": messages, + "model_id": model_id, + "logprobs": logprobs, + "response_format": response_format, + "sampling_params": sampling_params, + "stream": stream, + "tool_choice": tool_choice, + "tool_prompt_format": tool_prompt_format, + "tools": tools, + }, + inference_chat_completion_params.InferenceChatCompletionParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), + cast_to=ChatCompletionResponse, + stream=stream or False, + stream_cls=Stream[ChatCompletionResponseStreamChunk], ) @overload @@ -317,7 +312,7 @@ def completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceCompletionResponse: + ) -> CompletionResponse: """ Generate a completion for the given content using the specified model. @@ -363,7 +358,7 @@ def completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Stream[InferenceCompletionResponse]: + ) -> Stream[CompletionResponse]: """ Generate a completion for the given content using the specified model. @@ -409,7 +404,7 @@ def completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceCompletionResponse | Stream[InferenceCompletionResponse]: + ) -> CompletionResponse | Stream[CompletionResponse]: """ Generate a completion for the given content using the specified model. 
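The completion overloads follow the same pattern: CompletionResponse when not streaming, Stream[CompletionResponse] when stream=True. A short sketch under the same assumptions; the fields of CompletionResponse are not shown in this diff, so the attribute read below is an assumption.

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:5000")  # placeholder server address

    # Non-streaming completion now returns CompletionResponse.
    completion = client.inference.completion(
        content="string",     # placeholder prompt
        model_id="model_id",  # placeholder model id
    )
    print(completion.content)  # `content` is assumed; CompletionResponse fields are not defined in this diff

    # With stream=True the same call returns Stream[CompletionResponse].
    for chunk in client.inference.completion(
        content="string",
        model_id="model_id",
        stream=True,
    ):
        print(chunk)  # chunk field layout is likewise not shown in this diff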
@@ -455,32 +450,26 @@ def completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceCompletionResponse | Stream[InferenceCompletionResponse]: - extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})} - return cast( - InferenceCompletionResponse, - self._post( - "/v1/inference/completion", - body=maybe_transform( - { - "content": content, - "model_id": model_id, - "logprobs": logprobs, - "response_format": response_format, - "sampling_params": sampling_params, - "stream": stream, - }, - inference_completion_params.InferenceCompletionParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=cast( - Any, InferenceCompletionResponse - ), # Union types cannot be passed in as arguments in the type system - stream=stream or False, - stream_cls=Stream[InferenceCompletionResponse], + ) -> CompletionResponse | Stream[CompletionResponse]: + return self._post( + "/v1/inference/completion", + body=maybe_transform( + { + "content": content, + "model_id": model_id, + "logprobs": logprobs, + "response_format": response_format, + "sampling_params": sampling_params, + "stream": stream, + }, + inference_completion_params.InferenceCompletionParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), + cast_to=CompletionResponse, + stream=stream or False, + stream_cls=Stream[CompletionResponse], ) def embeddings( @@ -569,7 +558,7 @@ async def chat_completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceChatCompletionResponse: + ) -> ChatCompletionResponse: """ Generate a chat completion for the given messages using the specified model. @@ -633,7 +622,7 @@ async def chat_completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[InferenceChatCompletionResponse]: + ) -> AsyncStream[ChatCompletionResponseStreamChunk]: """ Generate a chat completion for the given messages using the specified model. @@ -697,7 +686,7 @@ async def chat_completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceChatCompletionResponse | AsyncStream[InferenceChatCompletionResponse]: + ) -> ChatCompletionResponse | AsyncStream[ChatCompletionResponseStreamChunk]: """ Generate a chat completion for the given messages using the specified model. 
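The async resource mirrors the sync one: ChatCompletionResponse for plain calls, AsyncStream[ChatCompletionResponseStreamChunk] when stream=True, and the same renaming applies to the async completion overloads below. A sketch with the same placeholder values:

    import asyncio

    from llama_stack_client import AsyncLlamaStackClient

    async def main() -> None:
        client = AsyncLlamaStackClient(base_url="http://localhost:5000")  # placeholder server address
        stream = await client.inference.chat_completion(
            messages=[{"role": "user", "content": "Hello"}],
            model_id="model_id",  # placeholder model id
            stream=True,
        )
        # AsyncStream is consumed with `async for`; each chunk carries one Event with a ContentDelta.
        async for chunk in stream:
            print(chunk.event.event_type, chunk.event.delta)

    asyncio.run(main())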
@@ -761,35 +750,29 @@ async def chat_completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceChatCompletionResponse | AsyncStream[InferenceChatCompletionResponse]: - extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})} - return cast( - InferenceChatCompletionResponse, - await self._post( - "/v1/inference/chat-completion", - body=await async_maybe_transform( - { - "messages": messages, - "model_id": model_id, - "logprobs": logprobs, - "response_format": response_format, - "sampling_params": sampling_params, - "stream": stream, - "tool_choice": tool_choice, - "tool_prompt_format": tool_prompt_format, - "tools": tools, - }, - inference_chat_completion_params.InferenceChatCompletionParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=cast( - Any, InferenceChatCompletionResponse - ), # Union types cannot be passed in as arguments in the type system - stream=stream or False, - stream_cls=AsyncStream[InferenceChatCompletionResponse], + ) -> ChatCompletionResponse | AsyncStream[ChatCompletionResponseStreamChunk]: + return await self._post( + "/v1/inference/chat-completion", + body=await async_maybe_transform( + { + "messages": messages, + "model_id": model_id, + "logprobs": logprobs, + "response_format": response_format, + "sampling_params": sampling_params, + "stream": stream, + "tool_choice": tool_choice, + "tool_prompt_format": tool_prompt_format, + "tools": tools, + }, + inference_chat_completion_params.InferenceChatCompletionParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), + cast_to=ChatCompletionResponse, + stream=stream or False, + stream_cls=AsyncStream[ChatCompletionResponseStreamChunk], ) @overload @@ -808,7 +791,7 @@ async def completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceCompletionResponse: + ) -> CompletionResponse: """ Generate a completion for the given content using the specified model. @@ -854,7 +837,7 @@ async def completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[InferenceCompletionResponse]: + ) -> AsyncStream[CompletionResponse]: """ Generate a completion for the given content using the specified model. @@ -900,7 +883,7 @@ async def completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceCompletionResponse | AsyncStream[InferenceCompletionResponse]: + ) -> CompletionResponse | AsyncStream[CompletionResponse]: """ Generate a completion for the given content using the specified model. 
@@ -946,32 +929,26 @@ async def completion( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceCompletionResponse | AsyncStream[InferenceCompletionResponse]: - extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})} - return cast( - InferenceCompletionResponse, - await self._post( - "/v1/inference/completion", - body=await async_maybe_transform( - { - "content": content, - "model_id": model_id, - "logprobs": logprobs, - "response_format": response_format, - "sampling_params": sampling_params, - "stream": stream, - }, - inference_completion_params.InferenceCompletionParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=cast( - Any, InferenceCompletionResponse - ), # Union types cannot be passed in as arguments in the type system - stream=stream or False, - stream_cls=AsyncStream[InferenceCompletionResponse], + ) -> CompletionResponse | AsyncStream[CompletionResponse]: + return await self._post( + "/v1/inference/completion", + body=await async_maybe_transform( + { + "content": content, + "model_id": model_id, + "logprobs": logprobs, + "response_format": response_format, + "sampling_params": sampling_params, + "stream": stream, + }, + inference_completion_params.InferenceCompletionParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), + cast_to=CompletionResponse, + stream=stream or False, + stream_cls=AsyncStream[CompletionResponse], ) async def embeddings( diff --git a/src/llama_stack_client/types/__init__.py b/src/llama_stack_client/types/__init__.py index 987af04e..45824a74 100644 --- a/src/llama_stack_client/types/__init__.py +++ b/src/llama_stack_client/types/__init__.py @@ -29,6 +29,7 @@ ToolParamDefinition as ToolParamDefinition, ToolResponseMessage as ToolResponseMessage, QueryGeneratorConfig as QueryGeneratorConfig, + ChatCompletionResponse as ChatCompletionResponse, InterleavedContentItem as InterleavedContentItem, ) from .shield import Shield as Shield @@ -109,7 +110,6 @@ from .datasetio_append_rows_params import DatasetioAppendRowsParams as DatasetioAppendRowsParams from .scoring_score_batch_response import ScoringScoreBatchResponse as ScoringScoreBatchResponse from .telemetry_query_spans_params import TelemetryQuerySpansParams as TelemetryQuerySpansParams -from .inference_completion_response import InferenceCompletionResponse as InferenceCompletionResponse from .telemetry_query_traces_params import TelemetryQueryTracesParams as TelemetryQueryTracesParams from .scoring_function_list_response import ScoringFunctionListResponse as ScoringFunctionListResponse from .telemetry_get_span_tree_params import TelemetryGetSpanTreeParams as TelemetryGetSpanTreeParams @@ -123,9 +123,11 @@ from .scoring_function_register_params import ScoringFunctionRegisterParams as ScoringFunctionRegisterParams from .telemetry_get_span_tree_response import TelemetryGetSpanTreeResponse as TelemetryGetSpanTreeResponse from .batch_inference_completion_params import BatchInferenceCompletionParams as BatchInferenceCompletionParams -from .inference_chat_completion_response import InferenceChatCompletionResponse as InferenceChatCompletionResponse from .synthetic_data_generation_response import SyntheticDataGenerationResponse as SyntheticDataGenerationResponse from .datasetio_get_rows_paginated_params import 
DatasetioGetRowsPaginatedParams as DatasetioGetRowsPaginatedParams +from .chat_completion_response_stream_chunk import ( + ChatCompletionResponseStreamChunk as ChatCompletionResponseStreamChunk, +) from .batch_inference_chat_completion_params import ( BatchInferenceChatCompletionParams as BatchInferenceChatCompletionParams, ) diff --git a/src/llama_stack_client/types/agents/__init__.py b/src/llama_stack_client/types/agents/__init__.py index 5ee899c2..be21f291 100644 --- a/src/llama_stack_client/types/agents/__init__.py +++ b/src/llama_stack_client/types/agents/__init__.py @@ -6,9 +6,9 @@ from .session import Session as Session from .turn_create_params import TurnCreateParams as TurnCreateParams from .turn_response_event import TurnResponseEvent as TurnResponseEvent -from .turn_create_response import TurnCreateResponse as TurnCreateResponse from .session_create_params import SessionCreateParams as SessionCreateParams from .step_retrieve_response import StepRetrieveResponse as StepRetrieveResponse from .session_create_response import SessionCreateResponse as SessionCreateResponse from .session_retrieve_params import SessionRetrieveParams as SessionRetrieveParams from .turn_response_event_payload import TurnResponseEventPayload as TurnResponseEventPayload +from .agent_turn_response_stream_chunk import AgentTurnResponseStreamChunk as AgentTurnResponseStreamChunk diff --git a/src/llama_stack_client/types/agents/agent_turn_response_stream_chunk.py b/src/llama_stack_client/types/agents/agent_turn_response_stream_chunk.py new file mode 100644 index 00000000..bda45d88 --- /dev/null +++ b/src/llama_stack_client/types/agents/agent_turn_response_stream_chunk.py @@ -0,0 +1,11 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + +from ..._models import BaseModel +from .turn_response_event import TurnResponseEvent + +__all__ = ["AgentTurnResponseStreamChunk"] + + +class AgentTurnResponseStreamChunk(BaseModel): + event: TurnResponseEvent diff --git a/src/llama_stack_client/types/agents/turn.py b/src/llama_stack_client/types/agents/turn.py index 610da1a9..c34efa08 100644 --- a/src/llama_stack_client/types/agents/turn.py +++ b/src/llama_stack_client/types/agents/turn.py @@ -32,20 +32,29 @@ class OutputAttachmentContentImageContentItemImage(BaseModel): data: Optional[str] = None + """base64 encoded image data as string""" url: Optional[URL] = None + """A URL of the image or data URL in the format of data:image/{type};base64,{data}. + + Note that URL could have length limits. + """ class OutputAttachmentContentImageContentItem(BaseModel): image: OutputAttachmentContentImageContentItemImage + """Image as a base64 encoded string or an URL""" type: Literal["image"] + """Discriminator type of the content item. Always "image" """ class OutputAttachmentContentTextContentItem(BaseModel): text: str + """Text content""" type: Literal["text"] + """Discriminator type of the content item. 
Always "text" """ OutputAttachmentContent: TypeAlias = Union[ diff --git a/src/llama_stack_client/types/agents/turn_create_params.py b/src/llama_stack_client/types/agents/turn_create_params.py index f8586b6d..23bfc6a3 100644 --- a/src/llama_stack_client/types/agents/turn_create_params.py +++ b/src/llama_stack_client/types/agents/turn_create_params.py @@ -40,20 +40,29 @@ class TurnCreateParamsBase(TypedDict, total=False): class DocumentContentImageContentItemImage(TypedDict, total=False): data: str + """base64 encoded image data as string""" url: URL + """A URL of the image or data URL in the format of data:image/{type};base64,{data}. + + Note that URL could have length limits. + """ class DocumentContentImageContentItem(TypedDict, total=False): image: Required[DocumentContentImageContentItemImage] + """Image as a base64 encoded string or an URL""" type: Required[Literal["image"]] + """Discriminator type of the content item. Always "image" """ class DocumentContentTextContentItem(TypedDict, total=False): text: Required[str] + """Text content""" type: Required[Literal["text"]] + """Discriminator type of the content item. Always "text" """ DocumentContent: TypeAlias = Union[ diff --git a/src/llama_stack_client/types/batch_inference_chat_completion_response.py b/src/llama_stack_client/types/batch_inference_chat_completion_response.py index b7472b59..218b1275 100644 --- a/src/llama_stack_client/types/batch_inference_chat_completion_response.py +++ b/src/llama_stack_client/types/batch_inference_chat_completion_response.py @@ -1,21 +1,12 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import List, Optional +from typing import List from .._models import BaseModel -from .token_log_probs import TokenLogProbs -from .shared.completion_message import CompletionMessage +from .shared.chat_completion_response import ChatCompletionResponse -__all__ = ["BatchInferenceChatCompletionResponse", "Batch"] - - -class Batch(BaseModel): - completion_message: CompletionMessage - """The complete response message""" - - logprobs: Optional[List[TokenLogProbs]] = None - """Optional log probabilities for generated tokens""" +__all__ = ["BatchInferenceChatCompletionResponse"] class BatchInferenceChatCompletionResponse(BaseModel): - batch: List[Batch] + batch: List[ChatCompletionResponse] diff --git a/src/llama_stack_client/types/chat_completion_response_stream_chunk.py b/src/llama_stack_client/types/chat_completion_response_stream_chunk.py new file mode 100644 index 00000000..99916add --- /dev/null +++ b/src/llama_stack_client/types/chat_completion_response_stream_chunk.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from .._models import BaseModel +from .token_log_probs import TokenLogProbs +from .shared.content_delta import ContentDelta + +__all__ = ["ChatCompletionResponseStreamChunk", "Event"] + + +class Event(BaseModel): + delta: ContentDelta + """Content generated since last event. + + This can be one or more tokens, or a tool call. 
+ """ + + event_type: Literal["start", "complete", "progress"] + """Type of the event""" + + logprobs: Optional[List[TokenLogProbs]] = None + """Optional log probabilities for generated tokens""" + + stop_reason: Optional[Literal["end_of_turn", "end_of_message", "out_of_tokens"]] = None + """Optional reason why generation stopped, if complete""" + + +class ChatCompletionResponseStreamChunk(BaseModel): + event: Event + """The event containing the new content""" diff --git a/src/llama_stack_client/types/shared/__init__.py b/src/llama_stack_client/types/shared/__init__.py index cc0a1460..075a664d 100644 --- a/src/llama_stack_client/types/shared/__init__.py +++ b/src/llama_stack_client/types/shared/__init__.py @@ -22,4 +22,5 @@ from .tool_param_definition import ToolParamDefinition as ToolParamDefinition from .tool_response_message import ToolResponseMessage as ToolResponseMessage from .query_generator_config import QueryGeneratorConfig as QueryGeneratorConfig +from .chat_completion_response import ChatCompletionResponse as ChatCompletionResponse from .interleaved_content_item import InterleavedContentItem as InterleavedContentItem diff --git a/src/llama_stack_client/types/shared/chat_completion_response.py b/src/llama_stack_client/types/shared/chat_completion_response.py new file mode 100644 index 00000000..e8c5071e --- /dev/null +++ b/src/llama_stack_client/types/shared/chat_completion_response.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional + +from ..._models import BaseModel +from ..token_log_probs import TokenLogProbs +from .completion_message import CompletionMessage + +__all__ = ["ChatCompletionResponse"] + + +class ChatCompletionResponse(BaseModel): + completion_message: CompletionMessage + """The complete response message""" + + logprobs: Optional[List[TokenLogProbs]] = None + """Optional log probabilities for generated tokens""" diff --git a/src/llama_stack_client/types/shared/document.py b/src/llama_stack_client/types/shared/document.py index e88960b4..c0ac8b9d 100644 --- a/src/llama_stack_client/types/shared/document.py +++ b/src/llama_stack_client/types/shared/document.py @@ -12,20 +12,29 @@ class ContentImageContentItemImage(BaseModel): data: Optional[str] = None + """base64 encoded image data as string""" url: Optional[URL] = None + """A URL of the image or data URL in the format of data:image/{type};base64,{data}. + + Note that URL could have length limits. + """ class ContentImageContentItem(BaseModel): image: ContentImageContentItemImage + """Image as a base64 encoded string or an URL""" type: Literal["image"] + """Discriminator type of the content item. Always "image" """ class ContentTextContentItem(BaseModel): text: str + """Text content""" type: Literal["text"] + """Discriminator type of the content item. Always "text" """ Content: TypeAlias = Union[str, ContentImageContentItem, ContentTextContentItem, List[InterleavedContentItem], URL] diff --git a/src/llama_stack_client/types/shared/interleaved_content.py b/src/llama_stack_client/types/shared/interleaved_content.py index 3a15e3cb..02a9b43e 100644 --- a/src/llama_stack_client/types/shared/interleaved_content.py +++ b/src/llama_stack_client/types/shared/interleaved_content.py @@ -12,20 +12,29 @@ class ImageContentItemImage(BaseModel): data: Optional[str] = None + """base64 encoded image data as string""" url: Optional[URL] = None + """A URL of the image or data URL in the format of data:image/{type};base64,{data}. 
+ + Note that URL could have length limits. + """ class ImageContentItem(BaseModel): image: ImageContentItemImage + """Image as a base64 encoded string or an URL""" type: Literal["image"] + """Discriminator type of the content item. Always "image" """ class TextContentItem(BaseModel): text: str + """Text content""" type: Literal["text"] + """Discriminator type of the content item. Always "text" """ InterleavedContent: TypeAlias = Union[str, ImageContentItem, TextContentItem, List[InterleavedContentItem]] diff --git a/src/llama_stack_client/types/shared/interleaved_content_item.py b/src/llama_stack_client/types/shared/interleaved_content_item.py index 30a14ee3..c7030b1c 100644 --- a/src/llama_stack_client/types/shared/interleaved_content_item.py +++ b/src/llama_stack_client/types/shared/interleaved_content_item.py @@ -12,20 +12,29 @@ class ImageContentItemImage(BaseModel): data: Optional[str] = None + """base64 encoded image data as string""" url: Optional[URL] = None + """A URL of the image or data URL in the format of data:image/{type};base64,{data}. + + Note that URL could have length limits. + """ class ImageContentItem(BaseModel): image: ImageContentItemImage + """Image as a base64 encoded string or an URL""" type: Literal["image"] + """Discriminator type of the content item. Always "image" """ class TextContentItem(BaseModel): text: str + """Text content""" type: Literal["text"] + """Discriminator type of the content item. Always "text" """ InterleavedContentItem: TypeAlias = Annotated[ diff --git a/src/llama_stack_client/types/shared_params/document.py b/src/llama_stack_client/types/shared_params/document.py index 9fb05ab2..1b5d5f09 100644 --- a/src/llama_stack_client/types/shared_params/document.py +++ b/src/llama_stack_client/types/shared_params/document.py @@ -13,20 +13,29 @@ class ContentImageContentItemImage(TypedDict, total=False): data: str + """base64 encoded image data as string""" url: URL + """A URL of the image or data URL in the format of data:image/{type};base64,{data}. + + Note that URL could have length limits. + """ class ContentImageContentItem(TypedDict, total=False): image: Required[ContentImageContentItemImage] + """Image as a base64 encoded string or an URL""" type: Required[Literal["image"]] + """Discriminator type of the content item. Always "image" """ class ContentTextContentItem(TypedDict, total=False): text: Required[str] + """Text content""" type: Required[Literal["text"]] + """Discriminator type of the content item. Always "text" """ Content: TypeAlias = Union[str, ContentImageContentItem, ContentTextContentItem, Iterable[InterleavedContentItem], URL] diff --git a/src/llama_stack_client/types/shared_params/interleaved_content.py b/src/llama_stack_client/types/shared_params/interleaved_content.py index fcdfa7d7..8d5605fb 100644 --- a/src/llama_stack_client/types/shared_params/interleaved_content.py +++ b/src/llama_stack_client/types/shared_params/interleaved_content.py @@ -13,20 +13,29 @@ class ImageContentItemImage(TypedDict, total=False): data: str + """base64 encoded image data as string""" url: URL + """A URL of the image or data URL in the format of data:image/{type};base64,{data}. + + Note that URL could have length limits. + """ class ImageContentItem(TypedDict, total=False): image: Required[ImageContentItemImage] + """Image as a base64 encoded string or an URL""" type: Required[Literal["image"]] + """Discriminator type of the content item. 
Always "image" """ class TextContentItem(TypedDict, total=False): text: Required[str] + """Text content""" type: Required[Literal["text"]] + """Discriminator type of the content item. Always "text" """ InterleavedContent: TypeAlias = Union[str, ImageContentItem, TextContentItem, Iterable[InterleavedContentItem]] diff --git a/src/llama_stack_client/types/shared_params/interleaved_content_item.py b/src/llama_stack_client/types/shared_params/interleaved_content_item.py index 8a5da06f..acb7e6f1 100644 --- a/src/llama_stack_client/types/shared_params/interleaved_content_item.py +++ b/src/llama_stack_client/types/shared_params/interleaved_content_item.py @@ -12,20 +12,29 @@ class ImageContentItemImage(TypedDict, total=False): data: str + """base64 encoded image data as string""" url: URL + """A URL of the image or data URL in the format of data:image/{type};base64,{data}. + + Note that URL could have length limits. + """ class ImageContentItem(TypedDict, total=False): image: Required[ImageContentItemImage] + """Image as a base64 encoded string or an URL""" type: Required[Literal["image"]] + """Discriminator type of the content item. Always "image" """ class TextContentItem(TypedDict, total=False): text: Required[str] + """Text content""" type: Required[Literal["text"]] + """Discriminator type of the content item. Always "text" """ InterleavedContentItem: TypeAlias = Union[ImageContentItem, TextContentItem] diff --git a/tests/api_resources/agents/test_turn.py b/tests/api_resources/agents/test_turn.py index c6e7138e..b4bc87b3 100644 --- a/tests/api_resources/agents/test_turn.py +++ b/tests/api_resources/agents/test_turn.py @@ -9,7 +9,7 @@ from tests.utils import assert_matches_type from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types.agents import Turn, TurnCreateResponse +from llama_stack_client.types.agents import Turn base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -17,9 +17,6 @@ class TestTurn: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_create_overload_1(self, client: LlamaStackClient) -> None: turn = client.agents.turn.create( @@ -32,11 +29,8 @@ def test_method_create_overload_1(self, client: LlamaStackClient) -> None: } ], ) - assert_matches_type(TurnCreateResponse, turn, path=["response"]) + assert_matches_type(Turn, turn, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient) -> None: turn = client.agents.turn.create( @@ -58,11 +52,8 @@ def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient stream=False, toolgroups=["string"], ) - assert_matches_type(TurnCreateResponse, turn, path=["response"]) + assert_matches_type(Turn, turn, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_raw_response_create_overload_1(self, client: LlamaStackClient) -> None: response = client.agents.turn.with_raw_response.create( @@ -79,11 +70,8 @@ def test_raw_response_create_overload_1(self, client: LlamaStackClient) -> None: assert response.is_closed is True 
assert response.http_request.headers.get("X-Stainless-Lang") == "python" turn = response.parse() - assert_matches_type(TurnCreateResponse, turn, path=["response"]) + assert_matches_type(Turn, turn, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_streaming_response_create_overload_1(self, client: LlamaStackClient) -> None: with client.agents.turn.with_streaming_response.create( @@ -100,13 +88,10 @@ def test_streaming_response_create_overload_1(self, client: LlamaStackClient) -> assert response.http_request.headers.get("X-Stainless-Lang") == "python" turn = response.parse() - assert_matches_type(TurnCreateResponse, turn, path=["response"]) + assert_matches_type(Turn, turn, path=["response"]) assert cast(Any, response.is_closed) is True - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_path_params_create_overload_1(self, client: LlamaStackClient) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): @@ -133,9 +118,6 @@ def test_path_params_create_overload_1(self, client: LlamaStackClient) -> None: ], ) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_create_overload_2(self, client: LlamaStackClient) -> None: turn_stream = client.agents.turn.create( @@ -151,9 +133,6 @@ def test_method_create_overload_2(self, client: LlamaStackClient) -> None: ) turn_stream.response.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient) -> None: turn_stream = client.agents.turn.create( @@ -177,9 +156,6 @@ def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient ) turn_stream.response.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_raw_response_create_overload_2(self, client: LlamaStackClient) -> None: response = client.agents.turn.with_raw_response.create( @@ -198,9 +174,6 @@ def test_raw_response_create_overload_2(self, client: LlamaStackClient) -> None: stream = response.parse() stream.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_streaming_response_create_overload_2(self, client: LlamaStackClient) -> None: with client.agents.turn.with_streaming_response.create( @@ -222,9 +195,6 @@ def test_streaming_response_create_overload_2(self, client: LlamaStackClient) -> assert cast(Any, response.is_closed) is True - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_path_params_create_overload_2(self, client: LlamaStackClient) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): @@ -317,9 +287,6 @@ def test_path_params_retrieve(self, client: LlamaStackClient) -> None: class TestAsyncTurn: parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", 
"strict"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: turn = await async_client.agents.turn.create( @@ -332,11 +299,8 @@ async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClien } ], ) - assert_matches_type(TurnCreateResponse, turn, path=["response"]) + assert_matches_type(Turn, turn, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_method_create_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None: turn = await async_client.agents.turn.create( @@ -358,11 +322,8 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn stream=False, toolgroups=["string"], ) - assert_matches_type(TurnCreateResponse, turn, path=["response"]) + assert_matches_type(Turn, turn, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_raw_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.agents.turn.with_raw_response.create( @@ -379,11 +340,8 @@ async def test_raw_response_create_overload_1(self, async_client: AsyncLlamaStac assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" turn = await response.parse() - assert_matches_type(TurnCreateResponse, turn, path=["response"]) + assert_matches_type(Turn, turn, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_streaming_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.agents.turn.with_streaming_response.create( @@ -400,13 +358,10 @@ async def test_streaming_response_create_overload_1(self, async_client: AsyncLla assert response.http_request.headers.get("X-Stainless-Lang") == "python" turn = await response.parse() - assert_matches_type(TurnCreateResponse, turn, path=["response"]) + assert_matches_type(Turn, turn, path=["response"]) assert cast(Any, response.is_closed) is True - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_path_params_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): @@ -433,9 +388,6 @@ async def test_path_params_create_overload_1(self, async_client: AsyncLlamaStack ], ) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_method_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: turn_stream = await async_client.agents.turn.create( @@ -451,9 +403,6 @@ async def test_method_create_overload_2(self, async_client: AsyncLlamaStackClien ) await turn_stream.response.aclose() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def 
test_method_create_with_all_params_overload_2(self, async_client: AsyncLlamaStackClient) -> None: turn_stream = await async_client.agents.turn.create( @@ -477,9 +426,6 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn ) await turn_stream.response.aclose() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_raw_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.agents.turn.with_raw_response.create( @@ -498,9 +444,6 @@ async def test_raw_response_create_overload_2(self, async_client: AsyncLlamaStac stream = await response.parse() await stream.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_streaming_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.agents.turn.with_streaming_response.create( @@ -522,9 +465,6 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncLla assert cast(Any, response.is_closed) is True - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_path_params_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): diff --git a/tests/api_resources/test_inference.py b/tests/api_resources/test_inference.py index ab03db1c..64c912d2 100644 --- a/tests/api_resources/test_inference.py +++ b/tests/api_resources/test_inference.py @@ -10,10 +10,10 @@ from tests.utils import assert_matches_type from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient from llama_stack_client.types import ( + CompletionResponse, EmbeddingsResponse, - InferenceCompletionResponse, - InferenceChatCompletionResponse, ) +from llama_stack_client.types.shared import ChatCompletionResponse base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -21,9 +21,6 @@ class TestInference: parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_chat_completion_overload_1(self, client: LlamaStackClient) -> None: inference = client.inference.chat_completion( @@ -35,11 +32,8 @@ def test_method_chat_completion_overload_1(self, client: LlamaStackClient) -> No ], model_id="model_id", ) - assert_matches_type(InferenceChatCompletionResponse, inference, path=["response"]) + assert_matches_type(ChatCompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_chat_completion_with_all_params_overload_1(self, client: LlamaStackClient) -> None: inference = client.inference.chat_completion( @@ -79,11 +73,8 @@ def test_method_chat_completion_with_all_params_overload_1(self, client: LlamaSt } ], ) - assert_matches_type(InferenceChatCompletionResponse, inference, path=["response"]) + assert_matches_type(ChatCompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good 
way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_raw_response_chat_completion_overload_1(self, client: LlamaStackClient) -> None: response = client.inference.with_raw_response.chat_completion( @@ -99,11 +90,8 @@ def test_raw_response_chat_completion_overload_1(self, client: LlamaStackClient) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" inference = response.parse() - assert_matches_type(InferenceChatCompletionResponse, inference, path=["response"]) + assert_matches_type(ChatCompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_streaming_response_chat_completion_overload_1(self, client: LlamaStackClient) -> None: with client.inference.with_streaming_response.chat_completion( @@ -119,13 +107,10 @@ def test_streaming_response_chat_completion_overload_1(self, client: LlamaStackC assert response.http_request.headers.get("X-Stainless-Lang") == "python" inference = response.parse() - assert_matches_type(InferenceChatCompletionResponse, inference, path=["response"]) + assert_matches_type(ChatCompletionResponse, inference, path=["response"]) assert cast(Any, response.is_closed) is True - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_chat_completion_overload_2(self, client: LlamaStackClient) -> None: inference_stream = client.inference.chat_completion( @@ -140,9 +125,6 @@ def test_method_chat_completion_overload_2(self, client: LlamaStackClient) -> No ) inference_stream.response.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_chat_completion_with_all_params_overload_2(self, client: LlamaStackClient) -> None: inference_stream = client.inference.chat_completion( @@ -184,9 +166,6 @@ def test_method_chat_completion_with_all_params_overload_2(self, client: LlamaSt ) inference_stream.response.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_raw_response_chat_completion_overload_2(self, client: LlamaStackClient) -> None: response = client.inference.with_raw_response.chat_completion( @@ -204,9 +183,6 @@ def test_raw_response_chat_completion_overload_2(self, client: LlamaStackClient) stream = response.parse() stream.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_streaming_response_chat_completion_overload_2(self, client: LlamaStackClient) -> None: with client.inference.with_streaming_response.chat_completion( @@ -227,20 +203,14 @@ def test_streaming_response_chat_completion_overload_2(self, client: LlamaStackC assert cast(Any, response.is_closed) is True - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_completion_overload_1(self, client: LlamaStackClient) -> None: inference = client.inference.completion( content="string", model_id="model_id", ) - assert_matches_type(InferenceCompletionResponse, inference, 
path=["response"]) + assert_matches_type(CompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_completion_with_all_params_overload_1(self, client: LlamaStackClient) -> None: inference = client.inference.completion( @@ -258,11 +228,8 @@ def test_method_completion_with_all_params_overload_1(self, client: LlamaStackCl }, stream=False, ) - assert_matches_type(InferenceCompletionResponse, inference, path=["response"]) + assert_matches_type(CompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_raw_response_completion_overload_1(self, client: LlamaStackClient) -> None: response = client.inference.with_raw_response.completion( @@ -273,11 +240,8 @@ def test_raw_response_completion_overload_1(self, client: LlamaStackClient) -> N assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" inference = response.parse() - assert_matches_type(InferenceCompletionResponse, inference, path=["response"]) + assert_matches_type(CompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_streaming_response_completion_overload_1(self, client: LlamaStackClient) -> None: with client.inference.with_streaming_response.completion( @@ -288,13 +252,10 @@ def test_streaming_response_completion_overload_1(self, client: LlamaStackClient assert response.http_request.headers.get("X-Stainless-Lang") == "python" inference = response.parse() - assert_matches_type(InferenceCompletionResponse, inference, path=["response"]) + assert_matches_type(CompletionResponse, inference, path=["response"]) assert cast(Any, response.is_closed) is True - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_completion_overload_2(self, client: LlamaStackClient) -> None: inference_stream = client.inference.completion( @@ -304,9 +265,6 @@ def test_method_completion_overload_2(self, client: LlamaStackClient) -> None: ) inference_stream.response.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_method_completion_with_all_params_overload_2(self, client: LlamaStackClient) -> None: inference_stream = client.inference.completion( @@ -326,9 +284,6 @@ def test_method_completion_with_all_params_overload_2(self, client: LlamaStackCl ) inference_stream.response.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_raw_response_completion_overload_2(self, client: LlamaStackClient) -> None: response = client.inference.with_raw_response.completion( @@ -341,9 +296,6 @@ def test_raw_response_completion_overload_2(self, client: LlamaStackClient) -> N stream = response.parse() stream.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize def test_streaming_response_completion_overload_2(self, 
client: LlamaStackClient) -> None: with client.inference.with_streaming_response.completion( @@ -397,9 +349,6 @@ def test_streaming_response_embeddings(self, client: LlamaStackClient) -> None: class TestAsyncInference: parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_method_chat_completion_overload_1(self, async_client: AsyncLlamaStackClient) -> None: inference = await async_client.inference.chat_completion( @@ -411,11 +360,8 @@ async def test_method_chat_completion_overload_1(self, async_client: AsyncLlamaS ], model_id="model_id", ) - assert_matches_type(InferenceChatCompletionResponse, inference, path=["response"]) + assert_matches_type(ChatCompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_method_chat_completion_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None: inference = await async_client.inference.chat_completion( @@ -455,11 +401,8 @@ async def test_method_chat_completion_with_all_params_overload_1(self, async_cli } ], ) - assert_matches_type(InferenceChatCompletionResponse, inference, path=["response"]) + assert_matches_type(ChatCompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_raw_response_chat_completion_overload_1(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.inference.with_raw_response.chat_completion( @@ -475,11 +418,8 @@ async def test_raw_response_chat_completion_overload_1(self, async_client: Async assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" inference = await response.parse() - assert_matches_type(InferenceChatCompletionResponse, inference, path=["response"]) + assert_matches_type(ChatCompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_streaming_response_chat_completion_overload_1(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.inference.with_streaming_response.chat_completion( @@ -495,13 +435,10 @@ async def test_streaming_response_chat_completion_overload_1(self, async_client: assert response.http_request.headers.get("X-Stainless-Lang") == "python" inference = await response.parse() - assert_matches_type(InferenceChatCompletionResponse, inference, path=["response"]) + assert_matches_type(ChatCompletionResponse, inference, path=["response"]) assert cast(Any, response.is_closed) is True - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_method_chat_completion_overload_2(self, async_client: AsyncLlamaStackClient) -> None: inference_stream = await async_client.inference.chat_completion( @@ -516,9 +453,6 @@ async def test_method_chat_completion_overload_2(self, async_client: AsyncLlamaS ) await inference_stream.response.aclose() - @pytest.mark.skip( - reason="currently no good way to test 
endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_method_chat_completion_with_all_params_overload_2(self, async_client: AsyncLlamaStackClient) -> None: inference_stream = await async_client.inference.chat_completion( @@ -560,9 +494,6 @@ async def test_method_chat_completion_with_all_params_overload_2(self, async_cli ) await inference_stream.response.aclose() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_raw_response_chat_completion_overload_2(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.inference.with_raw_response.chat_completion( @@ -580,9 +511,6 @@ async def test_raw_response_chat_completion_overload_2(self, async_client: Async stream = await response.parse() await stream.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_streaming_response_chat_completion_overload_2(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.inference.with_streaming_response.chat_completion( @@ -603,20 +531,14 @@ async def test_streaming_response_chat_completion_overload_2(self, async_client: assert cast(Any, response.is_closed) is True - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_method_completion_overload_1(self, async_client: AsyncLlamaStackClient) -> None: inference = await async_client.inference.completion( content="string", model_id="model_id", ) - assert_matches_type(InferenceCompletionResponse, inference, path=["response"]) + assert_matches_type(CompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_method_completion_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None: inference = await async_client.inference.completion( @@ -634,11 +556,8 @@ async def test_method_completion_with_all_params_overload_1(self, async_client: }, stream=False, ) - assert_matches_type(InferenceCompletionResponse, inference, path=["response"]) + assert_matches_type(CompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_raw_response_completion_overload_1(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.inference.with_raw_response.completion( @@ -649,11 +568,8 @@ async def test_raw_response_completion_overload_1(self, async_client: AsyncLlama assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" inference = await response.parse() - assert_matches_type(InferenceCompletionResponse, inference, path=["response"]) + assert_matches_type(CompletionResponse, inference, path=["response"]) - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_streaming_response_completion_overload_1(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.inference.with_streaming_response.completion( @@ -664,13 
+580,10 @@ async def test_streaming_response_completion_overload_1(self, async_client: Asyn assert response.http_request.headers.get("X-Stainless-Lang") == "python" inference = await response.parse() - assert_matches_type(InferenceCompletionResponse, inference, path=["response"]) + assert_matches_type(CompletionResponse, inference, path=["response"]) assert cast(Any, response.is_closed) is True - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_method_completion_overload_2(self, async_client: AsyncLlamaStackClient) -> None: inference_stream = await async_client.inference.completion( @@ -680,9 +593,6 @@ async def test_method_completion_overload_2(self, async_client: AsyncLlamaStackC ) await inference_stream.response.aclose() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_method_completion_with_all_params_overload_2(self, async_client: AsyncLlamaStackClient) -> None: inference_stream = await async_client.inference.completion( @@ -702,9 +612,6 @@ async def test_method_completion_with_all_params_overload_2(self, async_client: ) await inference_stream.response.aclose() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_raw_response_completion_overload_2(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.inference.with_raw_response.completion( @@ -717,9 +624,6 @@ async def test_raw_response_completion_overload_2(self, async_client: AsyncLlama stream = await response.parse() await stream.close() - @pytest.mark.skip( - reason="currently no good way to test endpoints with content type text/event-stream, Prism mock server will fail" - ) @parametrize async def test_streaming_response_completion_overload_2(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.inference.with_streaming_response.completion(