From e391af5b47f10ca2c3fa7d36cacae1900af711b4 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Tue, 4 Nov 2025 02:01:56 +0000
Subject: [PATCH 1/2] feat(api): remove openai/v1 endpoints
---
.stats.yml | 8 +-
README.md | 51 ++++-
api.md | 35 +--
src/llama_stack_client/_client.py | 47 ----
src/llama_stack_client/resources/__init__.py | 14 --
.../resources/models/models.py | 23 +-
.../resources/models/openai.py | 18 +-
.../resources/synthetic_data_generation.py | 205 ------------------
src/llama_stack_client/types/__init__.py | 7 +-
src/llama_stack_client/types/model.py | 21 +-
.../types/model_list_response.py | 24 +-
.../types/model_register_response.py | 30 +++
.../types/model_retrieve_response.py | 30 +++
.../types/models/__init__.py | 2 -
.../types/models/openai_list_response.py | 10 -
.../types/shared/__init__.py | 1 -
.../types/shared/message.py | 22 --
.../types/shared_params/__init__.py | 3 -
.../types/shared_params/completion_message.py | 38 ----
.../types/shared_params/message.py | 21 --
.../types/shared_params/tool_call.py | 22 --
...nthetic_data_generation_generate_params.py | 31 ---
.../synthetic_data_generation_response.py | 24 --
tests/api_resources/models/test_openai.py | 14 +-
tests/api_resources/test_models.py | 34 +--
.../test_synthetic_data_generation.py | 158 --------------
26 files changed, 174 insertions(+), 719 deletions(-)
delete mode 100644 src/llama_stack_client/resources/synthetic_data_generation.py
create mode 100644 src/llama_stack_client/types/model_register_response.py
create mode 100644 src/llama_stack_client/types/model_retrieve_response.py
delete mode 100644 src/llama_stack_client/types/models/openai_list_response.py
delete mode 100644 src/llama_stack_client/types/shared/message.py
delete mode 100644 src/llama_stack_client/types/shared_params/completion_message.py
delete mode 100644 src/llama_stack_client/types/shared_params/message.py
delete mode 100644 src/llama_stack_client/types/shared_params/tool_call.py
delete mode 100644 src/llama_stack_client/types/synthetic_data_generation_generate_params.py
delete mode 100644 src/llama_stack_client/types/synthetic_data_generation_response.py
delete mode 100644 tests/api_resources/test_synthetic_data_generation.py
diff --git a/.stats.yml b/.stats.yml
index 29bc5044..8df73aad 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 112
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-a9f69d4a5f5d9bf957497cac83fdad1f72c8a44614098447762c53883e8bd987.yml
-openapi_spec_hash: 75de5bdff8e70591d6033b609fc24e5d
-config_hash: 34558d5f6e265184d712d43e231eb693
+configured_endpoints: 110
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-d95665c12a4155ef6ae80f76545152ac241d3ccab18148e4add99c0f528b9634.yml
+openapi_spec_hash: b6073c3436942c3ea6cd6c23f71a1cc4
+config_hash: 597b56196f814dd58c2cb2465aab9c9e
diff --git a/README.md b/README.md
index ceb18678..44184d41 100644
--- a/README.md
+++ b/README.md
@@ -31,16 +31,12 @@ The full API of this library can be found in [api.md](api.md). You may find basi
```python
from llama_stack_client import LlamaStackClient
-client = LlamaStackClient(
- base_url=f"http://{host}:{port}",
-)
+client = LlamaStackClient()
-response = client.chat.completions.create(
- messages=[{"role": "user", "content": "hello world, write me a 2 sentence poem about the moon"}],
- model="meta-llama/Llama-3.2-3B-Instruct",
- stream=False,
+response = client.models.register(
+ model_id="model_id",
)
-print(response)
+print(response.identifier)
```
While you can provide an `api_key` keyword argument, we recommend using [python-dotenv](https://pypi.org/project/python-dotenv/) to add `LLAMA_STACK_CLIENT_API_KEY="My API Key"` to your `.env` file so that your API Key is not stored in source control.
@@ -97,11 +93,10 @@ client = AsyncLlamaStackClient(
async def main() -> None:
- session = await client.agents.sessions.create(
- agent_id="agent_id",
- session_name="session_name",
+ response = await client.models.register(
+ model_id="model_id",
)
- print(session.session_id)
+ print(response.identifier)
asyncio.run(main())
@@ -109,6 +104,38 @@ asyncio.run(main())
Functionality between the synchronous and asynchronous clients is otherwise identical.
+### With aiohttp
+
+By default, the async client uses `httpx` for HTTP requests. However, for improved concurrency performance you may also use `aiohttp` as the HTTP backend.
+
+You can enable this by installing `aiohttp`:
+
+```sh
+# install from PyPI
+pip install --pre llama_stack_client[aiohttp]
+```
+
+Then you can enable it by instantiating the client with `http_client=DefaultAioHttpClient()`:
+
+```python
+import asyncio
+from llama_stack_client import DefaultAioHttpClient
+from llama_stack_client import AsyncLlamaStackClient
+
+
+async def main() -> None:
+ async with AsyncLlamaStackClient(
+ http_client=DefaultAioHttpClient(),
+ ) as client:
+ response = await client.models.register(
+ model_id="model_id",
+ )
+ print(response.identifier)
+
+
+asyncio.run(main())
+```
+
## Streaming responses
We provide support for streaming responses using Server Side Events (SSE).
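With the quickstart above now using `client.models.register`, a minimal sketch that also follows the python-dotenv recommendation might look like this (assuming `LLAMA_STACK_CLIENT_API_KEY` is set in `.env`):

```python
# Minimal sketch: load the API key from .env as the README recommends,
# then use the new quickstart call.
from dotenv import load_dotenv
from llama_stack_client import LlamaStackClient

load_dotenv()  # picks up LLAMA_STACK_CLIENT_API_KEY from .env

client = LlamaStackClient()

response = client.models.register(
    model_id="model_id",
)
print(response.identifier)
```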
diff --git a/api.md b/api.md
index bd1949f8..f57f325f 100644
--- a/api.md
+++ b/api.md
@@ -7,7 +7,6 @@ from llama_stack_client.types import (
Document,
InterleavedContent,
InterleavedContentItem,
- Message,
ParamType,
QueryConfig,
QueryResult,
@@ -300,27 +299,27 @@ Methods:
Types:
```python
-from llama_stack_client.types import ListModelsResponse, Model, ModelListResponse
+from llama_stack_client.types import (
+ ListModelsResponse,
+ Model,
+ ModelRetrieveResponse,
+ ModelListResponse,
+ ModelRegisterResponse,
+)
```
Methods:
-- client.models.retrieve(model_id) -> Model
-- client.models.list() -> ModelListResponse
-- client.models.register(\*\*params) -> Model
+- client.models.retrieve(model_id) -> ModelRetrieveResponse
+- client.models.list() -> ModelListResponse
+- client.models.register(\*\*params) -> ModelRegisterResponse
- client.models.unregister(model_id) -> None
## OpenAI
-Types:
-
-```python
-from llama_stack_client.types.models import OpenAIListResponse
-```
-
Methods:
-- client.models.openai.list() -> OpenAIListResponse
+- client.models.openai.list() -> ModelListResponse
# Providers
@@ -386,18 +385,6 @@ Methods:
- client.shields.delete(identifier) -> None
- client.shields.register(\*\*params) -> Shield
-# SyntheticDataGeneration
-
-Types:
-
-```python
-from llama_stack_client.types import SyntheticDataGenerationResponse
-```
-
-Methods:
-
-- client.synthetic_data_generation.generate(\*\*params) -> SyntheticDataGenerationResponse
-
# Scoring
Types:
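The model methods now return dedicated response types instead of the shared `Model`. A hedged sketch of working with the new return types (field names taken from the type definitions added later in this patch):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient()

# register() / retrieve() return ModelRegisterResponse / ModelRetrieveResponse,
# which keep the llama-stack resource fields.
registered = client.models.register(model_id="model_id")
print(registered.identifier, registered.provider_id)

retrieved = client.models.retrieve(registered.identifier)
print(retrieved.api_model_type)  # aliased from the wire field "model_type"

# list() returns ModelListResponse, a list of OpenAI-style Model objects.
for model in client.models.list():
    print(model.id, model.owned_by)
```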
diff --git a/src/llama_stack_client/_client.py b/src/llama_stack_client/_client.py
index b1880a7e..d42e6fb0 100644
--- a/src/llama_stack_client/_client.py
+++ b/src/llama_stack_client/_client.py
@@ -62,7 +62,6 @@
conversations,
vector_stores,
scoring_functions,
- synthetic_data_generation,
)
from .resources.files import FilesResource, AsyncFilesResource
from .resources.tools import ToolsResource, AsyncToolsResource
@@ -84,10 +83,6 @@
from .resources.prompts.prompts import PromptsResource, AsyncPromptsResource
from .resources.scoring_functions import ScoringFunctionsResource, AsyncScoringFunctionsResource
from .resources.responses.responses import ResponsesResource, AsyncResponsesResource
- from .resources.synthetic_data_generation import (
- SyntheticDataGenerationResource,
- AsyncSyntheticDataGenerationResource,
- )
from .resources.tool_runtime.tool_runtime import ToolRuntimeResource, AsyncToolRuntimeResource
from .resources.conversations.conversations import ConversationsResource, AsyncConversationsResource
from .resources.vector_stores.vector_stores import VectorStoresResource, AsyncVectorStoresResource
@@ -269,12 +264,6 @@ def shields(self) -> ShieldsResource:
return ShieldsResource(self)
- @cached_property
- def synthetic_data_generation(self) -> SyntheticDataGenerationResource:
- from .resources.synthetic_data_generation import SyntheticDataGenerationResource
-
- return SyntheticDataGenerationResource(self)
-
@cached_property
def scoring(self) -> ScoringResource:
from .resources.scoring import ScoringResource
@@ -585,12 +574,6 @@ def shields(self) -> AsyncShieldsResource:
return AsyncShieldsResource(self)
- @cached_property
- def synthetic_data_generation(self) -> AsyncSyntheticDataGenerationResource:
- from .resources.synthetic_data_generation import AsyncSyntheticDataGenerationResource
-
- return AsyncSyntheticDataGenerationResource(self)
-
@cached_property
def scoring(self) -> AsyncScoringResource:
from .resources.scoring import AsyncScoringResource
@@ -850,12 +833,6 @@ def shields(self) -> shields.ShieldsResourceWithRawResponse:
return ShieldsResourceWithRawResponse(self._client.shields)
- @cached_property
- def synthetic_data_generation(self) -> synthetic_data_generation.SyntheticDataGenerationResourceWithRawResponse:
- from .resources.synthetic_data_generation import SyntheticDataGenerationResourceWithRawResponse
-
- return SyntheticDataGenerationResourceWithRawResponse(self._client.synthetic_data_generation)
-
@cached_property
def scoring(self) -> scoring.ScoringResourceWithRawResponse:
from .resources.scoring import ScoringResourceWithRawResponse
@@ -1001,14 +978,6 @@ def shields(self) -> shields.AsyncShieldsResourceWithRawResponse:
return AsyncShieldsResourceWithRawResponse(self._client.shields)
- @cached_property
- def synthetic_data_generation(
- self,
- ) -> synthetic_data_generation.AsyncSyntheticDataGenerationResourceWithRawResponse:
- from .resources.synthetic_data_generation import AsyncSyntheticDataGenerationResourceWithRawResponse
-
- return AsyncSyntheticDataGenerationResourceWithRawResponse(self._client.synthetic_data_generation)
-
@cached_property
def scoring(self) -> scoring.AsyncScoringResourceWithRawResponse:
from .resources.scoring import AsyncScoringResourceWithRawResponse
@@ -1154,14 +1123,6 @@ def shields(self) -> shields.ShieldsResourceWithStreamingResponse:
return ShieldsResourceWithStreamingResponse(self._client.shields)
- @cached_property
- def synthetic_data_generation(
- self,
- ) -> synthetic_data_generation.SyntheticDataGenerationResourceWithStreamingResponse:
- from .resources.synthetic_data_generation import SyntheticDataGenerationResourceWithStreamingResponse
-
- return SyntheticDataGenerationResourceWithStreamingResponse(self._client.synthetic_data_generation)
-
@cached_property
def scoring(self) -> scoring.ScoringResourceWithStreamingResponse:
from .resources.scoring import ScoringResourceWithStreamingResponse
@@ -1307,14 +1268,6 @@ def shields(self) -> shields.AsyncShieldsResourceWithStreamingResponse:
return AsyncShieldsResourceWithStreamingResponse(self._client.shields)
- @cached_property
- def synthetic_data_generation(
- self,
- ) -> synthetic_data_generation.AsyncSyntheticDataGenerationResourceWithStreamingResponse:
- from .resources.synthetic_data_generation import AsyncSyntheticDataGenerationResourceWithStreamingResponse
-
- return AsyncSyntheticDataGenerationResourceWithStreamingResponse(self._client.synthetic_data_generation)
-
@cached_property
def scoring(self) -> scoring.AsyncScoringResourceWithStreamingResponse:
from .resources.scoring import AsyncScoringResourceWithStreamingResponse
diff --git a/src/llama_stack_client/resources/__init__.py b/src/llama_stack_client/resources/__init__.py
index 807ed6c8..0255b3e3 100644
--- a/src/llama_stack_client/resources/__init__.py
+++ b/src/llama_stack_client/resources/__init__.py
@@ -190,14 +190,6 @@
ScoringFunctionsResourceWithStreamingResponse,
AsyncScoringFunctionsResourceWithStreamingResponse,
)
-from .synthetic_data_generation import (
- SyntheticDataGenerationResource,
- AsyncSyntheticDataGenerationResource,
- SyntheticDataGenerationResourceWithRawResponse,
- AsyncSyntheticDataGenerationResourceWithRawResponse,
- SyntheticDataGenerationResourceWithStreamingResponse,
- AsyncSyntheticDataGenerationResourceWithStreamingResponse,
-)
__all__ = [
"ToolgroupsResource",
@@ -308,12 +300,6 @@
"AsyncShieldsResourceWithRawResponse",
"ShieldsResourceWithStreamingResponse",
"AsyncShieldsResourceWithStreamingResponse",
- "SyntheticDataGenerationResource",
- "AsyncSyntheticDataGenerationResource",
- "SyntheticDataGenerationResourceWithRawResponse",
- "AsyncSyntheticDataGenerationResourceWithRawResponse",
- "SyntheticDataGenerationResourceWithStreamingResponse",
- "AsyncSyntheticDataGenerationResourceWithStreamingResponse",
"ScoringResource",
"AsyncScoringResource",
"ScoringResourceWithRawResponse",
diff --git a/src/llama_stack_client/resources/models/models.py b/src/llama_stack_client/resources/models/models.py
index dc7e0f4d..0b0dc2dd 100644
--- a/src/llama_stack_client/resources/models/models.py
+++ b/src/llama_stack_client/resources/models/models.py
@@ -33,9 +33,10 @@
async_to_streamed_response_wrapper,
)
from ..._wrappers import DataWrapper
-from ...types.model import Model
from ..._base_client import make_request_options
from ...types.model_list_response import ModelListResponse
+from ...types.model_register_response import ModelRegisterResponse
+from ...types.model_retrieve_response import ModelRetrieveResponse
__all__ = ["ModelsResource", "AsyncModelsResource"]
@@ -74,7 +75,7 @@ def retrieve(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Model:
+ ) -> ModelRetrieveResponse:
"""Get model.
Get a model by its identifier.
@@ -95,7 +96,7 @@ def retrieve(
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
- cast_to=Model,
+ cast_to=ModelRetrieveResponse,
)
def list(
@@ -110,7 +111,7 @@ def list(
) -> ModelListResponse:
"""List models using the OpenAI API."""
return self._get(
- "/v1/openai/v1/models",
+ "/v1/models",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -135,7 +136,7 @@ def register(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Model:
+ ) -> ModelRegisterResponse:
"""Register model.
Register a model.
@@ -174,7 +175,7 @@ def register(
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
- cast_to=Model,
+ cast_to=ModelRegisterResponse,
)
def unregister(
@@ -247,7 +248,7 @@ async def retrieve(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Model:
+ ) -> ModelRetrieveResponse:
"""Get model.
Get a model by its identifier.
@@ -268,7 +269,7 @@ async def retrieve(
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
- cast_to=Model,
+ cast_to=ModelRetrieveResponse,
)
async def list(
@@ -283,7 +284,7 @@ async def list(
) -> ModelListResponse:
"""List models using the OpenAI API."""
return await self._get(
- "/v1/openai/v1/models",
+ "/v1/models",
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
@@ -308,7 +309,7 @@ async def register(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> Model:
+ ) -> ModelRegisterResponse:
"""Register model.
Register a model.
@@ -347,7 +348,7 @@ async def register(
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
- cast_to=Model,
+ cast_to=ModelRegisterResponse,
)
async def unregister(
diff --git a/src/llama_stack_client/resources/models/openai.py b/src/llama_stack_client/resources/models/openai.py
index c581f714..954d3c6b 100644
--- a/src/llama_stack_client/resources/models/openai.py
+++ b/src/llama_stack_client/resources/models/openai.py
@@ -23,7 +23,7 @@
)
from ..._wrappers import DataWrapper
from ..._base_client import make_request_options
-from ...types.models.openai_list_response import OpenAIListResponse
+from ...types.model_list_response import ModelListResponse
__all__ = ["OpenAIResource", "AsyncOpenAIResource"]
@@ -57,8 +57,8 @@ def list(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> OpenAIListResponse:
- """List all models."""
+ ) -> ModelListResponse:
+ """List models using the OpenAI API."""
return self._get(
"/v1/models",
options=make_request_options(
@@ -66,9 +66,9 @@ def list(
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
- post_parser=DataWrapper[OpenAIListResponse]._unwrapper,
+ post_parser=DataWrapper[ModelListResponse]._unwrapper,
),
- cast_to=cast(Type[OpenAIListResponse], DataWrapper[OpenAIListResponse]),
+ cast_to=cast(Type[ModelListResponse], DataWrapper[ModelListResponse]),
)
@@ -101,8 +101,8 @@ async def list(
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> OpenAIListResponse:
- """List all models."""
+ ) -> ModelListResponse:
+ """List models using the OpenAI API."""
return await self._get(
"/v1/models",
options=make_request_options(
@@ -110,9 +110,9 @@ async def list(
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
- post_parser=DataWrapper[OpenAIListResponse]._unwrapper,
+ post_parser=DataWrapper[ModelListResponse]._unwrapper,
),
- cast_to=cast(Type[OpenAIListResponse], DataWrapper[OpenAIListResponse]),
+ cast_to=cast(Type[ModelListResponse], DataWrapper[ModelListResponse]),
)
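Both `client.models.list()` and `client.models.openai.list()` now request `/v1/models` and deserialize into `ModelListResponse`. A small illustrative sketch, assuming a reachable server:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient()

# Both calls now issue GET /v1/models and return ModelListResponse.
models = client.models.list()
openai_models = client.models.openai.list()

print([m.id for m in models])
print([m.id for m in openai_models])
```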
diff --git a/src/llama_stack_client/resources/synthetic_data_generation.py b/src/llama_stack_client/resources/synthetic_data_generation.py
deleted file mode 100644
index c8c3d431..00000000
--- a/src/llama_stack_client/resources/synthetic_data_generation.py
+++ /dev/null
@@ -1,205 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Iterable
-from typing_extensions import Literal
-
-import httpx
-
-from ..types import synthetic_data_generation_generate_params
-from .._types import Body, Omit, Query, Headers, NotGiven, omit, not_given
-from .._utils import maybe_transform, async_maybe_transform
-from .._compat import cached_property
-from .._resource import SyncAPIResource, AsyncAPIResource
-from .._response import (
- to_raw_response_wrapper,
- to_streamed_response_wrapper,
- async_to_raw_response_wrapper,
- async_to_streamed_response_wrapper,
-)
-from .._base_client import make_request_options
-from ..types.shared_params.message import Message
-from ..types.synthetic_data_generation_response import SyntheticDataGenerationResponse
-
-__all__ = ["SyntheticDataGenerationResource", "AsyncSyntheticDataGenerationResource"]
-
-
-class SyntheticDataGenerationResource(SyncAPIResource):
- @cached_property
- def with_raw_response(self) -> SyntheticDataGenerationResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return SyntheticDataGenerationResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> SyntheticDataGenerationResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return SyntheticDataGenerationResourceWithStreamingResponse(self)
-
- def generate(
- self,
- *,
- dialogs: Iterable[Message],
- filtering_function: Literal["none", "random", "top_k", "top_p", "top_k_top_p", "sigmoid"],
- model: str | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> SyntheticDataGenerationResponse:
- """
- Generate synthetic data based on input dialogs and apply filtering.
-
- Args:
- dialogs: List of conversation messages to use as input for synthetic data generation
-
- filtering_function: Type of filtering to apply to generated synthetic data samples
-
- model: (Optional) The identifier of the model to use. The model must be registered with
- Llama Stack and available via the /models endpoint
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- return self._post(
- "/v1/synthetic-data-generation/generate",
- body=maybe_transform(
- {
- "dialogs": dialogs,
- "filtering_function": filtering_function,
- "model": model,
- },
- synthetic_data_generation_generate_params.SyntheticDataGenerationGenerateParams,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=SyntheticDataGenerationResponse,
- )
-
-
-class AsyncSyntheticDataGenerationResource(AsyncAPIResource):
- @cached_property
- def with_raw_response(self) -> AsyncSyntheticDataGenerationResourceWithRawResponse:
- """
- This property can be used as a prefix for any HTTP method call to return
- the raw response object instead of the parsed content.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers
- """
- return AsyncSyntheticDataGenerationResourceWithRawResponse(self)
-
- @cached_property
- def with_streaming_response(self) -> AsyncSyntheticDataGenerationResourceWithStreamingResponse:
- """
- An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response
- """
- return AsyncSyntheticDataGenerationResourceWithStreamingResponse(self)
-
- async def generate(
- self,
- *,
- dialogs: Iterable[Message],
- filtering_function: Literal["none", "random", "top_k", "top_p", "top_k_top_p", "sigmoid"],
- model: str | Omit = omit,
- # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
- # The extra values given here take precedence over values defined on the client or passed to this method.
- extra_headers: Headers | None = None,
- extra_query: Query | None = None,
- extra_body: Body | None = None,
- timeout: float | httpx.Timeout | None | NotGiven = not_given,
- ) -> SyntheticDataGenerationResponse:
- """
- Generate synthetic data based on input dialogs and apply filtering.
-
- Args:
- dialogs: List of conversation messages to use as input for synthetic data generation
-
- filtering_function: Type of filtering to apply to generated synthetic data samples
-
- model: (Optional) The identifier of the model to use. The model must be registered with
- Llama Stack and available via the /models endpoint
-
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- return await self._post(
- "/v1/synthetic-data-generation/generate",
- body=await async_maybe_transform(
- {
- "dialogs": dialogs,
- "filtering_function": filtering_function,
- "model": model,
- },
- synthetic_data_generation_generate_params.SyntheticDataGenerationGenerateParams,
- ),
- options=make_request_options(
- extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
- ),
- cast_to=SyntheticDataGenerationResponse,
- )
-
-
-class SyntheticDataGenerationResourceWithRawResponse:
- def __init__(self, synthetic_data_generation: SyntheticDataGenerationResource) -> None:
- self._synthetic_data_generation = synthetic_data_generation
-
- self.generate = to_raw_response_wrapper(
- synthetic_data_generation.generate,
- )
-
-
-class AsyncSyntheticDataGenerationResourceWithRawResponse:
- def __init__(self, synthetic_data_generation: AsyncSyntheticDataGenerationResource) -> None:
- self._synthetic_data_generation = synthetic_data_generation
-
- self.generate = async_to_raw_response_wrapper(
- synthetic_data_generation.generate,
- )
-
-
-class SyntheticDataGenerationResourceWithStreamingResponse:
- def __init__(self, synthetic_data_generation: SyntheticDataGenerationResource) -> None:
- self._synthetic_data_generation = synthetic_data_generation
-
- self.generate = to_streamed_response_wrapper(
- synthetic_data_generation.generate,
- )
-
-
-class AsyncSyntheticDataGenerationResourceWithStreamingResponse:
- def __init__(self, synthetic_data_generation: AsyncSyntheticDataGenerationResource) -> None:
- self._synthetic_data_generation = synthetic_data_generation
-
- self.generate = async_to_streamed_response_wrapper(
- synthetic_data_generation.generate,
- )
diff --git a/src/llama_stack_client/types/__init__.py b/src/llama_stack_client/types/__init__.py
index 1a67f6c4..03e243a5 100644
--- a/src/llama_stack_client/types/__init__.py
+++ b/src/llama_stack_client/types/__init__.py
@@ -12,7 +12,6 @@
from .model import Model as Model
from .prompt import Prompt as Prompt
from .shared import (
- Message as Message,
Document as Document,
ToolCall as ToolCall,
ParamType as ParamType,
@@ -78,6 +77,8 @@
from .vector_io_query_params import VectorIoQueryParams as VectorIoQueryParams
from .embedding_create_params import EmbeddingCreateParams as EmbeddingCreateParams
from .list_providers_response import ListProvidersResponse as ListProvidersResponse
+from .model_register_response import ModelRegisterResponse as ModelRegisterResponse
+from .model_retrieve_response import ModelRetrieveResponse as ModelRetrieveResponse
from .scoring_fn_params_param import ScoringFnParamsParam as ScoringFnParamsParam
from .toolgroup_list_response import ToolgroupListResponse as ToolgroupListResponse
from .vector_io_insert_params import VectorIoInsertParams as VectorIoInsertParams
@@ -108,7 +109,3 @@
from .scoring_function_register_params import ScoringFunctionRegisterParams as ScoringFunctionRegisterParams
from .tool_runtime_list_tools_response import ToolRuntimeListToolsResponse as ToolRuntimeListToolsResponse
from .prompt_set_default_version_params import PromptSetDefaultVersionParams as PromptSetDefaultVersionParams
-from .synthetic_data_generation_response import SyntheticDataGenerationResponse as SyntheticDataGenerationResponse
-from .synthetic_data_generation_generate_params import (
- SyntheticDataGenerationGenerateParams as SyntheticDataGenerationGenerateParams,
-)
diff --git a/src/llama_stack_client/types/model.py b/src/llama_stack_client/types/model.py
index 5807b0ed..5a2f4305 100644
--- a/src/llama_stack_client/types/model.py
+++ b/src/llama_stack_client/types/model.py
@@ -6,31 +6,22 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+import builtins
from typing import Dict, List, Union, Optional
from typing_extensions import Literal
-from pydantic import Field as FieldInfo
-
from .._models import BaseModel
__all__ = ["Model"]
class Model(BaseModel):
- identifier: str
- """Unique identifier for this resource in llama stack"""
-
- metadata: Dict[str, Union[bool, float, str, List[object], object, None]]
- """Any additional metadata for this model"""
+ id: str
- api_model_type: Literal["llm", "embedding", "rerank"] = FieldInfo(alias="model_type")
- """The type of model (LLM or embedding model)"""
+ created: int
- provider_id: str
- """ID of the provider that owns this resource"""
+ object: Literal["model"]
- type: Literal["model"]
- """The resource type, always 'model' for model resources"""
+ owned_by: str
- provider_resource_id: Optional[str] = None
- """Unique identifier for this resource in the provider"""
+ custom_metadata: Optional[Dict[str, Union[bool, float, str, List[builtins.object], builtins.object, None]]] = None
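`Model` now carries the OpenAI-style fields rather than the llama-stack resource fields. A hedged sketch of the new shape; the sample values below are illustrative only:

```python
from llama_stack_client.types import Model

# Illustrative values only; this is the shape returned by the list endpoints.
m = Model(
    id="meta-llama/Llama-3.2-3B-Instruct",
    created=1730246400,
    object="model",
    owned_by="llama_stack",
    custom_metadata={"context_length": 131072},
)
print(m.id, m.owned_by)
```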
diff --git a/src/llama_stack_client/types/model_list_response.py b/src/llama_stack_client/types/model_list_response.py
index c42b3310..b53ae421 100644
--- a/src/llama_stack_client/types/model_list_response.py
+++ b/src/llama_stack_client/types/model_list_response.py
@@ -6,25 +6,11 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-import builtins
-from typing import Dict, List, Union, Optional
-from typing_extensions import Literal, TypeAlias
+from typing import List
+from typing_extensions import TypeAlias
-from .._models import BaseModel
+from .model import Model
-__all__ = ["ModelListResponse", "ModelListResponseItem"]
+__all__ = ["ModelListResponse"]
-
-class ModelListResponseItem(BaseModel):
- id: str
-
- created: int
-
- object: Literal["model"]
-
- owned_by: str
-
- custom_metadata: Optional[Dict[str, Union[bool, float, str, List[builtins.object], builtins.object, None]]] = None
-
-
-ModelListResponse: TypeAlias = List[ModelListResponseItem]
+ModelListResponse: TypeAlias = List[Model]
diff --git a/src/llama_stack_client/types/model_register_response.py b/src/llama_stack_client/types/model_register_response.py
new file mode 100644
index 00000000..08b0e8d6
--- /dev/null
+++ b/src/llama_stack_client/types/model_register_response.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal
+
+from pydantic import Field as FieldInfo
+
+from .._models import BaseModel
+
+__all__ = ["ModelRegisterResponse"]
+
+
+class ModelRegisterResponse(BaseModel):
+ identifier: str
+ """Unique identifier for this resource in llama stack"""
+
+ metadata: Dict[str, Union[bool, float, str, List[object], object, None]]
+ """Any additional metadata for this model"""
+
+ api_model_type: Literal["llm", "embedding", "rerank"] = FieldInfo(alias="model_type")
+ """The type of model (LLM or embedding model)"""
+
+ provider_id: str
+ """ID of the provider that owns this resource"""
+
+ type: Literal["model"]
+ """The resource type, always 'model' for model resources"""
+
+ provider_resource_id: Optional[str] = None
+ """Unique identifier for this resource in the provider"""
diff --git a/src/llama_stack_client/types/model_retrieve_response.py b/src/llama_stack_client/types/model_retrieve_response.py
new file mode 100644
index 00000000..58f477d8
--- /dev/null
+++ b/src/llama_stack_client/types/model_retrieve_response.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal
+
+from pydantic import Field as FieldInfo
+
+from .._models import BaseModel
+
+__all__ = ["ModelRetrieveResponse"]
+
+
+class ModelRetrieveResponse(BaseModel):
+ identifier: str
+ """Unique identifier for this resource in llama stack"""
+
+ metadata: Dict[str, Union[bool, float, str, List[object], object, None]]
+ """Any additional metadata for this model"""
+
+ api_model_type: Literal["llm", "embedding", "rerank"] = FieldInfo(alias="model_type")
+ """The type of model (LLM or embedding model)"""
+
+ provider_id: str
+ """ID of the provider that owns this resource"""
+
+ type: Literal["model"]
+ """The resource type, always 'model' for model resources"""
+
+ provider_resource_id: Optional[str] = None
+ """Unique identifier for this resource in the provider"""
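`ModelRetrieveResponse` (and `ModelRegisterResponse` above) keep the previous resource-style fields, while the list calls return the OpenAI-style `Model`. A sketch of reading the retrieve response, assuming `"model_id"` is registered:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient()

model = client.models.retrieve("model_id")
print(model.identifier)            # unique identifier in llama stack
print(model.provider_id)           # owning provider
print(model.api_model_type)        # "llm", "embedding", or "rerank" (wire field: model_type)
print(model.provider_resource_id)  # may be None
```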
diff --git a/src/llama_stack_client/types/models/__init__.py b/src/llama_stack_client/types/models/__init__.py
index bba1f3e9..d14ed874 100644
--- a/src/llama_stack_client/types/models/__init__.py
+++ b/src/llama_stack_client/types/models/__init__.py
@@ -7,5 +7,3 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
-
-from .openai_list_response import OpenAIListResponse as OpenAIListResponse
diff --git a/src/llama_stack_client/types/models/openai_list_response.py b/src/llama_stack_client/types/models/openai_list_response.py
deleted file mode 100644
index 5b6c0358..00000000
--- a/src/llama_stack_client/types/models/openai_list_response.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List
-from typing_extensions import TypeAlias
-
-from ..model import Model
-
-__all__ = ["OpenAIListResponse"]
-
-OpenAIListResponse: TypeAlias = List[Model]
diff --git a/src/llama_stack_client/types/shared/__init__.py b/src/llama_stack_client/types/shared/__init__.py
index c18a9358..6ba732f0 100644
--- a/src/llama_stack_client/types/shared/__init__.py
+++ b/src/llama_stack_client/types/shared/__init__.py
@@ -6,7 +6,6 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from .message import Message as Message
from .document import Document as Document
from .tool_call import ToolCall as ToolCall
from .param_type import ParamType as ParamType
diff --git a/src/llama_stack_client/types/shared/message.py b/src/llama_stack_client/types/shared/message.py
deleted file mode 100644
index bb47f940..00000000
--- a/src/llama_stack_client/types/shared/message.py
+++ /dev/null
@@ -1,22 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Union
-from typing_extensions import Annotated, TypeAlias
-
-from ..._utils import PropertyInfo
-from .user_message import UserMessage
-from .system_message import SystemMessage
-from .completion_message import CompletionMessage
-from .tool_response_message import ToolResponseMessage
-
-__all__ = ["Message"]
-
-Message: TypeAlias = Annotated[
- Union[UserMessage, SystemMessage, ToolResponseMessage, CompletionMessage], PropertyInfo(discriminator="role")
-]
diff --git a/src/llama_stack_client/types/shared_params/__init__.py b/src/llama_stack_client/types/shared_params/__init__.py
index 12061849..f25d2430 100644
--- a/src/llama_stack_client/types/shared_params/__init__.py
+++ b/src/llama_stack_client/types/shared_params/__init__.py
@@ -6,16 +6,13 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from .message import Message as Message
from .document import Document as Document
-from .tool_call import ToolCall as ToolCall
from .agent_config import AgentConfig as AgentConfig
from .query_config import QueryConfig as QueryConfig
from .user_message import UserMessage as UserMessage
from .system_message import SystemMessage as SystemMessage
from .response_format import ResponseFormat as ResponseFormat
from .sampling_params import SamplingParams as SamplingParams
-from .completion_message import CompletionMessage as CompletionMessage
from .interleaved_content import InterleavedContent as InterleavedContent
from .tool_response_message import ToolResponseMessage as ToolResponseMessage
from .interleaved_content_item import InterleavedContentItem as InterleavedContentItem
diff --git a/src/llama_stack_client/types/shared_params/completion_message.py b/src/llama_stack_client/types/shared_params/completion_message.py
deleted file mode 100644
index 61c3fc08..00000000
--- a/src/llama_stack_client/types/shared_params/completion_message.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Iterable
-from typing_extensions import Literal, Required, TypedDict
-
-from .tool_call import ToolCall
-from .interleaved_content import InterleavedContent
-
-__all__ = ["CompletionMessage"]
-
-
-class CompletionMessage(TypedDict, total=False):
- content: Required[InterleavedContent]
- """The content of the model's response"""
-
- role: Required[Literal["assistant"]]
- """Must be "assistant" to identify this as the model's response"""
-
- stop_reason: Required[Literal["end_of_turn", "end_of_message", "out_of_tokens"]]
- """Reason why the model stopped generating.
-
- Options are: - `StopReason.end_of_turn`: The model finished generating the
- entire response. - `StopReason.end_of_message`: The model finished generating
- but generated a partial response -- usually, a tool call. The user may call the
- tool and continue the conversation with the tool's response. -
- `StopReason.out_of_tokens`: The model ran out of token budget.
- """
-
- tool_calls: Iterable[ToolCall]
- """List of tool calls. Each tool call is a ToolCall object."""
diff --git a/src/llama_stack_client/types/shared_params/message.py b/src/llama_stack_client/types/shared_params/message.py
deleted file mode 100644
index 2a2c0f75..00000000
--- a/src/llama_stack_client/types/shared_params/message.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union
-from typing_extensions import TypeAlias
-
-from .user_message import UserMessage
-from .system_message import SystemMessage
-from .completion_message import CompletionMessage
-from .tool_response_message import ToolResponseMessage
-
-__all__ = ["Message"]
-
-Message: TypeAlias = Union[UserMessage, SystemMessage, ToolResponseMessage, CompletionMessage]
diff --git a/src/llama_stack_client/types/shared_params/tool_call.py b/src/llama_stack_client/types/shared_params/tool_call.py
deleted file mode 100644
index 320b5a8e..00000000
--- a/src/llama_stack_client/types/shared_params/tool_call.py
+++ /dev/null
@@ -1,22 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = ["ToolCall"]
-
-
-class ToolCall(TypedDict, total=False):
- arguments: Required[str]
-
- call_id: Required[str]
-
- tool_name: Required[Union[Literal["brave_search", "wolfram_alpha", "photogen", "code_interpreter"], str]]
diff --git a/src/llama_stack_client/types/synthetic_data_generation_generate_params.py b/src/llama_stack_client/types/synthetic_data_generation_generate_params.py
deleted file mode 100644
index 2f817132..00000000
--- a/src/llama_stack_client/types/synthetic_data_generation_generate_params.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Iterable
-from typing_extensions import Literal, Required, TypedDict
-
-from .shared_params.message import Message
-
-__all__ = ["SyntheticDataGenerationGenerateParams"]
-
-
-class SyntheticDataGenerationGenerateParams(TypedDict, total=False):
- dialogs: Required[Iterable[Message]]
- """List of conversation messages to use as input for synthetic data generation"""
-
- filtering_function: Required[Literal["none", "random", "top_k", "top_p", "top_k_top_p", "sigmoid"]]
- """Type of filtering to apply to generated synthetic data samples"""
-
- model: str
- """(Optional) The identifier of the model to use.
-
- The model must be registered with Llama Stack and available via the /models
- endpoint
- """
diff --git a/src/llama_stack_client/types/synthetic_data_generation_response.py b/src/llama_stack_client/types/synthetic_data_generation_response.py
deleted file mode 100644
index 9332562f..00000000
--- a/src/llama_stack_client/types/synthetic_data_generation_response.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union, Optional
-
-from .._models import BaseModel
-
-__all__ = ["SyntheticDataGenerationResponse"]
-
-
-class SyntheticDataGenerationResponse(BaseModel):
- synthetic_data: List[Dict[str, Union[bool, float, str, List[object], object, None]]]
- """List of generated synthetic data samples that passed the filtering criteria"""
-
- statistics: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
- """
- (Optional) Statistical information about the generation process and filtering
- results
- """
diff --git a/tests/api_resources/models/test_openai.py b/tests/api_resources/models/test_openai.py
index 96955333..6a9acf23 100644
--- a/tests/api_resources/models/test_openai.py
+++ b/tests/api_resources/models/test_openai.py
@@ -15,7 +15,7 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types.models import OpenAIListResponse
+from llama_stack_client.types import ModelListResponse
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -26,7 +26,7 @@ class TestOpenAI:
@parametrize
def test_method_list(self, client: LlamaStackClient) -> None:
openai = client.models.openai.list()
- assert_matches_type(OpenAIListResponse, openai, path=["response"])
+ assert_matches_type(ModelListResponse, openai, path=["response"])
@parametrize
def test_raw_response_list(self, client: LlamaStackClient) -> None:
@@ -35,7 +35,7 @@ def test_raw_response_list(self, client: LlamaStackClient) -> None:
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
openai = response.parse()
- assert_matches_type(OpenAIListResponse, openai, path=["response"])
+ assert_matches_type(ModelListResponse, openai, path=["response"])
@parametrize
def test_streaming_response_list(self, client: LlamaStackClient) -> None:
@@ -44,7 +44,7 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None:
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
openai = response.parse()
- assert_matches_type(OpenAIListResponse, openai, path=["response"])
+ assert_matches_type(ModelListResponse, openai, path=["response"])
assert cast(Any, response.is_closed) is True
@@ -57,7 +57,7 @@ class TestAsyncOpenAI:
@parametrize
async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
openai = await async_client.models.openai.list()
- assert_matches_type(OpenAIListResponse, openai, path=["response"])
+ assert_matches_type(ModelListResponse, openai, path=["response"])
@parametrize
async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
@@ -66,7 +66,7 @@ async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> N
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
openai = await response.parse()
- assert_matches_type(OpenAIListResponse, openai, path=["response"])
+ assert_matches_type(ModelListResponse, openai, path=["response"])
@parametrize
async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
@@ -75,6 +75,6 @@ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
openai = await response.parse()
- assert_matches_type(OpenAIListResponse, openai, path=["response"])
+ assert_matches_type(ModelListResponse, openai, path=["response"])
assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/test_models.py b/tests/api_resources/test_models.py
index 13e98aa3..89fb7e7c 100644
--- a/tests/api_resources/test_models.py
+++ b/tests/api_resources/test_models.py
@@ -15,7 +15,11 @@
from tests.utils import assert_matches_type
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types import Model, ModelListResponse
+from llama_stack_client.types import (
+ ModelListResponse,
+ ModelRegisterResponse,
+ ModelRetrieveResponse,
+)
base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
@@ -28,7 +32,7 @@ def test_method_retrieve(self, client: LlamaStackClient) -> None:
model = client.models.retrieve(
"model_id",
)
- assert_matches_type(Model, model, path=["response"])
+ assert_matches_type(ModelRetrieveResponse, model, path=["response"])
@parametrize
def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
@@ -39,7 +43,7 @@ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
model = response.parse()
- assert_matches_type(Model, model, path=["response"])
+ assert_matches_type(ModelRetrieveResponse, model, path=["response"])
@parametrize
def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
@@ -50,7 +54,7 @@ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
model = response.parse()
- assert_matches_type(Model, model, path=["response"])
+ assert_matches_type(ModelRetrieveResponse, model, path=["response"])
assert cast(Any, response.is_closed) is True
@@ -91,7 +95,7 @@ def test_method_register(self, client: LlamaStackClient) -> None:
model = client.models.register(
model_id="model_id",
)
- assert_matches_type(Model, model, path=["response"])
+ assert_matches_type(ModelRegisterResponse, model, path=["response"])
@parametrize
def test_method_register_with_all_params(self, client: LlamaStackClient) -> None:
@@ -102,7 +106,7 @@ def test_method_register_with_all_params(self, client: LlamaStackClient) -> None
provider_id="provider_id",
provider_model_id="provider_model_id",
)
- assert_matches_type(Model, model, path=["response"])
+ assert_matches_type(ModelRegisterResponse, model, path=["response"])
@parametrize
def test_raw_response_register(self, client: LlamaStackClient) -> None:
@@ -113,7 +117,7 @@ def test_raw_response_register(self, client: LlamaStackClient) -> None:
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
model = response.parse()
- assert_matches_type(Model, model, path=["response"])
+ assert_matches_type(ModelRegisterResponse, model, path=["response"])
@parametrize
def test_streaming_response_register(self, client: LlamaStackClient) -> None:
@@ -124,7 +128,7 @@ def test_streaming_response_register(self, client: LlamaStackClient) -> None:
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
model = response.parse()
- assert_matches_type(Model, model, path=["response"])
+ assert_matches_type(ModelRegisterResponse, model, path=["response"])
assert cast(Any, response.is_closed) is True
@@ -177,7 +181,7 @@ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> Non
model = await async_client.models.retrieve(
"model_id",
)
- assert_matches_type(Model, model, path=["response"])
+ assert_matches_type(ModelRetrieveResponse, model, path=["response"])
@parametrize
async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
@@ -188,7 +192,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient)
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
model = await response.parse()
- assert_matches_type(Model, model, path=["response"])
+ assert_matches_type(ModelRetrieveResponse, model, path=["response"])
@parametrize
async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
@@ -199,7 +203,7 @@ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCl
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
model = await response.parse()
- assert_matches_type(Model, model, path=["response"])
+ assert_matches_type(ModelRetrieveResponse, model, path=["response"])
assert cast(Any, response.is_closed) is True
@@ -240,7 +244,7 @@ async def test_method_register(self, async_client: AsyncLlamaStackClient) -> Non
model = await async_client.models.register(
model_id="model_id",
)
- assert_matches_type(Model, model, path=["response"])
+ assert_matches_type(ModelRegisterResponse, model, path=["response"])
@parametrize
async def test_method_register_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
@@ -251,7 +255,7 @@ async def test_method_register_with_all_params(self, async_client: AsyncLlamaSta
provider_id="provider_id",
provider_model_id="provider_model_id",
)
- assert_matches_type(Model, model, path=["response"])
+ assert_matches_type(ModelRegisterResponse, model, path=["response"])
@parametrize
async def test_raw_response_register(self, async_client: AsyncLlamaStackClient) -> None:
@@ -262,7 +266,7 @@ async def test_raw_response_register(self, async_client: AsyncLlamaStackClient)
assert response.is_closed is True
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
model = await response.parse()
- assert_matches_type(Model, model, path=["response"])
+ assert_matches_type(ModelRegisterResponse, model, path=["response"])
@parametrize
async def test_streaming_response_register(self, async_client: AsyncLlamaStackClient) -> None:
@@ -273,7 +277,7 @@ async def test_streaming_response_register(self, async_client: AsyncLlamaStackCl
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
model = await response.parse()
- assert_matches_type(Model, model, path=["response"])
+ assert_matches_type(ModelRegisterResponse, model, path=["response"])
assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/test_synthetic_data_generation.py b/tests/api_resources/test_synthetic_data_generation.py
deleted file mode 100644
index c40ddede..00000000
--- a/tests/api_resources/test_synthetic_data_generation.py
+++ /dev/null
@@ -1,158 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import os
-from typing import Any, cast
-
-import pytest
-
-from tests.utils import assert_matches_type
-from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types import SyntheticDataGenerationResponse
-
-base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
-
-
-class TestSyntheticDataGeneration:
- parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
-
- @parametrize
- def test_method_generate(self, client: LlamaStackClient) -> None:
- synthetic_data_generation = client.synthetic_data_generation.generate(
- dialogs=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- filtering_function="none",
- )
- assert_matches_type(SyntheticDataGenerationResponse, synthetic_data_generation, path=["response"])
-
- @parametrize
- def test_method_generate_with_all_params(self, client: LlamaStackClient) -> None:
- synthetic_data_generation = client.synthetic_data_generation.generate(
- dialogs=[
- {
- "content": "string",
- "role": "user",
- "context": "string",
- }
- ],
- filtering_function="none",
- model="model",
- )
- assert_matches_type(SyntheticDataGenerationResponse, synthetic_data_generation, path=["response"])
-
- @parametrize
- def test_raw_response_generate(self, client: LlamaStackClient) -> None:
- response = client.synthetic_data_generation.with_raw_response.generate(
- dialogs=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- filtering_function="none",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- synthetic_data_generation = response.parse()
- assert_matches_type(SyntheticDataGenerationResponse, synthetic_data_generation, path=["response"])
-
- @parametrize
- def test_streaming_response_generate(self, client: LlamaStackClient) -> None:
- with client.synthetic_data_generation.with_streaming_response.generate(
- dialogs=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- filtering_function="none",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- synthetic_data_generation = response.parse()
- assert_matches_type(SyntheticDataGenerationResponse, synthetic_data_generation, path=["response"])
-
- assert cast(Any, response.is_closed) is True
-
-
-class TestAsyncSyntheticDataGeneration:
- parametrize = pytest.mark.parametrize(
- "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
- )
-
- @parametrize
- async def test_method_generate(self, async_client: AsyncLlamaStackClient) -> None:
- synthetic_data_generation = await async_client.synthetic_data_generation.generate(
- dialogs=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- filtering_function="none",
- )
- assert_matches_type(SyntheticDataGenerationResponse, synthetic_data_generation, path=["response"])
-
- @parametrize
- async def test_method_generate_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- synthetic_data_generation = await async_client.synthetic_data_generation.generate(
- dialogs=[
- {
- "content": "string",
- "role": "user",
- "context": "string",
- }
- ],
- filtering_function="none",
- model="model",
- )
- assert_matches_type(SyntheticDataGenerationResponse, synthetic_data_generation, path=["response"])
-
- @parametrize
- async def test_raw_response_generate(self, async_client: AsyncLlamaStackClient) -> None:
- response = await async_client.synthetic_data_generation.with_raw_response.generate(
- dialogs=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- filtering_function="none",
- )
-
- assert response.is_closed is True
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
- synthetic_data_generation = await response.parse()
- assert_matches_type(SyntheticDataGenerationResponse, synthetic_data_generation, path=["response"])
-
- @parametrize
- async def test_streaming_response_generate(self, async_client: AsyncLlamaStackClient) -> None:
- async with async_client.synthetic_data_generation.with_streaming_response.generate(
- dialogs=[
- {
- "content": "string",
- "role": "user",
- }
- ],
- filtering_function="none",
- ) as response:
- assert not response.is_closed
- assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
- synthetic_data_generation = await response.parse()
- assert_matches_type(SyntheticDataGenerationResponse, synthetic_data_generation, path=["response"])
-
- assert cast(Any, response.is_closed) is True
From 92bc34e5cb82a2353604617ed2c872453820b261 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]"
<142633134+stainless-app[bot]@users.noreply.github.com>
Date: Tue, 4 Nov 2025 02:40:57 +0000
Subject: [PATCH 2/2] release: 0.4.0-alpha.3
---
.release-please-manifest.json | 2 +-
CHANGELOG.md | 13 +++++++++++++
pyproject.toml | 2 +-
3 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 24b05bc4..4c5c3821 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "0.4.0-alpha.2"
+ ".": "0.4.0-alpha.3"
}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 236e5da7..b69b08da 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,18 @@
# Changelog
+## 0.4.0-alpha.3 (2025-11-04)
+
+Full Changelog: [v0.4.0-alpha.2...v0.4.0-alpha.3](https://github.com/llamastack/llama-stack-client-python/compare/v0.4.0-alpha.2...v0.4.0-alpha.3)
+
+### Features
+
+* **api:** remove openai/v1 endpoints ([e391af5](https://github.com/llamastack/llama-stack-client-python/commit/e391af5b47f10ca2c3fa7d36cacae1900af711b4))
+
+
+### Bug Fixes
+
+* update post_training CLI import path to use alpha subdirectory ([#293](https://github.com/llamastack/llama-stack-client-python/issues/293)) ([65cbe68](https://github.com/llamastack/llama-stack-client-python/commit/65cbe680ebeb30bf7e7cfebd1238f57bef792f5d))
+
## 0.4.0-alpha.2 (2025-11-03)
Full Changelog: [v0.4.0-alpha.1...v0.4.0-alpha.2](https://github.com/llamastack/llama-stack-client-python/compare/v0.4.0-alpha.1...v0.4.0-alpha.2)
diff --git a/pyproject.toml b/pyproject.toml
index c75fa9fe..b85a739b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "llama_stack_client"
-version = "0.4.0-alpha.2"
+version = "0.4.0-alpha.3"
description = "The official Python library for the llama-stack-client API"
dynamic = ["readme"]
license = "MIT"