10 changes: 5 additions & 5 deletions src/llama_stack_client/resources/embeddings.py
@@ -18,7 +18,7 @@
     async_to_streamed_response_wrapper,
 )
 from .._base_client import make_request_options
-from ..types.embeddings_response import EmbeddingsResponse
+from ..types.create_embeddings_response import CreateEmbeddingsResponse
 
 __all__ = ["EmbeddingsResource", "AsyncEmbeddingsResource"]
 
@@ -57,7 +57,7 @@ def create(
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> EmbeddingsResponse:
+    ) -> CreateEmbeddingsResponse:
         """
         Generate OpenAI-compatible embeddings for the given input using the specified
         model.
@@ -101,7 +101,7 @@
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
-            cast_to=EmbeddingsResponse,
+            cast_to=CreateEmbeddingsResponse,
         )
 
 
@@ -139,7 +139,7 @@ async def create(
         extra_query: Query | None = None,
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> EmbeddingsResponse:
+    ) -> CreateEmbeddingsResponse:
         """
         Generate OpenAI-compatible embeddings for the given input using the specified
         model.
@@ -183,7 +183,7 @@ async def create(
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
-            cast_to=EmbeddingsResponse,
+            cast_to=CreateEmbeddingsResponse,
         )
 
 
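For reference, a minimal usage sketch of the renamed return type. It assumes a running Llama Stack server; the base URL and model id below are placeholders, not values from this PR:

```python
from llama_stack_client import LlamaStackClient

# Placeholder base URL and model id; adjust for your deployment.
client = LlamaStackClient(base_url="http://localhost:8321")

response = client.embeddings.create(
    input=["Hello, world!"],
    model="all-MiniLM-L6-v2",
    encoding_format="float",
)
# `response` is now parsed as CreateEmbeddingsResponse instead of EmbeddingsResponse.
print(response.object)                  # "list"
print(response.data[0].index)           # 0
print(len(response.data[0].embedding))  # model-specific dimensionality
```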
1 change: 1 addition & 0 deletions src/llama_stack_client/types/__init__.py
@@ -119,6 +119,7 @@
 from .toolgroup_register_params import ToolgroupRegisterParams as ToolgroupRegisterParams
 from .vector_db_register_params import VectorDBRegisterParams as VectorDBRegisterParams
 from .completion_create_response import CompletionCreateResponse as CompletionCreateResponse
+from .create_embeddings_response import CreateEmbeddingsResponse as CreateEmbeddingsResponse
 from .eval_run_eval_alpha_params import EvalRunEvalAlphaParams as EvalRunEvalAlphaParams
 from .scoring_score_batch_params import ScoringScoreBatchParams as ScoringScoreBatchParams
 from .telemetry_log_event_params import TelemetryLogEventParams as TelemetryLogEventParams
44 changes: 44 additions & 0 deletions src/llama_stack_client/types/create_embeddings_response.py
@@ -0,0 +1,44 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["CreateEmbeddingsResponse", "Data", "Usage"]
+
+
+class Data(BaseModel):
+    embedding: Union[List[float], str]
+    """
+    The embedding vector as a list of floats (when encoding_format="float") or as a
+    base64-encoded string (when encoding_format="base64")
+    """
+
+    index: int
+    """The index of the embedding in the input list"""
+
+    object: Literal["embedding"]
+    """The object type, which will be "embedding" """
+
+
+class Usage(BaseModel):
+    prompt_tokens: int
+    """The number of tokens in the input"""
+
+    total_tokens: int
+    """The total number of tokens used"""
+
+
+class CreateEmbeddingsResponse(BaseModel):
+    data: List[Data]
+    """List of embedding data objects"""
+
+    model: str
+    """The model that was used to generate the embeddings"""
+
+    object: Literal["list"]
+    """The object type, which will be "list" """
+
+    usage: Usage
+    """Usage information"""
5 changes: 4 additions & 1 deletion src/llama_stack_client/types/dataset_iterrows_response.py
@@ -1,6 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
-from typing import Dict, List, Union
+from typing import Dict, List, Union, Optional
 
 from .._models import BaseModel
 
@@ -13,3 +13,6 @@ class DatasetIterrowsResponse(BaseModel):
 
     has_more: bool
     """Whether there are more items available after this set"""
+
+    url: Optional[str] = None
+    """The URL for accessing this list"""
42 changes: 7 additions & 35 deletions src/llama_stack_client/types/embeddings_response.py
@@ -1,44 +1,16 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
-from typing import List, Union
-from typing_extensions import Literal
+from typing import List
 
 from .._models import BaseModel
 
-__all__ = ["EmbeddingsResponse", "Data", "Usage"]
-
-
-class Data(BaseModel):
-    embedding: Union[List[float], str]
-    """
-    The embedding vector as a list of floats (when encoding_format="float") or as a
-    base64-encoded string (when encoding_format="base64")
-    """
-
-    index: int
-    """The index of the embedding in the input list"""
-
-    object: Literal["embedding"]
-    """The object type, which will be "embedding" """
-
-
-class Usage(BaseModel):
-    prompt_tokens: int
-    """The number of tokens in the input"""
-
-    total_tokens: int
-    """The total number of tokens used"""
+__all__ = ["EmbeddingsResponse"]
 
 
 class EmbeddingsResponse(BaseModel):
-    data: List[Data]
-    """List of embedding data objects"""
+    embeddings: List[List[float]]
+    """List of embedding vectors, one per input content.
 
-    model: str
-    """The model that was used to generate the embeddings"""
-
-    object: Literal["list"]
-    """The object type, which will be "list" """
-
-    usage: Usage
-    """Usage information"""
+    Each embedding is a list of floats. The dimensionality of the embedding is
+    model-specific; you can check model metadata using /models/{model_id}
+    """
18 changes: 9 additions & 9 deletions tests/api_resources/test_embeddings.py
@@ -9,7 +9,7 @@
 
 from tests.utils import assert_matches_type
 from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types import EmbeddingsResponse
+from llama_stack_client.types import CreateEmbeddingsResponse
 
 base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
 
@@ -23,7 +23,7 @@ def test_method_create(self, client: LlamaStackClient) -> None:
             input="string",
             model="model",
         )
-        assert_matches_type(EmbeddingsResponse, embedding, path=["response"])
+        assert_matches_type(CreateEmbeddingsResponse, embedding, path=["response"])
 
     @parametrize
     def test_method_create_with_all_params(self, client: LlamaStackClient) -> None:
@@ -34,7 +34,7 @@ def test_method_create_with_all_params(self, client: LlamaStackClient) -> None:
             encoding_format="encoding_format",
             user="user",
         )
-        assert_matches_type(EmbeddingsResponse, embedding, path=["response"])
+        assert_matches_type(CreateEmbeddingsResponse, embedding, path=["response"])
 
     @parametrize
     def test_raw_response_create(self, client: LlamaStackClient) -> None:
@@ -46,7 +46,7 @@ def test_raw_response_create(self, client: LlamaStackClient) -> None:
         assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         embedding = response.parse()
-        assert_matches_type(EmbeddingsResponse, embedding, path=["response"])
+        assert_matches_type(CreateEmbeddingsResponse, embedding, path=["response"])
 
     @parametrize
     def test_streaming_response_create(self, client: LlamaStackClient) -> None:
@@ -58,7 +58,7 @@ def test_streaming_response_create(self, client: LlamaStackClient) -> None:
             assert response.http_request.headers.get("X-Stainless-Lang") == "python"
 
             embedding = response.parse()
-            assert_matches_type(EmbeddingsResponse, embedding, path=["response"])
+            assert_matches_type(CreateEmbeddingsResponse, embedding, path=["response"])
 
         assert cast(Any, response.is_closed) is True
 
@@ -72,7 +72,7 @@ async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None:
             input="string",
             model="model",
         )
-        assert_matches_type(EmbeddingsResponse, embedding, path=["response"])
+        assert_matches_type(CreateEmbeddingsResponse, embedding, path=["response"])
 
     @parametrize
     async def test_method_create_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
@@ -83,7 +83,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
             encoding_format="encoding_format",
             user="user",
         )
-        assert_matches_type(EmbeddingsResponse, embedding, path=["response"])
+        assert_matches_type(CreateEmbeddingsResponse, embedding, path=["response"])
 
     @parametrize
     async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> None:
@@ -95,7 +95,7 @@ async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> None:
         assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
         embedding = await response.parse()
-        assert_matches_type(EmbeddingsResponse, embedding, path=["response"])
+        assert_matches_type(CreateEmbeddingsResponse, embedding, path=["response"])
 
     @parametrize
     async def test_streaming_response_create(self, async_client: AsyncLlamaStackClient) -> None:
@@ -107,6 +107,6 @@ async def test_streaming_response_create(self, async_client: AsyncLlamaStackClient) -> None:
             assert response.http_request.headers.get("X-Stainless-Lang") == "python"
 
             embedding = await response.parse()
-            assert_matches_type(EmbeddingsResponse, embedding, path=["response"])
+            assert_matches_type(CreateEmbeddingsResponse, embedding, path=["response"])
 
         assert cast(Any, response.is_closed) is True
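A sketch of the raw-response pattern these tests exercise; the `with_raw_response` accessor follows the Stainless client convention (the actual call line is outside the visible hunks), and the base URL is the mock server address from the test suite:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://127.0.0.1:4010")  # mock server from the tests

response = client.embeddings.with_raw_response.create(
    input="string",
    model="model",
)
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
embedding = response.parse()  # parses into CreateEmbeddingsResponse
```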