22
33from __future__ import annotations
44
5- from typing import List , Iterable
5+ from typing import List , Union , Iterable
66from typing_extensions import Literal , overload
77
88import httpx
3636from ..types .shared .chat_completion_response import ChatCompletionResponse
3737from ..types .shared_params .interleaved_content import InterleavedContent
3838from ..types .chat_completion_response_stream_chunk import ChatCompletionResponseStreamChunk
39+ from ..types .shared_params .interleaved_content_item import InterleavedContentItem
3940
4041__all__ = ["InferenceResource" , "AsyncInferenceResource" ]
4142
@@ -493,8 +494,11 @@ def completion(
493494 def embeddings (
494495 self ,
495496 * ,
496- contents : List [InterleavedContent ],
497+ contents : Union [ List [str ], Iterable [ InterleavedContentItem ] ],
497498 model_id : str ,
499+ output_dimension : int | NotGiven = NOT_GIVEN ,
500+ task_type : Literal ["query" , "document" ] | NotGiven = NOT_GIVEN ,
501+ text_truncation : Literal ["none" , "start" , "end" ] | NotGiven = NOT_GIVEN ,
498502 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
499503 # The extra values given here take precedence over values defined on the client or passed to this method.
500504 extra_headers : Headers | None = None ,
@@ -506,13 +510,22 @@ def embeddings(
506510 Generate embeddings for content pieces using the specified model.
507511
508512 Args:
509- contents: List of contents to generate embeddings for. Note that content can be
510- multimodal. The behavior depends on the model and provider. Some models may only
511- support text.
513+ contents: List of contents to generate embeddings for. Each content can be a string or an
514+ InterleavedContentItem (and hence can be multimodal). The behavior depends on
515+ the model and provider. Some models may only support text.
512516
513517 model_id: The identifier of the model to use. The model must be an embedding model
514518 registered with Llama Stack and available via the /models endpoint.
515519
520+ output_dimension: (Optional) Output dimensionality for the embeddings. Only supported by
521+ Matryoshka models.
522+
523+ task_type: (Optional) How is the embedding being used? This is only supported by asymmetric
524+ embedding models.
525+
526+ text_truncation: (Optional) Config for how to truncate text for embedding when text is longer
527+ than the model's max sequence length.
528+
516529 extra_headers: Send extra headers
517530
518531 extra_query: Add additional query parameters to the request
@@ -527,6 +540,9 @@ def embeddings(
527540 {
528541 "contents" : contents ,
529542 "model_id" : model_id ,
543+ "output_dimension" : output_dimension ,
544+ "task_type" : task_type ,
545+ "text_truncation" : text_truncation ,
530546 },
531547 inference_embeddings_params .InferenceEmbeddingsParams ,
532548 ),
@@ -990,8 +1006,11 @@ async def completion(
9901006 async def embeddings (
9911007 self ,
9921008 * ,
993- contents : List [InterleavedContent ],
1009+ contents : Union [ List [str ], Iterable [ InterleavedContentItem ] ],
9941010 model_id : str ,
1011+ output_dimension : int | NotGiven = NOT_GIVEN ,
1012+ task_type : Literal ["query" , "document" ] | NotGiven = NOT_GIVEN ,
1013+ text_truncation : Literal ["none" , "start" , "end" ] | NotGiven = NOT_GIVEN ,
9951014 # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
9961015 # The extra values given here take precedence over values defined on the client or passed to this method.
9971016 extra_headers : Headers | None = None ,
@@ -1003,13 +1022,22 @@ async def embeddings(
10031022 Generate embeddings for content pieces using the specified model.
10041023
10051024 Args:
1006- contents: List of contents to generate embeddings for. Note that content can be
1007- multimodal. The behavior depends on the model and provider. Some models may only
1008- support text.
1025+ contents: List of contents to generate embeddings for. Each content can be a string or an
1026+ InterleavedContentItem (and hence can be multimodal). The behavior depends on
1027+ the model and provider. Some models may only support text.
10091028
10101029 model_id: The identifier of the model to use. The model must be an embedding model
10111030 registered with Llama Stack and available via the /models endpoint.
10121031
1032+ output_dimension: (Optional) Output dimensionality for the embeddings. Only supported by
1033+ Matryoshka models.
1034+
1035+ task_type: (Optional) How is the embedding being used? This is only supported by asymmetric
1036+ embedding models.
1037+
1038+ text_truncation: (Optional) Config for how to truncate text for embedding when text is longer
1039+ than the model's max sequence length.
1040+
10131041 extra_headers: Send extra headers
10141042
10151043 extra_query: Add additional query parameters to the request
@@ -1024,6 +1052,9 @@ async def embeddings(
10241052 {
10251053 "contents" : contents ,
10261054 "model_id" : model_id ,
1055+ "output_dimension" : output_dimension ,
1056+ "task_type" : task_type ,
1057+ "text_truncation" : text_truncation ,
10271058 },
10281059 inference_embeddings_params .InferenceEmbeddingsParams ,
10291060 ),
0 commit comments