Skip to content

Commit 351c45a

Browse files
author
Daniel Lorch
committed
feat: add support for Amazon Titan Multimodal Embeddings G1 and Amazon Nova Multimodal Embeddings
1 parent 1e71750 commit 351c45a

File tree

2 files changed

+112
-7
lines changed

2 files changed

+112
-7
lines changed

lib/idp_common_pkg/idp_common/bedrock/README.md

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,47 @@ embedding = client.generate_embedding(
7373
# Use embedding for vector search, clustering, etc.
7474
```
7575

76+
Amazon Titan Multimodal Embeddings support both text and image at the same time. The resulting embeddings vector averages the text embeddings and image embeddings vectors.
77+
78+
```python
79+
from idp_common.bedrock.client import BedrockClient
80+
81+
with open("/path/to/document.png", "rb") as image_file:
82+
image_data = image_file.read()
83+
84+
client = BedrockClient()
85+
embedding = client.generate_embedding(
86+
text="This document contains information about loan applications.",
87+
image_source=image_data,
88+
model_id="amazon.titan-embed-image-v1"
89+
)
90+
```
91+
92+
The image source can also be an S3 URI:
93+
94+
```python
95+
from idp_common.bedrock.client import BedrockClient
96+
97+
client = BedrockClient()
98+
embedding = client.generate_embedding(
99+
image_source="s3://bucket/key",
100+
model_id="amazon.titan-embed-image-v1"
101+
)
102+
```
103+
104+
Amazon Nova Multimodal Embeddings with 3072 dimension size:
105+
106+
```python
107+
from idp_common.bedrock.client import BedrockClient
108+
109+
client = BedrockClient()
110+
embedding = client.generate_embedding(
111+
image_source="s3://bucket/key",
112+
model_id="amazon.nova-2-multimodal-embeddings-v1:0",
113+
dimensions=3072
114+
)
115+
```
116+
76117
## Prompt Caching with CachePoint
77118

78119
Prompt caching is a powerful feature in Amazon Bedrock that significantly reduces response latency for workloads with repetitive contexts. The Bedrock client provides built-in support for this via the `<<CACHEPOINT>>` tag.

lib/idp_common_pkg/idp_common/bedrock/client.py

Lines changed: 71 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import copy
1717
import random
1818
import socket
19+
import base64
1920
from typing import Dict, Any, List, Optional, Union, Tuple, Type
2021
from botocore.config import Config
2122
from botocore.exceptions import (
@@ -25,7 +26,10 @@
2526
EndpointConnectionError,
2627
)
2728
from urllib3.exceptions import ReadTimeoutError as Urllib3ReadTimeoutError
28-
29+
from idp_common.image import (
30+
prepare_image,
31+
prepare_bedrock_image_attachment
32+
)
2933

3034
# Dummy exception classes for requests timeouts if requests is not available
3135
class _RequestsReadTimeout(Exception):
@@ -698,22 +702,29 @@ def get_guardrail_config(self) -> Optional[Dict[str, str]]:
698702

699703
def generate_embedding(
700704
self,
701-
text: str,
705+
text: str = "",
706+
image_source: Optional[Union[str, bytes]] = None,
702707
model_id: str = "amazon.titan-embed-text-v1",
708+
dimensions: int = 1024,
703709
max_retries: Optional[int] = None,
704710
) -> List[float]:
705711
"""
706-
Generate an embedding vector for the given text using Amazon Bedrock.
712+
Generate an embedding vector for the given text or image_source using Amazon Bedrock.
713+
At least one of text or the image is required to generate the embedding.
714+
For Titan Multimodal embedding models, you can include both to create an embeddings query vector that averages the resulting text embeddings and image embeddings vectors.
715+
For Nova Multimodal embedding models, exactly one of text or the image must be present, but not both.
707716
708717
Args:
709718
text: The text to generate embeddings for
719+
image_source: The image to generate embeddings for (can be either an S3 URI (s3://bucket/key) or raw image bytes)
710720
model_id: The embedding model ID to use (default: amazon.titan-embed-text-v1)
711721
max_retries: Optional override for the instance's max_retries setting
722+
dimensions: Length of the output embeddings vector
712723
713724
Returns:
714725
List of floats representing the embedding vector
715726
"""
716-
if not text or not isinstance(text, str):
727+
if (not text or not isinstance(text, str)) and (not image_source):
717728
# Return an empty vector for empty input
718729
return []
719730

@@ -728,12 +739,61 @@ def generate_embedding(
728739
# Normalize whitespace and prepare the input text
729740
normalized_text = " ".join(text.split())
730741

742+
# Convert image to base64
743+
if image_source:
744+
image_bytes = prepare_image(image_source)
745+
image_base64 = base64.b64encode(image_bytes).decode('utf-8')
746+
747+
dimensions = int(dimensions)
748+
731749
# Prepare the request body based on the model
732-
if "amazon.titan-embed" in model_id:
733-
request_body = json.dumps({"inputText": normalized_text})
750+
payload_body: Dict[str, Any] = {}
751+
752+
if "amazon.titan-embed-text" in model_id:
753+
if not normalized_text:
754+
raise ValueError(
755+
"Amazon Titan Text models require a text parameter to generate embeddings for."
756+
)
757+
payload_body = {
758+
"inputText": normalized_text,
759+
"dimensions": dimensions,
760+
}
761+
elif "amazon.titan-embed-image" in model_id:
762+
payload_body = {
763+
"embeddingConfig": {
764+
"outputEmbeddingLength": dimensions,
765+
}
766+
}
767+
if normalized_text:
768+
payload_body["inputText"] = normalized_text
769+
if image_base64:
770+
payload_body["inputImage"] = image_base64
771+
elif "amazon.nova-2-multimodal-embeddings" in model_id:
772+
if normalized_text and image_source:
773+
raise ValueError(
774+
"Amazon Nova Multimodal Embedding models require exactly one of text or image parameter, but not both at the same time."
775+
)
776+
payload_body = {
777+
"taskType": "SINGLE_EMBEDDING",
778+
"singleEmbeddingParams": {
779+
"embeddingPurpose": "GENERIC_INDEX",
780+
"embeddingDimension": dimensions,
781+
}
782+
}
783+
if normalized_text:
784+
payload_body["singleEmbeddingParams"]["text"] = {"truncationMode": "END", "value": normalized_text}
785+
if image_source:
786+
payload_body["singleEmbeddingParams"].update(prepare_bedrock_image_attachment(image_bytes)) # detect image format
787+
payload_body["singleEmbeddingParams"]["image"]["source"]["bytes"] = image_base64
734788
else:
735789
# Default format for other models
736-
request_body = json.dumps({"text": normalized_text})
790+
if not normalized_text:
791+
raise ValueError(
792+
"Default format requires a text parameter to generate embeddings for."
793+
)
794+
payload_body = {"text": normalized_text}
795+
796+
request_body = json.dumps(payload_body)
737797

738798
# Call the recursive embedding function
739799
return self._generate_embedding_with_retry(
@@ -792,6 +852,10 @@ def _generate_embedding_with_retry(
792852
# Handle different response formats based on the model
793853
if "amazon.titan-embed" in model_id:
794854
embedding = response_body.get("embedding", [])
855+
elif "amazon.titan-embed-image" in model_id:
856+
embedding = response_body.get("embedding", [])
857+
elif "amazon.nova-2-multimodal-embeddings" in model_id:
858+
embedding = response_body["embeddings"][0]["embedding"]
795859
else:
796860
# Default extraction format
797861
embedding = response_body.get("embedding", [])

0 commit comments

Comments
 (0)