Skip to content

Commit eb95054

Browse files
author
Daniel Lorch
committed
feat: add support for Amazon Titan Multimodal Embeddings G1 and Amazon Nova Multimodal Embeddings
1 parent a253ab9 commit eb95054

File tree

2 files changed

+112
-7
lines changed

2 files changed

+112
-7
lines changed

lib/idp_common_pkg/idp_common/bedrock/README.md

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,47 @@ embedding = client.generate_embedding(
7373
# Use embedding for vector search, clustering, etc.
7474
```
7575

76+
Amazon Titan Multimodal Embeddings support both text and image at the same time. The resulting embeddings vector averages the text embeddings and image embeddings vectors.
77+
78+
```python
79+
from idp_common.bedrock.client import BedrockClient
80+
81+
with open("/path/to/document.png", "rb") as image_file:
82+
image_data = image_file.read()
83+
84+
client = BedrockClient()
85+
embedding = client.generate_embedding(
86+
text="This document contains information about loan applications.",
87+
image_source=image_data,
88+
model_id="amazon.titan-embed-image-v1"
89+
)
90+
```
91+
92+
The image source can also be an S3 URI:
93+
94+
```python
95+
from idp_common.bedrock.client import BedrockClient
96+
97+
client = BedrockClient()
98+
embedding = client.generate_embedding(
99+
image_source="s3://bucket/key",
100+
model_id="amazon.titan-embed-image-v1"
101+
)
102+
```
103+
104+
Amazon Nova Multimodal Embeddings with 3072 dimension size:
105+
106+
```python
107+
from idp_common.bedrock.client import BedrockClient
108+
109+
client = BedrockClient()
110+
embedding = client.generate_embedding(
111+
image_source="s3://bucket/key",
112+
model_id="amazon.nova-2-multimodal-embeddings-v1:0",
113+
dimensions=3072
114+
)
115+
```
116+
76117
## Prompt Caching with CachePoint
77118

78119
Prompt caching is a powerful feature in Amazon Bedrock that significantly reduces response latency for workloads with repetitive contexts. The Bedrock client provides built-in support for this via the `<<CACHEPOINT>>` tag.

lib/idp_common_pkg/idp_common/bedrock/client.py

Lines changed: 71 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import copy
1717
import random
1818
import socket
19+
import base64
1920
from typing import Dict, Any, List, Optional, Union, Tuple, Type
2021
from botocore.config import Config
2122
from botocore.exceptions import (
@@ -25,7 +26,10 @@
2526
EndpointConnectionError,
2627
)
2728
from urllib3.exceptions import ReadTimeoutError as Urllib3ReadTimeoutError
28-
29+
from idp_common.image import (
30+
prepare_image,
31+
prepare_bedrock_image_attachment
32+
)
2933

3034
# Dummy exception classes for requests timeouts if requests is not available
3135
class _RequestsReadTimeout(Exception):
@@ -711,22 +715,29 @@ def get_guardrail_config(self) -> Optional[Dict[str, str]]:
711715

712716
def generate_embedding(
713717
self,
714-
text: str,
718+
text: str = "",
719+
image_source: Optional[Union[str, bytes]] = None,
715720
model_id: str = "amazon.titan-embed-text-v1",
721+
dimensions: int = 1024,
716722
max_retries: Optional[int] = None,
717723
) -> List[float]:
718724
"""
719-
Generate an embedding vector for the given text using Amazon Bedrock.
725+
Generate an embedding vector for the given text or image_source using Amazon Bedrock.
726+
At least one of text or the image is required to generate the embedding.
727+
For Titan Multimodal embedding models, you can include both to create an embeddings query vector that averages the resulting text embeddings and image embeddings vectors.
728+
For Nova Multimodal embedding models, exactly one of text or the image must be present, but not both.
720729
721730
Args:
722731
text: The text to generate embeddings for
732+
image_source: The image to generate embeddings for (can be either an S3 URI (s3://bucket/key) or raw image bytes)
723733
model_id: The embedding model ID to use (default: amazon.titan-embed-text-v1)
724734
max_retries: Optional override for the instance's max_retries setting
735+
dimensions: Length of the output embeddings vector
725736
726737
Returns:
727738
List of floats representing the embedding vector
728739
"""
729-
if not text or not isinstance(text, str):
740+
if (not text or not isinstance(text, str)) and (not image_source):
730741
# Return an empty vector for empty input
731742
return []
732743

@@ -741,12 +752,61 @@ def generate_embedding(
741752
# Normalize whitespace and prepare the input text
742753
normalized_text = " ".join(text.split())
743754

755+
# Convert image to base64
756+
if image_source:
757+
image_bytes = prepare_image(image_source)
758+
image_base64 = base64.b64encode(image_bytes).decode('utf-8')
759+
760+
dimensions = int(dimensions)
761+
744762
# Prepare the request body based on the model
745-
if "amazon.titan-embed" in model_id:
746-
request_body = json.dumps({"inputText": normalized_text})
763+
payload_body: Dict[str, Any] = {}
764+
765+
if "amazon.titan-embed-text" in model_id:
766+
if not normalized_text:
767+
raise ValueError(
768+
"Amazon Titan Text models require a text parameter to generate embeddings for."
769+
)
770+
payload_body = {
771+
"inputText": normalized_text,
772+
"dimensions": dimensions,
773+
}
774+
elif "amazon.titan-embed-image" in model_id:
775+
payload_body = {
776+
"embeddingConfig": {
777+
"outputEmbeddingLength": dimensions,
778+
}
779+
}
780+
if normalized_text:
781+
payload_body["inputText"] = normalized_text
782+
if image_base64:
783+
payload_body["inputImage"] = image_base64
784+
elif "amazon.nova-2-multimodal-embeddings" in model_id:
785+
if normalized_text and image_source:
786+
raise ValueError(
787+
"Amazon Nova Multimodal Embedding models require exactly one of text or image parameter, but not both at the same time."
788+
)
789+
payload_body = {
790+
"taskType": "SINGLE_EMBEDDING",
791+
"singleEmbeddingParams": {
792+
"embeddingPurpose": "GENERIC_INDEX",
793+
"embeddingDimension": dimensions,
794+
}
795+
}
796+
if normalized_text:
797+
payload_body["singleEmbeddingParams"]["text"] = {"truncationMode": "END", "value": normalized_text}
798+
if image_source:
799+
payload_body["singleEmbeddingParams"].update(prepare_bedrock_image_attachment(image_bytes)) # detect image format
800+
payload_body["singleEmbeddingParams"]["image"]["source"]["bytes"] = image_base64
747801
else:
748802
# Default format for other models
749-
request_body = json.dumps({"text": normalized_text})
803+
if not normalized_text:
804+
raise ValueError(
805+
"Default format requires a text parameter to generate embeddings for."
806+
)
807+
payload_body = {"text": normalized_text}
808+
809+
request_body = json.dumps(payload_body)
750810

751811
# Call the recursive embedding function
752812
return self._generate_embedding_with_retry(
@@ -805,6 +865,10 @@ def _generate_embedding_with_retry(
805865
# Handle different response formats based on the model
806866
if "amazon.titan-embed" in model_id:
807867
embedding = response_body.get("embedding", [])
868+
elif "amazon.titan-embed-image" in model_id:
869+
embedding = response_body.get("embedding", [])
870+
elif "amazon.nova-2-multimodal-embeddings" in model_id:
871+
embedding = response_body["embeddings"][0]["embedding"]
808872
else:
809873
# Default extraction format
810874
embedding = response_body.get("embedding", [])

0 commit comments

Comments
 (0)