Skip to content

Commit 4e3bf44

Browse files
committed
fix image format
1 parent 59f3738 commit 4e3bf44

File tree

1 file changed

+53
-6
lines changed

1 file changed

+53
-6
lines changed

lib/idp_common_pkg/idp_common/extraction/agentic_idp.py

Lines changed: 53 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,44 @@
4747
view_todo_list,
4848
)
4949

50+
# Supported image formats for Bedrock API
SUPPORTED_IMAGE_FORMATS = {"jpeg", "png", "gif", "webp"}

# Pillow's format identifiers (``Image.format``) mapped to the lowercase
# format strings this module hands back to callers.
_PILLOW_TO_BEDROCK_FORMAT = {
    "JPEG": "jpeg",
    "PNG": "png",
    "GIF": "gif",
    "WEBP": "webp",
}


def detect_image_format(image_bytes: bytes) -> str:
    """
    Detect the image format from raw bytes.

    The bytes are opened with Pillow and the format Pillow reports is mapped
    to one of the strings in ``SUPPORTED_IMAGE_FORMATS``.

    Args:
        image_bytes: Raw image bytes

    Returns:
        Image format string suitable for Bedrock API ('jpeg', 'png', 'gif', 'webp')

    Raises:
        ValueError: If the image format is unsupported or cannot be detected
    """
    # Only the open call can fail for reasons outside our control, so it is
    # the only thing inside the try. Any failure here (unidentifiable data,
    # wrong argument type, ...) is reported to callers as a ValueError with
    # the original exception chained as the cause.
    try:
        # The context manager releases the image's underlying file handle
        # as soon as the format has been read.
        with Image.open(io.BytesIO(image_bytes)) as img:
            pillow_format = img.format
    except Exception as e:
        raise ValueError(f"Failed to detect image format: {e}") from e

    detected_format = _PILLOW_TO_BEDROCK_FORMAT.get(pillow_format)
    if not detected_format:
        # Raised outside the try block so it is never re-wrapped and no
        # message sniffing is needed to tell the two failure modes apart.
        # sorted() keeps the message stable; set iteration order is not.
        raise ValueError(
            f"Unsupported image format: {pillow_format}. "
            f"Supported formats: {', '.join(sorted(SUPPORTED_IMAGE_FORMATS))}"
        )
    return detected_format
86+
87+
5088
# Use AWS Lambda Powertools Logger for structured logging
5189
# Automatically logs as JSON with Lambda context, request_id, timestamp, etc.
5290
# In Lambda: Full JSON structured logs
@@ -207,11 +245,15 @@ def view_image(image_index: int, agent: Agent) -> dict:
207245
# Get the base image (already has grid overlay)
208246
img_bytes = page_images[image_index]
209247

248+
# Detect actual image format from bytes
249+
img_format = detect_image_format(img_bytes)
250+
210251
logger.info(
211252
"Returning image to agent",
212253
extra={
213254
"image_index": image_index,
214255
"image_size_bytes": len(img_bytes),
256+
"image_format": img_format,
215257
},
216258
)
217259

@@ -220,7 +262,7 @@ def view_image(image_index: int, agent: Agent) -> dict:
220262
"content": [
221263
{
222264
"image": {
223-
"format": "png",
265+
"format": img_format,
224266
"source": {
225267
"bytes": img_bytes,
226268
},
@@ -775,12 +817,17 @@ def _prepare_prompt_content(
775817
extra={"image_count": len(page_images)},
776818
)
777819

778-
prompt_content += [
779-
ContentBlock(
780-
image=ImageContent(format="png", source=ImageSource(bytes=img_bytes))
820+
for img_bytes in page_images:
821+
# Detect actual image format from bytes
822+
img_format = detect_image_format(img_bytes)
823+
prompt_content.append(
824+
ContentBlock(
825+
image=ImageContent(
826+
format=img_format, # pyright: ignore[reportArgumentType]
827+
source=ImageSource(bytes=img_bytes),
828+
)
829+
)
781830
)
782-
for img_bytes in page_images
783-
]
784831

785832
# Add existing data context if provided
786833
if existing_data:

0 commit comments

Comments
 (0)