Fix image format: detect the actual format from image bytes instead of hard-coding "png"

kazmer97 · kazmer97 · commit 4e3bf446fbd4 · 2025-12-08T12:01:51.000Z
diff --git a/lib/idp_common_pkg/idp_common/extraction/agentic_idp.py b/lib/idp_common_pkg/idp_common/extraction/agentic_idp.py
@@ -47,6 +47,44 @@
     view_todo_list,
 )
 
+# Supported image formats for Bedrock API
+SUPPORTED_IMAGE_FORMATS = {"jpeg", "png", "gif", "webp"}
+
+
+def detect_image_format(image_bytes: bytes) -> str:
+    """
+    Detect the image format from raw bytes.
+
+    Args:
+        image_bytes: Raw image bytes
+
+    Returns:
+        Image format string suitable for Bedrock API ('jpeg', 'png', 'gif', 'webp')
+
+    Raises:
+        ValueError: If the image format is unsupported or cannot be detected
+    """
+    # Only the open/parse step is wrapped, so the "unsupported" error raised
+    # below never has to be told apart from decode failures by its message.
+    try:
+        # Context manager releases Pillow's handle once the format is read.
+        with Image.open(io.BytesIO(image_bytes)) as img:
+            pil_format = img.format
+    except Exception as e:
+        raise ValueError(f"Failed to detect image format: {e}") from e
+
+    # Pillow format names are matched upper-case ("JPEG"); the API names are
+    # the lower-case entries of SUPPORTED_IMAGE_FORMATS.
+    detected_format = (pil_format or "").lower()
+    if detected_format not in SUPPORTED_IMAGE_FORMATS:
+        raise ValueError(
+            f"Unsupported image format: {pil_format}. "
+            # sorted(): set iteration order is not stable across runs.
+            f"Supported formats: {', '.join(sorted(SUPPORTED_IMAGE_FORMATS))}"
+        )
+    return detected_format
+
+
 # Use AWS Lambda Powertools Logger for structured logging
 # Automatically logs as JSON with Lambda context, request_id, timestamp, etc.
 # In Lambda: Full JSON structured logs
@@ -207,11 +245,15 @@ def view_image(image_index: int, agent: Agent) -> dict:
         # Get the base image (already has grid overlay)
         img_bytes = page_images[image_index]
 
+        # Detect actual image format from bytes
+        img_format = detect_image_format(img_bytes)
+
         logger.info(
             "Returning image to agent",
             extra={
                 "image_index": image_index,
                 "image_size_bytes": len(img_bytes),
+                "image_format": img_format,
             },
         )
 
@@ -220,7 +262,7 @@ def view_image(image_index: int, agent: Agent) -> dict:
             "content": [
                 {
                     "image": {
-                        "format": "png",
+                        "format": img_format,
                         "source": {
                             "bytes": img_bytes,
                         },
@@ -775,12 +817,17 @@ def _prepare_prompt_content(
             extra={"image_count": len(page_images)},
         )
 
-        prompt_content += [
-            ContentBlock(
-                image=ImageContent(format="png", source=ImageSource(bytes=img_bytes))
+        for img_bytes in page_images:
+            # Detect actual image format from bytes
+            img_format = detect_image_format(img_bytes)
+            prompt_content.append(
+                ContentBlock(
+                    image=ImageContent(
+                        format=img_format,  # pyright: ignore[reportArgumentType]
+                        source=ImageSource(bytes=img_bytes),
+                    )
+                )
             )
-            for img_bytes in page_images
-        ]
 
     # Add existing data context if provided
     if existing_data: