# Supported image formats for Bedrock API
SUPPORTED_IMAGE_FORMATS = {"jpeg", "png", "gif", "webp"}


def detect_image_format(image_bytes: bytes) -> str:
    """
    Detect the image format from raw bytes.

    The format is taken from what the image library identifies in the data
    itself, not from any file name or caller-supplied hint.

    Args:
        image_bytes: Raw image bytes

    Returns:
        Image format string suitable for Bedrock API ('jpeg', 'png', 'gif', 'webp')

    Raises:
        ValueError: If the image format is unsupported or cannot be detected
            (including empty or corrupt input).
    """
    try:
        # Use a context manager so the image handle is released promptly.
        with Image.open(io.BytesIO(image_bytes)) as img:
            detected = img.format
    except OSError as err:
        # Pillow signals unrecognised/corrupt data with UnidentifiedImageError,
        # an OSError subclass. Translate it so callers only need to handle the
        # ValueError this function documents.
        raise ValueError(f"Unable to detect image format: {err}") from err

    if not detected or detected.lower() not in SUPPORTED_IMAGE_FORMATS:
        # sorted() keeps the message stable; set iteration order is arbitrary.
        raise ValueError(
            f"Unsupported image format: {detected}. "
            f"Supported formats: {', '.join(sorted(SUPPORTED_IMAGE_FORMATS))}"
        )
    return detected.lower()


# Use AWS Lambda Powertools Logger for structured logging
# Automatically logs as JSON with Lambda context, request_id, timestamp, etc.
# In Lambda: Full JSON structured logs @@ -207,11 +234,15 @@ def view_image(image_index: int, agent: Agent) -> dict: # Get the base image (already has grid overlay) img_bytes = page_images[image_index] + # Detect actual image format from bytes + img_format = detect_image_format(img_bytes) + logger.info( "Returning image to agent", extra={ "image_index": image_index, "image_size_bytes": len(img_bytes), + "image_format": img_format, }, ) @@ -220,7 +251,7 @@ def view_image(image_index: int, agent: Agent) -> dict: "content": [ { "image": { - "format": "png", + "format": img_format, "source": { "bytes": img_bytes, }, @@ -775,12 +806,17 @@ def _prepare_prompt_content( extra={"image_count": len(page_images)}, ) - prompt_content += [ - ContentBlock( - image=ImageContent(format="png", source=ImageSource(bytes=img_bytes)) + for img_bytes in page_images: + # Detect actual image format from bytes + img_format = detect_image_format(img_bytes) + prompt_content.append( + ContentBlock( + image=ImageContent( + format=img_format, # pyright: ignore[reportArgumentType] + source=ImageSource(bytes=img_bytes), + ) + ) ) - for img_bytes in page_images - ] # Add existing data context if provided if existing_data: diff --git a/lib/idp_common_pkg/tests/unit/test_delete_tests.py b/lib/idp_common_pkg/tests/unit/test_delete_tests.py index 7243b6a53..6432d56d9 100644 --- a/lib/idp_common_pkg/tests/unit/test_delete_tests.py +++ b/lib/idp_common_pkg/tests/unit/test_delete_tests.py @@ -14,13 +14,19 @@ sys.modules["idp_common_pkg"] = Mock() sys.modules["idp_common_pkg.logger"] = Mock() -# Add the lambda directory to the path for importing -lambda_path = os.path.join( - os.path.dirname(__file__), "../../../../src/lambda/delete_tests" -) -sys.path.insert(0, lambda_path) +# Mock boto3 before importing the Lambda module to prevent NoRegionError +# The Lambda creates boto3 clients at module level which requires AWS region +with patch("boto3.resource") as mock_resource, patch("boto3.client") as 
mock_client: + mock_resource.return_value = Mock() + mock_client.return_value = Mock() + + # Add the lambda directory to the path for importing + lambda_path = os.path.join( + os.path.dirname(__file__), "../../../../src/lambda/delete_tests" + ) + sys.path.insert(0, lambda_path) -import index # type: ignore[import-untyped] # noqa: E402 + import index # type: ignore[import-untyped] # noqa: E402 @pytest.mark.unit diff --git a/lib/idp_common_pkg/tests/unit/test_results_resolver.py b/lib/idp_common_pkg/tests/unit/test_results_resolver.py index fccc14a46..a187ff40e 100644 --- a/lib/idp_common_pkg/tests/unit/test_results_resolver.py +++ b/lib/idp_common_pkg/tests/unit/test_results_resolver.py @@ -8,18 +8,24 @@ import pytest -# Import the specific lambda module using importlib to avoid conflicts -spec = importlib.util.spec_from_file_location( - "results_index", - os.path.join( - os.path.dirname(__file__), - "../../../../src/lambda/test_results_resolver/index.py", - ), -) -if spec is None or spec.loader is None: - raise ImportError("Could not load test_results_resolver module") -index = importlib.util.module_from_spec(spec) -spec.loader.exec_module(index) +# Mock boto3 before importing the Lambda module to prevent NoRegionError +# The Lambda creates boto3 clients at module level which requires AWS region +with patch("boto3.resource") as mock_resource, patch("boto3.client") as mock_client: + mock_resource.return_value = Mock() + mock_client.return_value = Mock() + + # Import the specific lambda module using importlib to avoid conflicts + spec = importlib.util.spec_from_file_location( + "results_index", + os.path.join( + os.path.dirname(__file__), + "../../../../src/lambda/test_results_resolver/index.py", + ), + ) + if spec is None or spec.loader is None: + raise ImportError("Could not load test_results_resolver module") + index = importlib.util.module_from_spec(spec) + spec.loader.exec_module(index) @pytest.mark.unit