Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 42 additions & 6 deletions lib/idp_common_pkg/idp_common/extraction/agentic_idp.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,33 @@
view_todo_list,
)

# Supported image formats for Bedrock API
SUPPORTED_IMAGE_FORMATS = {"jpeg", "png", "gif", "webp"}


def detect_image_format(image_bytes: bytes) -> str:
    """
    Detect the image format from raw bytes.

    Args:
        image_bytes: Raw image bytes

    Returns:
        Lower-case image format string suitable for Bedrock API
        ('jpeg', 'png', 'gif', 'webp')

    Raises:
        ValueError: If the image format is unsupported or cannot be detected
    """
    # Sorted so the error message is stable across runs (set order is not).
    supported = ", ".join(sorted(SUPPORTED_IMAGE_FORMATS))

    try:
        # The context manager closes the image once the format has been read.
        with Image.open(io.BytesIO(image_bytes)) as img:
            detected = img.format
    except OSError as err:
        # Pillow signals unrecognisable data with UnidentifiedImageError, an
        # OSError subclass; convert it to the ValueError this function documents.
        raise ValueError(
            "Unsupported image format: image data could not be identified. "
            f"Supported formats: {supported}"
        ) from err

    normalized = detected.lower() if detected else None
    if normalized not in SUPPORTED_IMAGE_FORMATS:
        raise ValueError(
            f"Unsupported image format: {detected}. "
            f"Supported formats: {supported}"
        )
    return normalized


# Use AWS Lambda Powertools Logger for structured logging
# Automatically logs as JSON with Lambda context, request_id, timestamp, etc.
# In Lambda: Full JSON structured logs
Expand Down Expand Up @@ -207,11 +234,15 @@ def view_image(image_index: int, agent: Agent) -> dict:
# Get the base image (already has grid overlay)
img_bytes = page_images[image_index]

# Detect actual image format from bytes
img_format = detect_image_format(img_bytes)

logger.info(
"Returning image to agent",
extra={
"image_index": image_index,
"image_size_bytes": len(img_bytes),
"image_format": img_format,
},
)

Expand All @@ -220,7 +251,7 @@ def view_image(image_index: int, agent: Agent) -> dict:
"content": [
{
"image": {
"format": "png",
"format": img_format,
"source": {
"bytes": img_bytes,
},
Expand Down Expand Up @@ -775,12 +806,17 @@ def _prepare_prompt_content(
extra={"image_count": len(page_images)},
)

prompt_content += [
ContentBlock(
image=ImageContent(format="png", source=ImageSource(bytes=img_bytes))
for img_bytes in page_images:
# Detect actual image format from bytes
img_format = detect_image_format(img_bytes)
prompt_content.append(
ContentBlock(
image=ImageContent(
format=img_format, # pyright: ignore[reportArgumentType]
source=ImageSource(bytes=img_bytes),
)
)
)
for img_bytes in page_images
]

# Add existing data context if provided
if existing_data:
Expand Down
18 changes: 12 additions & 6 deletions lib/idp_common_pkg/tests/unit/test_delete_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,19 @@
sys.modules["idp_common_pkg"] = Mock()
sys.modules["idp_common_pkg.logger"] = Mock()

# Add the lambda directory to the path for importing
lambda_path = os.path.join(
os.path.dirname(__file__), "../../../../src/lambda/delete_tests"
)
sys.path.insert(0, lambda_path)
# Mock boto3 before importing the Lambda module to prevent NoRegionError.
# The Lambda creates boto3 clients at module level, which requires an AWS region to be configured.
with patch("boto3.resource") as mock_resource, patch("boto3.client") as mock_client:
mock_resource.return_value = Mock()
mock_client.return_value = Mock()

# Add the lambda directory to the path for importing
lambda_path = os.path.join(
os.path.dirname(__file__), "../../../../src/lambda/delete_tests"
)
sys.path.insert(0, lambda_path)

import index # type: ignore[import-untyped] # noqa: E402
import index # type: ignore[import-untyped] # noqa: E402


@pytest.mark.unit
Expand Down
30 changes: 18 additions & 12 deletions lib/idp_common_pkg/tests/unit/test_results_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,24 @@

import pytest

# Import the specific lambda module using importlib to avoid conflicts
spec = importlib.util.spec_from_file_location(
"results_index",
os.path.join(
os.path.dirname(__file__),
"../../../../src/lambda/test_results_resolver/index.py",
),
)
if spec is None or spec.loader is None:
raise ImportError("Could not load test_results_resolver module")
index = importlib.util.module_from_spec(spec)
spec.loader.exec_module(index)
# Mock boto3 before importing the Lambda module to prevent NoRegionError.
# The Lambda creates boto3 clients at module level, which requires an AWS region to be configured.
with patch("boto3.resource") as mock_resource, patch("boto3.client") as mock_client:
mock_resource.return_value = Mock()
mock_client.return_value = Mock()

# Import the specific lambda module using importlib to avoid conflicts
spec = importlib.util.spec_from_file_location(
"results_index",
os.path.join(
os.path.dirname(__file__),
"../../../../src/lambda/test_results_resolver/index.py",
),
)
if spec is None or spec.loader is None:
raise ImportError("Could not load test_results_resolver module")
index = importlib.util.module_from_spec(spec)
spec.loader.exec_module(index)


@pytest.mark.unit
Expand Down