Merge branch 'fix/ocr-image-resizing' into 'develop'

rstrahan · rstrahan · commit 79d088ca2864 · 2025-12-12T21:13:20.000Z
> Based on the git diff, I can see the changes involve:

See merge request genaiic-reusable-assets/engagement-artifacts/genaiic-idp-accelerator!462
diff --git a/lib/idp_common_pkg/idp_common/image/__init__.py b/lib/idp_common_pkg/idp_common/image/__init__.py
@@ -34,22 +34,41 @@ def resize_image(image_data: bytes,
     if isinstance(target_height, str) and not target_height.strip():
         target_height = None
     
-    # If either dimension is None, return original image unchanged
-    if target_width is None or target_height is None:
-        logger.info("No resize requested (width or height is None/empty), returning original image")
+    # If BOTH dimensions are None, return original image unchanged
+    if target_width is None and target_height is None:
+        logger.info("No resize requested (both dimensions are None), returning original image")
         return image_data
     
-    # Convert to int if needed
+    # Convert to int if needed (before opening image)
     try:
-        target_width = int(target_width)
-        target_height = int(target_height)
+        if target_width is not None:
+            target_width = int(target_width)
+        if target_height is not None:
+            target_height = int(target_height)
     except (ValueError, TypeError):
         logger.warning(f"Invalid resize dimensions: width={target_width}, height={target_height}, returning original image")
         return image_data
+    
+    # Open image to get dimensions and calculate missing dimension if needed
     image = Image.open(io.BytesIO(image_data))
     current_width, current_height = image.size
     original_format = image.format  # Store original format
     
+    # Calculate missing dimension if only one provided (preserving aspect ratio)
+    if target_width is None and target_height is not None:
+        # Only height provided - calculate width preserving aspect ratio
+        aspect_ratio = current_width / current_height
+        target_width = int(target_height * aspect_ratio)
+        logger.info(f"Calculated target_width={target_width} from target_height={target_height} (aspect={aspect_ratio:.3f})")
+    elif target_height is None and target_width is not None:
+        # Only width provided - calculate height preserving aspect ratio  
+        aspect_ratio = current_height / current_width
+        target_height = int(target_width * aspect_ratio)
+        logger.info(f"Calculated target_height={target_height} from target_width={target_width} (aspect={aspect_ratio:.3f})")
+    
+    # At this point, both dimensions must be set (type guard for Pylance)
+    assert target_width is not None and target_height is not None, "Both dimensions should be set after calculation"
+    
     # Calculate scaling factor to fit within bounds while preserving aspect ratio
     width_ratio = target_width / current_width
     height_ratio = target_height / current_height
@@ -62,7 +81,7 @@ def resize_image(image_data: bytes,
         new_width = int(current_width * scale_factor)
         new_height = int(current_height * scale_factor)
         logger.info(f"Resizing image from {current_width}x{current_height} to {new_width}x{new_height} (scale: {scale_factor:.3f})")
-        image = image.resize((new_width, new_height), Image.LANCZOS)
+        image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
         
         # Save in original format if possible
         img_byte_array = io.BytesIO()
diff --git a/lib/idp_common_pkg/idp_common/ocr/service.py b/lib/idp_common_pkg/idp_common/ocr/service.py
@@ -141,20 +141,24 @@ def __init__(
                     f"No image sizing configured, applying default limits: "
                     f"{DEFAULT_TARGET_WIDTH}x{DEFAULT_TARGET_HEIGHT} to optimize resource usage and token consumption"
                 )
-            elif target_width is not None and target_height is not None:
+            else:
                 # Handle empty strings by converting to None for validation
                 if isinstance(target_width, str) and not target_width.strip():
                     target_width = None
                 if isinstance(target_height, str) and not target_height.strip():
                     target_height = None
 
                 # If after handling empty strings we still have values, use them
-                if target_width is not None and target_height is not None:
+                if target_width is not None or target_height is not None:
                     # Explicit configuration provided - validate and use it
                     try:
                         self.resize_config = {
-                            "target_width": int(target_width),
-                            "target_height": int(target_height),
+                            "target_width": int(target_width)
+                            if target_width is not None
+                            else None,
+                            "target_height": int(target_height)
+                            if target_height is not None
+                            else None,
                         }
                         logger.info(
                             f"Using configured image sizing: {target_width}x{target_height}"
@@ -178,13 +182,6 @@ def __init__(
                         f"Invalid image sizing configuration provided, applying default limits: "
                         f"{DEFAULT_TARGET_WIDTH}x{DEFAULT_TARGET_HEIGHT} to optimize resource usage and token consumption"
                     )
-            else:
-                # Partial configuration (only width or height) - no defaults applied
-                # This preserves the existing behavior for partial configs
-                self.resize_config = None
-                logger.info(
-                    "Partial image sizing configuration detected, no defaults applied"
-                )
 
             # Extract preprocessing configuration (type-safe)
             preprocessing_value = self.config.ocr.image.preprocessing
@@ -637,20 +634,28 @@ def _process_image_file_direct(
                 target_height = self.resize_config.get("target_height")
 
                 if target_width or target_height:
-                    # Check if image already fits within target dimensions
-                    if (
-                        original_width <= target_width
-                        and original_height <= target_height
-                    ):
-                        logger.debug(
-                            f"Image {original_width}x{original_height} already fits within "
-                            f"{target_width}x{target_height}, using original"
-                        )
-                        needs_resize = False
+                    # Only check fit if both dimensions are provided (type-safe comparison)
+                    if target_width is not None and target_height is not None:
+                        # Check if image already fits within target dimensions
+                        if (
+                            original_width <= target_width
+                            and original_height <= target_height
+                        ):
+                            logger.debug(
+                                f"Image {original_width}x{original_height} already fits within "
+                                f"{target_width}x{target_height}, using original"
+                            )
+                            needs_resize = False
+                        else:
+                            logger.debug(
+                                f"Image {original_width}x{original_height} needs resizing to fit "
+                                f"{target_width}x{target_height}"
+                            )
+                            needs_resize = True
                     else:
+                        # Partial config - always resize to calculate missing dimension
                         logger.debug(
-                            f"Image {original_width}x{original_height} needs resizing to fit "
-                            f"{target_width}x{target_height}"
+                            "Partial dimension config detected, will resize to calculate missing dimension"
                         )
                         needs_resize = True
 
@@ -687,7 +692,7 @@ def _process_image_file_direct(
 
                 # Get original format info
                 img_data = pix.tobytes()
-                img_ext = pix.extension  # Get original extension (png, jpg, etc.)
+                img_ext = pix.extension  # type: ignore[attr-defined]  # Get original extension (png, jpg, etc.)
 
                 # Determine content type
                 content_type_map = {
@@ -743,7 +748,7 @@ def _process_image_file_direct(
             else:
                 # Fallback: extract as rendered image
                 # This path should rarely be used since we pass original_file_content for images
-                pix = page.get_pixmap()
+                pix = page.get_pixmap()  # type: ignore[attr-defined]
                 logger.debug(
                     f"Using PyMuPDF fallback for image extraction: {pix.width}x{pix.height}"
                 )
@@ -1191,7 +1196,7 @@ def _extract_page_image(self, page: fitz.Page, is_pdf: bool, page_id: int) -> by
                             # For images, just apply the scale factor
                             matrix = fitz.Matrix(scale_factor, scale_factor)
 
-                        pix = page.get_pixmap(matrix=matrix)
+                        pix = page.get_pixmap(matrix=matrix)  # type: ignore[attr-defined]
 
                         actual_width, actual_height = pix.width, pix.height
                         logger.info(
@@ -1202,9 +1207,9 @@ def _extract_page_image(self, page: fitz.Page, is_pdf: bool, page_id: int) -> by
                         # No resize needed - image is already smaller than targets
                         if is_pdf:
                             dpi = self.dpi or 150
-                            pix = page.get_pixmap(dpi=dpi)
+                            pix = page.get_pixmap(dpi=dpi)  # type: ignore[attr-defined]
                         else:
-                            pix = page.get_pixmap()
+                            pix = page.get_pixmap()  # type: ignore[attr-defined]
 
                         # Log actual extracted dimensions
                         actual_width, actual_height = pix.width, pix.height
@@ -1215,9 +1220,9 @@ def _extract_page_image(self, page: fitz.Page, is_pdf: bool, page_id: int) -> by
                     # No valid target dimensions - use original extraction
                     if is_pdf:
                         dpi = self.dpi or 150
-                        pix = page.get_pixmap(dpi=dpi)
+                        pix = page.get_pixmap(dpi=dpi)  # type: ignore[attr-defined]
                     else:
-                        pix = page.get_pixmap()
+                        pix = page.get_pixmap()  # type: ignore[attr-defined]
 
                     # Log actual extracted dimensions
                     actual_width, actual_height = pix.width, pix.height
@@ -1228,9 +1233,9 @@ def _extract_page_image(self, page: fitz.Page, is_pdf: bool, page_id: int) -> by
                 # No resize config - extract at original size
                 if is_pdf:
                     dpi = self.dpi or 150
-                    pix = page.get_pixmap(dpi=dpi)
+                    pix = page.get_pixmap(dpi=dpi)  # type: ignore[attr-defined]
                 else:
-                    pix = page.get_pixmap()
+                    pix = page.get_pixmap()  # type: ignore[attr-defined]
 
                 # Log actual extracted dimensions
                 actual_width, actual_height = pix.width, pix.height
@@ -1562,7 +1567,7 @@ def _parse_textract_response(
         Returns:
             Dictionary with 'text' key containing extracted text
         """
-        from textractor.parsers import response_parser
+        from textractor.parsers import response_parser  # type: ignore[import-untyped]
 
         # Create page identifier for logging
         page_info = f" for page {page_id}" if page_id else ""
diff --git a/lib/idp_common_pkg/tests/unit/ocr/test_ocr_service.py b/lib/idp_common_pkg/tests/unit/ocr/test_ocr_service.py
@@ -250,22 +250,25 @@ def test_init_config_pattern_empty_strings_apply_defaults(self):
             assert service.dpi == 150
 
     def test_init_config_pattern_partial_sizing(self):
-        """Test initialization with partial sizing configuration preserves existing behavior."""
+        """Test initialization with partial sizing configuration enables single-dimension resizing."""
         config = {
             "ocr": {
                 "image": {
                     "dpi": 150,
                     "target_width": 800,
-                    # target_height missing - should disable defaults
+                    # target_height missing - should pass through with None to enable aspect-ratio calculation
                 }
             }
         }
 
         with patch("boto3.client"):
             service = OcrService(config=config)
 
-            # Verify partial config disables defaults
-            assert service.resize_config is None
+            # Verify partial config is preserved (enables aspect-ratio calculation)
+            assert service.resize_config == {
+                "target_width": 800,
+                "target_height": None,
+            }
             assert service.dpi == 150
 
     def test_init_config_pattern_invalid_sizing_fallback(self):