Skip to content

Commit 79d088c

Browse files
committed
Merge branch 'fix/ocr-image-resizing' into 'develop'
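> Support single-dimension image resizing: when only target_width or target_height is configured, the missing dimension is calculated from the image's aspect ratio instead of skipping the resize. Also switches to Image.Resampling.LANCZOS and adds type-checker ignore comments in the OCR service. See merge request genaiic-reusable-assets/engagement-artifacts/genaiic-idp-accelerator!462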
2 parents ab29c4d + 05ca24a commit 79d088c

File tree

3 files changed

+71
-44
lines changed

3 files changed

+71
-44
lines changed

lib/idp_common_pkg/idp_common/image/__init__.py

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,22 +34,41 @@ def resize_image(image_data: bytes,
3434
if isinstance(target_height, str) and not target_height.strip():
3535
target_height = None
3636

37-
# If either dimension is None, return original image unchanged
38-
if target_width is None or target_height is None:
39-
logger.info("No resize requested (width or height is None/empty), returning original image")
37+
# If BOTH dimensions are None, return original image unchanged
38+
if target_width is None and target_height is None:
39+
logger.info("No resize requested (both dimensions are None), returning original image")
4040
return image_data
4141

42-
# Convert to int if needed
42+
# Convert to int if needed (before opening image)
4343
try:
44-
target_width = int(target_width)
45-
target_height = int(target_height)
44+
if target_width is not None:
45+
target_width = int(target_width)
46+
if target_height is not None:
47+
target_height = int(target_height)
4648
except (ValueError, TypeError):
4749
logger.warning(f"Invalid resize dimensions: width={target_width}, height={target_height}, returning original image")
4850
return image_data
51+
52+
# Open image to get dimensions and calculate missing dimension if needed
4953
image = Image.open(io.BytesIO(image_data))
5054
current_width, current_height = image.size
5155
original_format = image.format # Store original format
5256

57+
# Calculate missing dimension if only one provided (preserving aspect ratio)
58+
if target_width is None and target_height is not None:
59+
# Only height provided - calculate width preserving aspect ratio
60+
aspect_ratio = current_width / current_height
61+
target_width = int(target_height * aspect_ratio)
62+
logger.info(f"Calculated target_width={target_width} from target_height={target_height} (aspect={aspect_ratio:.3f})")
63+
elif target_height is None and target_width is not None:
64+
# Only width provided - calculate height preserving aspect ratio
65+
aspect_ratio = current_height / current_width
66+
target_height = int(target_width * aspect_ratio)
67+
logger.info(f"Calculated target_height={target_height} from target_width={target_width} (aspect={aspect_ratio:.3f})")
68+
69+
# At this point, both dimensions must be set (type guard for Pylance)
70+
assert target_width is not None and target_height is not None, "Both dimensions should be set after calculation"
71+
5372
# Calculate scaling factor to fit within bounds while preserving aspect ratio
5473
width_ratio = target_width / current_width
5574
height_ratio = target_height / current_height
@@ -62,7 +81,7 @@ def resize_image(image_data: bytes,
6281
new_width = int(current_width * scale_factor)
6382
new_height = int(current_height * scale_factor)
6483
logger.info(f"Resizing image from {current_width}x{current_height} to {new_width}x{new_height} (scale: {scale_factor:.3f})")
65-
image = image.resize((new_width, new_height), Image.LANCZOS)
84+
image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
6685

6786
# Save in original format if possible
6887
img_byte_array = io.BytesIO()

lib/idp_common_pkg/idp_common/ocr/service.py

Lines changed: 38 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -141,20 +141,24 @@ def __init__(
141141
f"No image sizing configured, applying default limits: "
142142
f"{DEFAULT_TARGET_WIDTH}x{DEFAULT_TARGET_HEIGHT} to optimize resource usage and token consumption"
143143
)
144-
elif target_width is not None and target_height is not None:
144+
else:
145145
# Handle empty strings by converting to None for validation
146146
if isinstance(target_width, str) and not target_width.strip():
147147
target_width = None
148148
if isinstance(target_height, str) and not target_height.strip():
149149
target_height = None
150150

151151
# If after handling empty strings we still have values, use them
152-
if target_width is not None and target_height is not None:
152+
if target_width is not None or target_height is not None:
153153
# Explicit configuration provided - validate and use it
154154
try:
155155
self.resize_config = {
156-
"target_width": int(target_width),
157-
"target_height": int(target_height),
156+
"target_width": int(target_width)
157+
if target_width is not None
158+
else None,
159+
"target_height": int(target_height)
160+
if target_height is not None
161+
else None,
158162
}
159163
logger.info(
160164
f"Using configured image sizing: {target_width}x{target_height}"
@@ -178,13 +182,6 @@ def __init__(
178182
f"Invalid image sizing configuration provided, applying default limits: "
179183
f"{DEFAULT_TARGET_WIDTH}x{DEFAULT_TARGET_HEIGHT} to optimize resource usage and token consumption"
180184
)
181-
else:
182-
# Partial configuration (only width or height) - no defaults applied
183-
# This preserves the existing behavior for partial configs
184-
self.resize_config = None
185-
logger.info(
186-
"Partial image sizing configuration detected, no defaults applied"
187-
)
188185

189186
# Extract preprocessing configuration (type-safe)
190187
preprocessing_value = self.config.ocr.image.preprocessing
@@ -637,20 +634,28 @@ def _process_image_file_direct(
637634
target_height = self.resize_config.get("target_height")
638635

639636
if target_width or target_height:
640-
# Check if image already fits within target dimensions
641-
if (
642-
original_width <= target_width
643-
and original_height <= target_height
644-
):
645-
logger.debug(
646-
f"Image {original_width}x{original_height} already fits within "
647-
f"{target_width}x{target_height}, using original"
648-
)
649-
needs_resize = False
637+
# Only check fit if both dimensions are provided (type-safe comparison)
638+
if target_width is not None and target_height is not None:
639+
# Check if image already fits within target dimensions
640+
if (
641+
original_width <= target_width
642+
and original_height <= target_height
643+
):
644+
logger.debug(
645+
f"Image {original_width}x{original_height} already fits within "
646+
f"{target_width}x{target_height}, using original"
647+
)
648+
needs_resize = False
649+
else:
650+
logger.debug(
651+
f"Image {original_width}x{original_height} needs resizing to fit "
652+
f"{target_width}x{target_height}"
653+
)
654+
needs_resize = True
650655
else:
656+
# Partial config - always resize to calculate missing dimension
651657
logger.debug(
652-
f"Image {original_width}x{original_height} needs resizing to fit "
653-
f"{target_width}x{target_height}"
658+
"Partial dimension config detected, will resize to calculate missing dimension"
654659
)
655660
needs_resize = True
656661

@@ -687,7 +692,7 @@ def _process_image_file_direct(
687692

688693
# Get original format info
689694
img_data = pix.tobytes()
690-
img_ext = pix.extension # Get original extension (png, jpg, etc.)
695+
img_ext = pix.extension # type: ignore[attr-defined] # Get original extension (png, jpg, etc.)
691696

692697
# Determine content type
693698
content_type_map = {
@@ -743,7 +748,7 @@ def _process_image_file_direct(
743748
else:
744749
# Fallback: extract as rendered image
745750
# This path should rarely be used since we pass original_file_content for images
746-
pix = page.get_pixmap()
751+
pix = page.get_pixmap() # type: ignore[attr-defined]
747752
logger.debug(
748753
f"Using PyMuPDF fallback for image extraction: {pix.width}x{pix.height}"
749754
)
@@ -1191,7 +1196,7 @@ def _extract_page_image(self, page: fitz.Page, is_pdf: bool, page_id: int) -> by
11911196
# For images, just apply the scale factor
11921197
matrix = fitz.Matrix(scale_factor, scale_factor)
11931198

1194-
pix = page.get_pixmap(matrix=matrix)
1199+
pix = page.get_pixmap(matrix=matrix) # type: ignore[attr-defined]
11951200

11961201
actual_width, actual_height = pix.width, pix.height
11971202
logger.info(
@@ -1202,9 +1207,9 @@ def _extract_page_image(self, page: fitz.Page, is_pdf: bool, page_id: int) -> by
12021207
# No resize needed - image is already smaller than targets
12031208
if is_pdf:
12041209
dpi = self.dpi or 150
1205-
pix = page.get_pixmap(dpi=dpi)
1210+
pix = page.get_pixmap(dpi=dpi) # type: ignore[attr-defined]
12061211
else:
1207-
pix = page.get_pixmap()
1212+
pix = page.get_pixmap() # type: ignore[attr-defined]
12081213

12091214
# Log actual extracted dimensions
12101215
actual_width, actual_height = pix.width, pix.height
@@ -1215,9 +1220,9 @@ def _extract_page_image(self, page: fitz.Page, is_pdf: bool, page_id: int) -> by
12151220
# No valid target dimensions - use original extraction
12161221
if is_pdf:
12171222
dpi = self.dpi or 150
1218-
pix = page.get_pixmap(dpi=dpi)
1223+
pix = page.get_pixmap(dpi=dpi) # type: ignore[attr-defined]
12191224
else:
1220-
pix = page.get_pixmap()
1225+
pix = page.get_pixmap() # type: ignore[attr-defined]
12211226

12221227
# Log actual extracted dimensions
12231228
actual_width, actual_height = pix.width, pix.height
@@ -1228,9 +1233,9 @@ def _extract_page_image(self, page: fitz.Page, is_pdf: bool, page_id: int) -> by
12281233
# No resize config - extract at original size
12291234
if is_pdf:
12301235
dpi = self.dpi or 150
1231-
pix = page.get_pixmap(dpi=dpi)
1236+
pix = page.get_pixmap(dpi=dpi) # type: ignore[attr-defined]
12321237
else:
1233-
pix = page.get_pixmap()
1238+
pix = page.get_pixmap() # type: ignore[attr-defined]
12341239

12351240
# Log actual extracted dimensions
12361241
actual_width, actual_height = pix.width, pix.height
@@ -1562,7 +1567,7 @@ def _parse_textract_response(
15621567
Returns:
15631568
Dictionary with 'text' key containing extracted text
15641569
"""
1565-
from textractor.parsers import response_parser
1570+
from textractor.parsers import response_parser # type: ignore[import-untyped]
15661571

15671572
# Create page identifier for logging
15681573
page_info = f" for page {page_id}" if page_id else ""

lib/idp_common_pkg/tests/unit/ocr/test_ocr_service.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -250,22 +250,25 @@ def test_init_config_pattern_empty_strings_apply_defaults(self):
250250
assert service.dpi == 150
251251

252252
def test_init_config_pattern_partial_sizing(self):
253-
"""Test initialization with partial sizing configuration preserves existing behavior."""
253+
"""Test initialization with partial sizing configuration enables single-dimension resizing."""
254254
config = {
255255
"ocr": {
256256
"image": {
257257
"dpi": 150,
258258
"target_width": 800,
259-
# target_height missing - should disable defaults
259+
# target_height missing - should pass through with None to enable aspect-ratio calculation
260260
}
261261
}
262262
}
263263

264264
with patch("boto3.client"):
265265
service = OcrService(config=config)
266266

267-
# Verify partial config disables defaults
268-
assert service.resize_config is None
267+
# Verify partial config is preserved (enables aspect-ratio calculation)
268+
assert service.resize_config == {
269+
"target_width": 800,
270+
"target_height": None,
271+
}
269272
assert service.dpi == 150
270273

271274
def test_init_config_pattern_invalid_sizing_fallback(self):

0 commit comments

Comments
 (0)