Skip to content

Commit 0877b5f

Browse files
committed
Merge branch 'fix/image-res' into 'develop'
fix: preserve image format and resolution for PNG/JPG files
See merge request genaiic-reusable-assets/engagement-artifacts/genaiic-idp-accelerator!223
2 parents 95a2f6a + 00c0f05 commit 0877b5f

File tree

12 files changed

+564
-75
lines changed

12 files changed

+564
-75
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,12 @@ SPDX-License-Identifier: MIT-0
2525

2626

2727
### Fixed
28+
- **OCR Service Image Processing for PNG/JPG Files**
29+
- Fixed issue where PNG files were being unnecessarily converted to JPEG format and resized
30+
- PNG and JPG files now preserve their original format when stored in S3
31+
- Image resolution is preserved by default unless explicitly configured via `resize_config`
32+
- DPI settings now only apply to PDF files, not to image files
33+
- Resolves issue where PNG files were being converted to lower resolution JPG files
2834

2935

3036
## [0.3.7]

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.3.8-wip1
1+
0.3.8-wip3

lib/idp_common_pkg/idp_common/assessment/granular_service.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1098,17 +1098,29 @@ def process_document_section(self, document: Document, section_id: str) -> Docum
10981098
page = document.pages[page_id]
10991099
image_uri = page.image_uri
11001100

1101+
if target_width is not None and target_height is not None:
1102+
# Handle empty strings and convert to int
1103+
if isinstance(target_width, str) and not target_width.strip():
1104+
target_width = None
1105+
if isinstance(target_height, str) and not target_height.strip():
1106+
target_height = None
1107+
1108+
# Only proceed if we have valid values after cleaning
11011109
if target_width is not None and target_height is not None:
1102-
# Cast to int in case config values are strings
1103-
target_width = int(target_width)
1104-
target_height = int(target_height)
1105-
image_content = image.prepare_image(
1106-
image_uri, target_width, target_height
1107-
)
1110+
try:
1111+
target_width = int(target_width)
1112+
target_height = int(target_height)
1113+
image_content = image.prepare_image(
1114+
image_uri, target_width, target_height
1115+
)
1116+
except (ValueError, TypeError) as e:
1117+
logger.warning(f"Invalid resize configuration values: {e}")
1118+
# Fall back to default image preparation
1119+
image_content = image.prepare_image(image_uri)
11081120
else:
1109-
image_content = image.prepare_image(
1110-
image_uri
1111-
) # Uses function defaults
1121+
image_content = image.prepare_image(image_uri)
1122+
else:
1123+
image_content = image.prepare_image(image_uri) # Uses function defaults
11121124
page_images.append(image_content)
11131125

11141126
t3 = time.time()

lib/idp_common_pkg/idp_common/assessment/service.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -696,17 +696,29 @@ def process_document_section(self, document: Document, section_id: str) -> Docum
696696
page = document.pages[page_id]
697697
image_uri = page.image_uri
698698

699+
if target_width is not None and target_height is not None:
700+
# Handle empty strings and convert to int
701+
if isinstance(target_width, str) and not target_width.strip():
702+
target_width = None
703+
if isinstance(target_height, str) and not target_height.strip():
704+
target_height = None
705+
706+
# Only proceed if we have valid values after cleaning
699707
if target_width is not None and target_height is not None:
700-
# Cast to int in case config values are strings
701-
target_width = int(target_width)
702-
target_height = int(target_height)
703-
image_content = image.prepare_image(
704-
image_uri, target_width, target_height
705-
)
708+
try:
709+
target_width = int(target_width)
710+
target_height = int(target_height)
711+
image_content = image.prepare_image(
712+
image_uri, target_width, target_height
713+
)
714+
except (ValueError, TypeError) as e:
715+
logger.warning(f"Invalid resize configuration values: {e}")
716+
# Fall back to default image preparation
717+
image_content = image.prepare_image(image_uri)
706718
else:
707-
image_content = image.prepare_image(
708-
image_uri
709-
) # Uses function defaults
719+
image_content = image.prepare_image(image_uri)
720+
else:
721+
image_content = image.prepare_image(image_uri) # Uses function defaults
710722
page_images.append(image_content)
711723

712724
t3 = time.time()

lib/idp_common_pkg/idp_common/classification/service.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -618,12 +618,26 @@ def classify_page_bedrock(
618618
target_height = image_config.get("target_height")
619619

620620
if target_width is not None and target_height is not None:
621-
# Cast to int in case config values are strings
622-
target_width = int(target_width)
623-
target_height = int(target_height)
624-
image_content = image.prepare_image(
625-
image_uri, target_width, target_height
626-
)
621+
# Handle empty strings and convert to int
622+
if isinstance(target_width, str) and not target_width.strip():
623+
target_width = None
624+
if isinstance(target_height, str) and not target_height.strip():
625+
target_height = None
626+
627+
# Only proceed if we have valid values after cleaning
628+
if target_width is not None and target_height is not None:
629+
try:
630+
target_width = int(target_width)
631+
target_height = int(target_height)
632+
image_content = image.prepare_image(
633+
image_uri, target_width, target_height
634+
)
635+
except (ValueError, TypeError) as e:
636+
logger.warning(f"Invalid resize configuration values: {e}")
637+
# Fall back to default image preparation
638+
image_content = image.prepare_image(image_uri)
639+
else:
640+
image_content = image.prepare_image(image_uri)
627641
else:
628642
image_content = image.prepare_image(
629643
image_uri

lib/idp_common_pkg/idp_common/extraction/service.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -619,12 +619,26 @@ def process_document_section(self, document: Document, section_id: str) -> Docum
619619
page = document.pages[page_id]
620620
image_uri = page.image_uri
621621
if target_width is not None and target_height is not None:
622-
# Cast to int in case config values are strings
623-
target_width = int(target_width)
624-
target_height = int(target_height)
625-
image_content = image.prepare_image(
626-
image_uri, target_width, target_height
627-
)
622+
# Handle empty strings and convert to int
623+
if isinstance(target_width, str) and not target_width.strip():
624+
target_width = None
625+
if isinstance(target_height, str) and not target_height.strip():
626+
target_height = None
627+
628+
# Only proceed if we have valid values after cleaning
629+
if target_width is not None and target_height is not None:
630+
try:
631+
target_width = int(target_width)
632+
target_height = int(target_height)
633+
image_content = image.prepare_image(
634+
image_uri, target_width, target_height
635+
)
636+
except (ValueError, TypeError) as e:
637+
logger.warning(f"Invalid resize configuration values: {e}")
638+
# Fall back to default image preparation
639+
image_content = image.prepare_image(image_uri)
640+
else:
641+
image_content = image.prepare_image(image_uri)
628642
else:
629643
image_content = image.prepare_image(
630644
image_uri

lib/idp_common_pkg/idp_common/image/__init__.py

Lines changed: 45 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,54 +12,86 @@
1212

1313
def resize_image(image_data: bytes,
1414
target_width: int = 951,
15-
target_height: int = 1268) -> bytes:
15+
target_height: int = 1268,
16+
allow_upscale: bool = False) -> bytes:
1617
"""
1718
Resize an image to fit within target dimensions while preserving aspect ratio.
1819
No padding, no distortion - pure proportional scaling.
20+
Preserves original format when possible.
1921
2022
Args:
2123
image_data: Raw image bytes
2224
target_width: Target width in pixels
2325
target_height: Target height in pixels
26+
allow_upscale: Whether to allow making the image larger than original
2427
2528
Returns:
26-
Resized image as JPEG bytes
29+
Resized image bytes in original format (or JPEG if format cannot be preserved)
2730
"""
2831
image = Image.open(io.BytesIO(image_data))
2932
current_width, current_height = image.size
33+
original_format = image.format # Store original format
3034

3135
# Calculate scaling factor to fit within bounds while preserving aspect ratio
3236
width_ratio = target_width / current_width
3337
height_ratio = target_height / current_height
3438
scale_factor = min(width_ratio, height_ratio) # Fit within bounds
3539

36-
# Only resize if we're making it smaller
37-
if scale_factor < 1.0:
40+
# Determine if resizing is needed
41+
needs_resize = (scale_factor < 1.0) or (allow_upscale and scale_factor > 1.0)
42+
43+
if needs_resize:
3844
new_width = int(current_width * scale_factor)
3945
new_height = int(current_height * scale_factor)
4046
logger.info(f"Resizing image from {current_width}x{current_height} to {new_width}x{new_height} (scale: {scale_factor:.3f})")
4147
image = image.resize((new_width, new_height), Image.LANCZOS)
48+
49+
# Save in original format if possible
50+
img_byte_array = io.BytesIO()
51+
52+
# Determine save format - use original if available, otherwise JPEG
53+
if original_format and original_format in ['JPEG', 'PNG', 'GIF', 'BMP', 'TIFF', 'WEBP']:
54+
save_format = original_format
55+
else:
56+
save_format = 'JPEG'
57+
logger.info(f"Converting from {original_format or 'unknown'} to JPEG")
58+
59+
# Prepare save parameters
60+
save_kwargs = {"format": save_format}
61+
62+
# Add quality parameters for JPEG
63+
if save_format in ['JPEG', 'JPG']:
64+
save_kwargs["quality"] = 95 # High quality
65+
save_kwargs["optimize"] = True
66+
67+
# Handle format-specific requirements
68+
if save_format == 'PNG' and image.mode not in ['RGBA', 'LA', 'L', 'P']:
69+
# PNG requires specific modes
70+
if image.mode == 'CMYK':
71+
image = image.convert('RGB')
72+
73+
image.save(img_byte_array, **save_kwargs)
74+
return img_byte_array.getvalue()
4275
else:
43-
logger.debug(f"Image {current_width}x{current_height} already fits within {target_width}x{target_height}, no resizing needed")
44-
45-
# Convert to JPEG bytes
46-
img_byte_array = io.BytesIO()
47-
image.save(img_byte_array, format="JPEG")
48-
return img_byte_array.getvalue()
76+
# No resizing needed - return original data unchanged
77+
logger.info(f"Image {current_width}x{current_height} already fits within {target_width}x{target_height}, returning original")
78+
return image_data
4979

5080
def prepare_image(image_source: Union[str, bytes],
5181
target_width: int = 951,
52-
target_height: int = 1268) -> bytes:
82+
target_height: int = 1268,
83+
allow_upscale: bool = False) -> bytes:
5384
"""
5485
Prepare an image for model input from either S3 URI or raw bytes
5586
5687
Args:
5788
image_source: Either an S3 URI (s3://bucket/key) or raw image bytes
5889
target_width: Target width in pixels
5990
target_height: Target height in pixels
91+
allow_upscale: Whether to allow making the image larger than original
6092
6193
Returns:
62-
Processed image as JPEG bytes ready for model input
94+
Processed image bytes ready for model input (preserves format when possible)
6395
"""
6496
# Get the image data
6597
if isinstance(image_source, str) and image_source.startswith('s3://'):
@@ -70,7 +102,7 @@ def prepare_image(image_source: Union[str, bytes],
70102
raise ValueError(f"Invalid image source: {type(image_source)}. Must be S3 URI or bytes.")
71103

72104
# Resize and process
73-
return resize_image(image_data, target_width, target_height)
105+
return resize_image(image_data, target_width, target_height, allow_upscale)
74106

75107
def apply_adaptive_binarization(image_data: bytes) -> bytes:
76108
"""

0 commit comments

Comments
 (0)