@@ -141,20 +141,24 @@ def __init__(
141141 f"No image sizing configured, applying default limits: "
142142 f"{ DEFAULT_TARGET_WIDTH } x{ DEFAULT_TARGET_HEIGHT } to optimize resource usage and token consumption"
143143 )
144- elif target_width is not None and target_height is not None :
144+ else :
145145 # Handle empty strings by converting to None for validation
146146 if isinstance (target_width , str ) and not target_width .strip ():
147147 target_width = None
148148 if isinstance (target_height , str ) and not target_height .strip ():
149149 target_height = None
150150
151151 # If after handling empty strings we still have values, use them
152- if target_width is not None and target_height is not None :
152+ if target_width is not None or target_height is not None :
153153 # Explicit configuration provided - validate and use it
154154 try :
155155 self .resize_config = {
156- "target_width" : int (target_width ),
157- "target_height" : int (target_height ),
156+ "target_width" : int (target_width )
157+ if target_width is not None
158+ else None ,
159+ "target_height" : int (target_height )
160+ if target_height is not None
161+ else None ,
158162 }
159163 logger .info (
160164 f"Using configured image sizing: { target_width } x{ target_height } "
@@ -178,13 +182,6 @@ def __init__(
178182 f"Invalid image sizing configuration provided, applying default limits: "
179183 f"{ DEFAULT_TARGET_WIDTH } x{ DEFAULT_TARGET_HEIGHT } to optimize resource usage and token consumption"
180184 )
181- else :
182- # Partial configuration (only width or height) - no defaults applied
183- # This preserves the existing behavior for partial configs
184- self .resize_config = None
185- logger .info (
186- "Partial image sizing configuration detected, no defaults applied"
187- )
188185
189186 # Extract preprocessing configuration (type-safe)
190187 preprocessing_value = self .config .ocr .image .preprocessing
@@ -637,20 +634,28 @@ def _process_image_file_direct(
637634 target_height = self .resize_config .get ("target_height" )
638635
639636 if target_width or target_height :
640- # Check if image already fits within target dimensions
641- if (
642- original_width <= target_width
643- and original_height <= target_height
644- ):
645- logger .debug (
646- f"Image { original_width } x{ original_height } already fits within "
647- f"{ target_width } x{ target_height } , using original"
648- )
649- needs_resize = False
637+ # Only check fit if both dimensions are provided (type-safe comparison)
638+ if target_width is not None and target_height is not None :
639+ # Check if image already fits within target dimensions
640+ if (
641+ original_width <= target_width
642+ and original_height <= target_height
643+ ):
644+ logger .debug (
645+ f"Image { original_width } x{ original_height } already fits within "
646+ f"{ target_width } x{ target_height } , using original"
647+ )
648+ needs_resize = False
649+ else :
650+ logger .debug (
651+ f"Image { original_width } x{ original_height } needs resizing to fit "
652+ f"{ target_width } x{ target_height } "
653+ )
654+ needs_resize = True
650655 else :
656+ # Partial config - always resize to calculate missing dimension
651657 logger .debug (
652- f"Image { original_width } x{ original_height } needs resizing to fit "
653- f"{ target_width } x{ target_height } "
658+ "Partial dimension config detected, will resize to calculate missing dimension"
654659 )
655660 needs_resize = True
656661
@@ -687,7 +692,7 @@ def _process_image_file_direct(
687692
688693 # Get original format info
689694 img_data = pix .tobytes ()
690- img_ext = pix .extension # Get original extension (png, jpg, etc.)
695+ img_ext = pix .extension # type: ignore[attr-defined] # Get original extension (png, jpg, etc.)
691696
692697 # Determine content type
693698 content_type_map = {
@@ -743,7 +748,7 @@ def _process_image_file_direct(
743748 else :
744749 # Fallback: extract as rendered image
745750 # This path should rarely be used since we pass original_file_content for images
746- pix = page .get_pixmap ()
751+ pix = page .get_pixmap () # type: ignore[attr-defined]
747752 logger .debug (
748753 f"Using PyMuPDF fallback for image extraction: { pix .width } x{ pix .height } "
749754 )
@@ -1191,7 +1196,7 @@ def _extract_page_image(self, page: fitz.Page, is_pdf: bool, page_id: int) -> by
11911196 # For images, just apply the scale factor
11921197 matrix = fitz .Matrix (scale_factor , scale_factor )
11931198
1194- pix = page .get_pixmap (matrix = matrix )
1199+ pix = page .get_pixmap (matrix = matrix ) # type: ignore[attr-defined]
11951200
11961201 actual_width , actual_height = pix .width , pix .height
11971202 logger .info (
@@ -1202,9 +1207,9 @@ def _extract_page_image(self, page: fitz.Page, is_pdf: bool, page_id: int) -> by
12021207 # No resize needed - image is already smaller than targets
12031208 if is_pdf :
12041209 dpi = self .dpi or 150
1205- pix = page .get_pixmap (dpi = dpi )
1210+ pix = page .get_pixmap (dpi = dpi ) # type: ignore[attr-defined]
12061211 else :
1207- pix = page .get_pixmap ()
1212+ pix = page .get_pixmap () # type: ignore[attr-defined]
12081213
12091214 # Log actual extracted dimensions
12101215 actual_width , actual_height = pix .width , pix .height
@@ -1215,9 +1220,9 @@ def _extract_page_image(self, page: fitz.Page, is_pdf: bool, page_id: int) -> by
12151220 # No valid target dimensions - use original extraction
12161221 if is_pdf :
12171222 dpi = self .dpi or 150
1218- pix = page .get_pixmap (dpi = dpi )
1223+ pix = page .get_pixmap (dpi = dpi ) # type: ignore[attr-defined]
12191224 else :
1220- pix = page .get_pixmap ()
1225+ pix = page .get_pixmap () # type: ignore[attr-defined]
12211226
12221227 # Log actual extracted dimensions
12231228 actual_width , actual_height = pix .width , pix .height
@@ -1228,9 +1233,9 @@ def _extract_page_image(self, page: fitz.Page, is_pdf: bool, page_id: int) -> by
12281233 # No resize config - extract at original size
12291234 if is_pdf :
12301235 dpi = self .dpi or 150
1231- pix = page .get_pixmap (dpi = dpi )
1236+ pix = page .get_pixmap (dpi = dpi ) # type: ignore[attr-defined]
12321237 else :
1233- pix = page .get_pixmap ()
1238+ pix = page .get_pixmap () # type: ignore[attr-defined]
12341239
12351240 # Log actual extracted dimensions
12361241 actual_width , actual_height = pix .width , pix .height
@@ -1562,7 +1567,7 @@ def _parse_textract_response(
15621567 Returns:
15631568 Dictionary with 'text' key containing extracted text
15641569 """
1565- from textractor .parsers import response_parser
1570+ from textractor .parsers import response_parser # type: ignore[import-untyped]
15661571
15671572 # Create page identifier for logging
15681573 page_info = f" for page { page_id } " if page_id else ""
0 commit comments