Skip to content

Commit 5c1c946

Browse files
committed
feat: better handling of default values
1 parent 6bce925 commit 5c1c946

File tree

9 files changed

+2510
-55
lines changed

9 files changed

+2510
-55
lines changed

config_library/pattern-2/default/config.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ ocr:
77
- name: LAYOUT
88
- name: TABLES
99
- name: SIGNATURES
10+
image:
11+
target_width: '951'
12+
target_height: '1268'
1013
classes:
1114
- name: letter
1215
description: A formal written correspondence with sender/recipient addresses, date, salutation, body, and closing signature
@@ -300,6 +303,9 @@ classes:
300303
- name: comments
301304
description: Additional notes or remarks about the document. Look for sections labeled 'notes', 'remarks', or 'comments'.
302305
classification:
306+
image:
307+
target_width: '951'
308+
target_height: '1268'
303309
top_p: '0.1'
304310
max_tokens: '4096'
305311
top_k: '5'
@@ -439,6 +445,9 @@ classification:
439445
You are a document classification expert who can analyze and classify multiple documents and their page boundaries within a document package from various domains. Your task is to determine the document type based on its content and structure, using the provided document type definitions. Your output must be valid JSON according to the requested format.
440446
classificationMethod: textbasedHolisticClassification
441447
extraction:
448+
image:
449+
target_width: '951'
450+
target_height: '1268'
442451
top_p: '0.1'
443452
max_tokens: '10000'
444453
top_k: '5'
@@ -594,6 +603,9 @@ summarization:
594603
system_prompt: >-
595604
You are a document summarization expert who can analyze and summarize documents from various domains including medical, financial, legal, and general business documents. Your task is to create a summary that captures the key information, main points, and important details from the document. Your output must be in valid JSON format. \nSummarization Style: Balanced\\nCreate a balanced summary that provides a moderate level of detail. Include the main points and key supporting information, while maintaining the document's overall structure. Aim for a comprehensive yet concise summary.\n Your output MUST be in valid JSON format with markdown content. You MUST strictly adhere to the output format specified in the instructions.
596605
assessment:
606+
image:
607+
target_width: '951'
608+
target_height: '1268'
597609
default_confidence_threshold: '0.9'
598610
top_p: '0.1'
599611
max_tokens: '10000'

lib/idp_common_pkg/idp_common/assessment/service.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -678,9 +678,10 @@ def process_document_section(self, document: Document, section_id: str) -> Docum
678678
logger.info(f"Time taken to read text content: {t2 - t1:.2f} seconds")
679679

680680
# Read page images with configurable dimensions
681-
image_config = self.config.get("image", {})
682-
target_width = image_config.get("target_width", 951) # Default fallback
683-
target_height = image_config.get("target_height", 1268)
681+
assessment_config = self.config.get("assessment", {})
682+
image_config = assessment_config.get("image", {})
683+
target_width = image_config.get("target_width")
684+
target_height = image_config.get("target_height")
684685

685686
page_images = []
686687
for page_id in sorted_page_ids:
@@ -689,9 +690,16 @@ def process_document_section(self, document: Document, section_id: str) -> Docum
689690

690691
page = document.pages[page_id]
691692
image_uri = page.image_uri
692-
image_content = image.prepare_image(
693-
image_uri, target_width, target_height
694-
)
693+
694+
if target_width is not None and target_height is not None:
695+
# Cast to int in case config values are strings
696+
target_width = int(target_width)
697+
target_height = int(target_height)
698+
image_content = image.prepare_image(
699+
image_uri, target_width, target_height
700+
)
701+
else:
702+
image_content = image.prepare_image(image_uri) # Uses function defaults
695703
page_images.append(image_content)
696704

697705
t3 = time.time()
@@ -715,7 +723,6 @@ def process_document_section(self, document: Document, section_id: str) -> Docum
715723
logger.info(f"Time taken to read raw OCR results: {t4 - t3:.2f} seconds")
716724

717725
# Get assessment configuration
718-
assessment_config = self.config.get("assessment", {})
719726
model_id = self.config.get("model_id") or assessment_config.get("model")
720727
temperature = _safe_float_conversion(
721728
assessment_config.get("temperature", 0), 0.0

lib/idp_common_pkg/idp_common/classification/service.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -609,12 +609,18 @@ def classify_page_bedrock(
609609
if image_uri:
610610
try:
611611
image_config = self.config.get("image", {})
612-
target_width = image_config.get("target_width", 951) # Default fallback
613-
target_height = image_config.get("target_height", 1268)
614-
615-
image_content = image.prepare_image(
616-
image_uri, target_width, target_height
617-
)
612+
target_width = image_config.get("target_width")
613+
target_height = image_config.get("target_height")
614+
615+
if target_width is not None and target_height is not None:
616+
# Cast to int in case config values are strings
617+
target_width = int(target_width)
618+
target_height = int(target_height)
619+
image_content = image.prepare_image(
620+
image_uri, target_width, target_height
621+
)
622+
else:
623+
image_content = image.prepare_image(image_uri) # Uses function defaults
618624
except Exception as e:
619625
logger.warning(f"Failed to load image content from {image_uri}: {e}")
620626
# Continue without image content

lib/idp_common_pkg/idp_common/extraction/service.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -618,9 +618,10 @@ def process_document_section(self, document: Document, section_id: str) -> Docum
618618
logger.info(f"Time taken to read text content: {t1 - t0:.2f} seconds")
619619

620620
# Read page images with configurable dimensions
621-
image_config = self.config.get("image", {})
622-
target_width = image_config.get("target_width", 951) # Default fallback
623-
target_height = image_config.get("target_height", 1268)
621+
extraction_config = self.config.get("extraction", {})
622+
image_config = extraction_config.get("image", {})
623+
target_width = image_config.get("target_width")
624+
target_height = image_config.get("target_height")
624625

625626
page_images = []
626627
for page_id in sorted_page_ids:
@@ -629,16 +630,21 @@ def process_document_section(self, document: Document, section_id: str) -> Docum
629630

630631
page = document.pages[page_id]
631632
image_uri = page.image_uri
632-
image_content = image.prepare_image(
633-
image_uri, target_width, target_height
634-
)
633+
if target_width is not None and target_height is not None:
634+
# Cast to int in case config values are strings
635+
target_width = int(target_width)
636+
target_height = int(target_height)
637+
image_content = image.prepare_image(
638+
image_uri, target_width, target_height
639+
)
640+
else:
641+
image_content = image.prepare_image(image_uri) # Uses function defaults
635642
page_images.append(image_content)
636643

637644
t2 = time.time()
638645
logger.info(f"Time taken to read images: {t2 - t1:.2f} seconds")
639646

640647
# Get extraction configuration
641-
extraction_config = self.config.get("extraction", {})
642648
model_id = self.config.get("model_id") or extraction_config.get("model")
643649
temperature = float(extraction_config.get("temperature", 0))
644650
top_k = float(extraction_config.get("top_k", 5))

lib/idp_common_pkg/idp_common/ocr/service.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -293,18 +293,15 @@ def _process_single_page(
293293
# Resize image for OCR processing if configured
294294
ocr_img_bytes = img_bytes # Default to original image
295295
if self.resize_config:
296+
from idp_common import image
297+
296298
target_width = self.resize_config.get("target_width")
297299
target_height = self.resize_config.get("target_height")
298300

299-
if target_width and target_height:
300-
from idp_common import image
301-
302-
ocr_img_bytes = image.resize_image(
303-
img_bytes, target_width, target_height
304-
)
305-
logger.debug(
306-
f"Resized image for OCR processing (page {page_id}) to {target_width}x{target_height}"
307-
)
301+
ocr_img_bytes = image.resize_image(img_bytes, target_width, target_height)
302+
logger.debug(
303+
f"Resized image for OCR processing (page {page_id}) to {target_width}x{target_height}"
304+
)
308305

309306
# Process with OCR using potentially resized image
310307
if isinstance(self.enhanced_features, list) and self.enhanced_features:

0 commit comments

Comments
 (0)