aws-solutions-library-samples
diff --git a/config_library/pattern-2/bank-statement-sample/config.yaml b/config_library/pattern-2/bank-statement-sample/config.yaml
Lines changed: 14 additions & 8 deletions
diff --git a/config_library/pattern-2/lending-package-sample/config.yaml b/config_library/pattern-2/lending-package-sample/config.yaml
Lines changed: 16 additions & 8 deletions
diff --git a/config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/config.yaml b/config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/config.yaml
Lines changed: 17 additions & 8 deletions
diff --git a/config_library/pattern-2/rvl-cdip-package-sample/config.yaml b/config_library/pattern-2/rvl-cdip-package-sample/config.yaml
Lines changed: 17 additions & 8 deletions
diff --git a/config_library/pattern-3/rvl-cdip-package-sample/config.yaml b/config_library/pattern-3/rvl-cdip-package-sample/config.yaml
Lines changed: 3 additions & 0 deletions
diff --git a/idp_cli/pyproject.toml b/idp_cli/pyproject.toml
Lines changed: 16 additions & 20 deletions
@@ -405,11 +405,7 @@ assessment:
   image:
     target_height: ""
     target_width: ""
-  granular:
-    enabled: true
-    max_workers: "20"
-    simple_batch_size: "3"
-    list_batch_size: "1"
+  max_workers: "20"
   default_confidence_threshold: "0.8"
   top_p: "0.0"
   max_tokens: "10000"
@@ -462,12 +458,15 @@ assessment:
     - page: Page number where the field appears (starting from 1)
 
     Coordinate system:
+    - The document images have ruler markings along all edges showing the 0-1000 coordinate scale
+    - Use these ruler markings to determine precise coordinates for bounding boxes
     - Use normalized scale 0-1000 for both x and y axes
     - x1, y1 = top-left corner of bounding box  
     - x2, y2 = bottom-right corner of bounding box
     - Ensure x2 > x1 and y2 > y1
     - Make bounding boxes tight around the actual text content
     - If a field spans multiple lines, create a bounding box that encompasses all relevant text
+    - Reference the ruler markings on the image edges to provide accurate coordinates
     </spatial-localization-guidelines>
 
     <final-instructions>
@@ -529,6 +528,9 @@ assessment:
     }
 
     IMPORTANT:  
+    - The <task-schema> contains raw JSON schema for the attributes you should assess
+    - The <extraction-results> contains the FULL extraction data (you have complete context for cross-referencing)
+    - The <task-instruction> specifies which attributes to focus on
     - For LIST attributes like "Transactions", assess EACH individual item in the list separately with individual bounding boxes
     - Each transaction should be assessed as a separate object in the array with its own spatial coordinates
     - Do NOT provide aggregate assessments for list items - assess each one individually with precise locations
@@ -549,9 +551,13 @@ assessment:
 
     <<CACHEPOINT>>
 
-    <attributes-definitions>
-    {ATTRIBUTE_NAMES_AND_DESCRIPTIONS}
-    </attributes-definitions>
+    <task-schema>
+    {TASK_SCHEMA}
+    </task-schema>
+
+    <task-instruction>
+    {TASK_INSTRUCTION}
+    </task-instruction>
 
     <extraction-results>
     {EXTRACTION_RESULTS}
 
@@ -10,6 +10,7 @@ ocr:
     - name: LAYOUT
     - name: TABLES
     - name: SIGNATURES
+  max_workers: "20"
   image:
     target_width: ""
     target_height: ""
@@ -1253,6 +1254,7 @@ extraction:
   agentic:
     enabled: false
     review_agent: false
+  max_workers: "20"
   image:
     target_width: ""
     target_height: ""
@@ -1426,14 +1428,10 @@ summarization:
 assessment:
   enabled: true
   validation_enabled: false
+  max_workers: "20"
   image:
     target_height: ""
     target_width: ""
-  granular:
-    enabled: true
-    max_workers: "20"
-    simple_batch_size: "3"
-    list_batch_size: "1"
   default_confidence_threshold: "0.8"
   top_p: "0.0"
   max_tokens: "10000"
@@ -1486,12 +1484,15 @@ assessment:
     - page: Page number where the field appears (starting from 1)
 
     Coordinate system:
+    - The document images have ruler markings along all edges showing the 0-1000 coordinate scale
+    - Use these ruler markings to determine precise coordinates for bounding boxes
     - Use normalized scale 0-1000 for both x and y axes
     - x1, y1 = top-left corner of bounding box  
     - x2, y2 = bottom-right corner of bounding box
     - Ensure x2 > x1 and y2 > y1
     - Make bounding boxes tight around the actual text content
     - If a field spans multiple lines, create a bounding box that encompasses all relevant text
+    - Reference the ruler markings on the image edges to provide accurate coordinates
     </spatial-localization-guidelines>
 
     <final-instructions>
@@ -1553,6 +1554,9 @@ assessment:
     }
 
     IMPORTANT:  
+    - The <task-schema> contains raw JSON schema for the attributes you should assess
+    - The <extraction-results> contains the FULL extraction data (you have complete context for cross-referencing)
+    - The <task-instruction> specifies which attributes to focus on
     - For LIST attributes like "Transactions", assess EACH individual item in the list separately with individual bounding boxes
     - Each transaction should be assessed as a separate object in the array with its own spatial coordinates
     - Do NOT provide aggregate assessments for list items - assess each one individually with precise locations
@@ -1573,9 +1577,13 @@ assessment:
 
     <<CACHEPOINT>>
 
-    <attributes-definitions>
-    {ATTRIBUTE_NAMES_AND_DESCRIPTIONS}
-    </attributes-definitions>
+    <task-schema>
+    {TASK_SCHEMA}
+    </task-schema>
+
+    <task-instruction>
+    {TASK_INSTRUCTION}
+    </task-instruction>
 
     <extraction-results>
     {EXTRACTION_RESULTS}
 
@@ -11,6 +11,7 @@ ocr:
     - name: LAYOUT
     - name: TABLES
     - name: SIGNATURES
+  max_workers: "20"
   image:
     dpi: "150"
     target_width: ""
@@ -809,6 +810,7 @@ classes:
 
 classification:
   maxPagesForClassification: "ALL"
+  max_workers: "20"
   image:
     target_height: ""
     target_width: ""
@@ -868,6 +870,7 @@ classification:
     5. Outputting in the exact JSON format specified in <output-format>
     </final-instructions>
 extraction:
+  max_workers: "20"
   image:
     target_height: ""
     target_width: ""
@@ -973,14 +976,10 @@ extraction:
 assessment:
   enabled: true
   validation_enabled: false
+  max_workers: "20"
   image:
     target_height: ""
     target_width: ""
-  granular:
-    enabled: true
-    max_workers: "20"
-    simple_batch_size: "3"
-    list_batch_size: "1"
   default_confidence_threshold: "0.8"
   top_p: "0.0"
   max_tokens: "10000"
@@ -1033,12 +1032,15 @@ assessment:
     - page: Page number where the field appears (starting from 1)
 
     Coordinate system:
+    - The document images have ruler markings along all edges showing the 0-1000 coordinate scale
+    - Use these ruler markings to determine precise coordinates for bounding boxes
     - Use normalized scale 0-1000 for both x and y axes
     - x1, y1 = top-left corner of bounding box  
     - x2, y2 = bottom-right corner of bounding box
     - Ensure x2 > x1 and y2 > y1
     - Make bounding boxes tight around the actual text content
     - If a field spans multiple lines, create a bounding box that encompasses all relevant text
+    - Reference the ruler markings on the image edges to provide accurate coordinates
     </spatial-localization-guidelines>
 
     <final-instructions>
@@ -1100,6 +1102,9 @@ assessment:
     }
 
     IMPORTANT:  
+    - The <task-schema> contains raw JSON schema for the attributes you should assess
+    - The <extraction-results> contains the FULL extraction data (you have complete context for cross-referencing)
+    - The <task-instruction> specifies which attributes to focus on
     - For LIST attributes like "Transactions", assess EACH individual item in the list separately with individual bounding boxes
     - Each transaction should be assessed as a separate object in the array with its own spatial coordinates
     - Do NOT provide aggregate assessments for list items - assess each one individually with precise locations
@@ -1120,9 +1125,13 @@ assessment:
 
     <<CACHEPOINT>>
 
-    <attributes-definitions>
-    {ATTRIBUTE_NAMES_AND_DESCRIPTIONS}
-    </attributes-definitions>
+    <task-schema>
+    {TASK_SCHEMA}
+    </task-schema>
+
+    <task-instruction>
+    {TASK_INSTRUCTION}
+    </task-instruction>
 
     <extraction-results>
     {EXTRACTION_RESULTS}
 
@@ -10,6 +10,7 @@ ocr:
     - name: LAYOUT
     - name: TABLES
     - name: SIGNATURES
+  max_workers: "20"
   image:
     dpi: "150"
     target_width: ""
@@ -764,6 +765,7 @@ classes:
           labeled 'notes', 'remarks', or 'comments'.
 classification:
   maxPagesForClassification: "ALL"
+  max_workers: "20"
   image:
     target_height: ""
     target_width: ""
@@ -907,6 +909,7 @@ classification:
   classificationMethod: textbasedHolisticClassification
   sectionSplitting: llm_determined
 extraction:
+  max_workers: "20"
   image:
     target_width: ""
     target_height: ""
@@ -1068,14 +1071,10 @@ summarization:
 assessment:
   enabled: true
   validation_enabled: false
+  max_workers: "20"
   image:
     target_height: ""
     target_width: ""
-  granular:
-    enabled: true
-    max_workers: "20"
-    simple_batch_size: "3"
-    list_batch_size: "1"
   default_confidence_threshold: "0.8"
   top_p: "0.0"
   max_tokens: "10000"
@@ -1128,12 +1127,15 @@ assessment:
     - page: Page number where the field appears (starting from 1)
 
     Coordinate system:
+    - The document images have ruler markings along all edges showing the 0-1000 coordinate scale
+    - Use these ruler markings to determine precise coordinates for bounding boxes
     - Use normalized scale 0-1000 for both x and y axes
     - x1, y1 = top-left corner of bounding box  
     - x2, y2 = bottom-right corner of bounding box
     - Ensure x2 > x1 and y2 > y1
     - Make bounding boxes tight around the actual text content
     - If a field spans multiple lines, create a bounding box that encompasses all relevant text
+    - Reference the ruler markings on the image edges to provide accurate coordinates
     </spatial-localization-guidelines>
 
     <final-instructions>
@@ -1195,6 +1197,9 @@ assessment:
     }
 
     IMPORTANT:  
+    - The <task-schema> contains raw JSON schema for the attributes you should assess
+    - The <extraction-results> contains the FULL extraction data (you have complete context for cross-referencing)
+    - The <task-instruction> specifies which attributes to focus on
     - For LIST attributes like "Transactions", assess EACH individual item in the list separately with individual bounding boxes
     - Each transaction should be assessed as a separate object in the array with its own spatial coordinates
     - Do NOT provide aggregate assessments for list items - assess each one individually with precise locations
@@ -1215,9 +1220,13 @@ assessment:
 
     <<CACHEPOINT>>
 
-    <attributes-definitions>
-    {ATTRIBUTE_NAMES_AND_DESCRIPTIONS}
-    </attributes-definitions>
+    <task-schema>
+    {TASK_SCHEMA}
+    </task-schema>
+
+    <task-instruction>
+    {TASK_INSTRUCTION}
+    </task-instruction>
 
     <extraction-results>
     {EXTRACTION_RESULTS}
 
@@ -11,6 +11,7 @@ ocr:
     - name: LAYOUT
     - name: TABLES
     - name: SIGNATURES
+  max_workers: "20"
   image:
     dpi: "150"
     target_width: ""
@@ -766,6 +767,7 @@ classes:
 classification:
   model: Custom fine tuned UDOP model
 extraction:
+  max_workers: "20"
   image:
     target_width: ""
     target_height: ""
@@ -926,6 +928,7 @@ summarization:
     You are a document summarization expert who can analyze and summarize documents from various domains including medical, financial, legal, and general business documents. Your task is to create a summary that captures the key information, main points, and important details from the document. Your output must be in valid JSON format. \nSummarization Style: Balanced\\nCreate a balanced summary that provides a moderate level of detail. Include the main points and key supporting information, while maintaining the document's overall structure. Aim for a comprehensive yet concise summary.\n Your output MUST be in valid JSON format with markdown content. You MUST strictly adhere to the output format specified in the instructions.
 assessment:
   enabled: true
+  max_workers: "20"
   image:
     target_height: ""
     target_width: ""
 
@@ -9,32 +9,28 @@ build-backend = "setuptools.build_meta"
 name = "idp-cli"
 version = "0.4.5"
 description = "Command-line interface for IDP Accelerator batch document processing"
-authors = [{name = "AWS"}]
-license = {text = "MIT-0"}
-requires-python = ">=3.10"
+authors = [{ name = "AWS" }]
+license = { text = "MIT-0" }
+requires-python = ">=3.12"
+# Trove classifiers must agree with the metadata above: only interpreter
+# versions satisfying requires-python are listed, and the license is MIT-0.
 classifiers = [
-    "Development Status :: 4 - Beta",
-    "Intended Audience :: Developers",
-    "License :: OSI Approved :: MIT License",
-    "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.10",
-    "Programming Language :: Python :: 3.11",
-    "Programming Language :: Python :: 3.12",
-    "Programming Language :: Python :: 3.13",
+  "Development Status :: 4 - Beta",
+  "Intended Audience :: Developers",
+  "License :: OSI Approved :: MIT No Attribution License (MIT-0)",
+  "Programming Language :: Python :: 3",
+  "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: 3.13",
 ]
 dependencies = [
-    "click>=8.1.0",
-    "rich>=13.0.0",
-    "boto3>=1.28.0",
-    "botocore>=1.31.0",
+  "click>=8.1.0",
+  "rich>=13.0.0",
+  "boto3>=1.28.0",
+  "botocore>=1.31.0",
 ]
 
 [project.optional-dependencies]
-test = [
-    "pytest>=7.4.0",
-    "pytest-mock>=3.11.0",
-    "moto>=4.2.0",
-]
+test = ["pytest>=7.4.0", "pytest-mock>=3.11.0", "moto>=4.2.0"]
 
 [project.scripts]
 idp-cli = "idp_cli.cli:main"