aws-solutions-library-samples
diff --git a/config_library/pattern-2/bank-statement-sample/config.yaml b/config_library/pattern-2/bank-statement-sample/config.yaml
Lines changed: 14 additions & 8 deletions
diff --git a/config_library/pattern-2/lending-package-sample/config.yaml b/config_library/pattern-2/lending-package-sample/config.yaml
Lines changed: 17 additions & 8 deletions
diff --git a/config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/config.yaml b/config_library/pattern-2/rvl-cdip-package-sample-with-few-shot-examples/config.yaml
Lines changed: 17 additions & 8 deletions
diff --git a/config_library/pattern-2/rvl-cdip-package-sample/config.yaml b/config_library/pattern-2/rvl-cdip-package-sample/config.yaml
Lines changed: 17 additions & 8 deletions
diff --git a/config_library/pattern-3/rvl-cdip-package-sample/config.yaml b/config_library/pattern-3/rvl-cdip-package-sample/config.yaml
Lines changed: 3 additions & 0 deletions
diff --git a/idp_cli/pyproject.toml b/idp_cli/pyproject.toml
Lines changed: 16 additions & 20 deletions
@@ -404,11 +404,7 @@ assessment:
   image:
     target_height: ""
     target_width: ""
-  granular:
-    enabled: true
-    max_workers: "20"
-    simple_batch_size: "3"
-    list_batch_size: "1"
+  max_workers: "20"
   default_confidence_threshold: "0.8"
   top_p: "0.1"
   max_tokens: "10000"
@@ -461,12 +457,15 @@ assessment:
     - page: Page number where the field appears (starting from 1)
 
     Coordinate system:
+    - The document images have ruler markings along all edges showing the 0-1000 coordinate scale
+    - Use these ruler markings to determine precise coordinates for bounding boxes
     - Use normalized scale 0-1000 for both x and y axes
     - x1, y1 = top-left corner of bounding box  
     - x2, y2 = bottom-right corner of bounding box
     - Ensure x2 > x1 and y2 > y1
     - Make bounding boxes tight around the actual text content
     - If a field spans multiple lines, create a bounding box that encompasses all relevant text
+    - Reference the ruler markings on the image edges to provide accurate coordinates
     </spatial-localization-guidelines>
 
     <final-instructions>
@@ -528,6 +527,9 @@ assessment:
     }
 
     IMPORTANT:  
+    - The <task-schema> contains the raw JSON schema for the attributes you should assess
+    - The <extraction-results> contains the FULL extraction data (you have complete context for cross-referencing)
+    - The <task-instruction> specifies which attributes to focus on
     - For LIST attributes like "Transactions", assess EACH individual item in the list separately with individual bounding boxes
     - Each transaction should be assessed as a separate object in the array with its own spatial coordinates
     - Do NOT provide aggregate assessments for list items - assess each one individually with precise locations
@@ -548,9 +550,13 @@ assessment:
 
     <<CACHEPOINT>>
 
-    <attributes-definitions>
-    {ATTRIBUTE_NAMES_AND_DESCRIPTIONS}
-    </attributes-definitions>
+    <task-schema>
+    {TASK_SCHEMA}
+    </task-schema>
+
+    <task-instruction>
+    {TASK_INSTRUCTION}
+    </task-instruction>
 
     <extraction-results>
     {EXTRACTION_RESULTS}
 
@@ -10,6 +10,7 @@ ocr:
     - name: LAYOUT
     - name: TABLES
     - name: SIGNATURES
+  max_workers: "20"
   image:
     target_width: ""
     target_height: ""
@@ -1188,6 +1189,7 @@ classes:
 classification:
   classificationMethod: multimodalPageLevelClassification
   maxPagesForClassification: "ALL"
+  max_workers: "20"
   image:
     target_height: ""
     target_width: ""
@@ -1252,6 +1254,7 @@ extraction:
   agentic:
     enabled: false
     review_agent: false
+  max_workers: "20"
   image:
     target_width: ""
     target_height: ""
@@ -1425,14 +1428,10 @@ summarization:
 assessment:
   enabled: true
   validation_enabled: false
+  max_workers: "20"
   image:
     target_height: ""
     target_width: ""
-  granular:
-    enabled: true
-    max_workers: "20"
-    simple_batch_size: "3"
-    list_batch_size: "1"
   default_confidence_threshold: "0.8"
   top_p: "0.1"
   max_tokens: "10000"
@@ -1485,12 +1484,15 @@ assessment:
     - page: Page number where the field appears (starting from 1)
 
     Coordinate system:
+    - The document images have ruler markings along all edges showing the 0-1000 coordinate scale
+    - Use these ruler markings to determine precise coordinates for bounding boxes
     - Use normalized scale 0-1000 for both x and y axes
     - x1, y1 = top-left corner of bounding box  
     - x2, y2 = bottom-right corner of bounding box
     - Ensure x2 > x1 and y2 > y1
     - Make bounding boxes tight around the actual text content
     - If a field spans multiple lines, create a bounding box that encompasses all relevant text
+    - Reference the ruler markings on the image edges to provide accurate coordinates
     </spatial-localization-guidelines>
 
     <final-instructions>
@@ -1552,6 +1554,9 @@ assessment:
     }
 
     IMPORTANT:  
+    - The <task-schema> contains the raw JSON schema for the attributes you should assess
+    - The <extraction-results> contains the FULL extraction data (you have complete context for cross-referencing)
+    - The <task-instruction> specifies which attributes to focus on
     - For LIST attributes like "Transactions", assess EACH individual item in the list separately with individual bounding boxes
     - Each transaction should be assessed as a separate object in the array with its own spatial coordinates
     - Do NOT provide aggregate assessments for list items - assess each one individually with precise locations
@@ -1572,9 +1577,13 @@ assessment:
 
     <<CACHEPOINT>>
 
-    <attributes-definitions>
-    {ATTRIBUTE_NAMES_AND_DESCRIPTIONS}
-    </attributes-definitions>
+    <task-schema>
+    {TASK_SCHEMA}
+    </task-schema>
+
+    <task-instruction>
+    {TASK_INSTRUCTION}
+    </task-instruction>
 
     <extraction-results>
     {EXTRACTION_RESULTS}
 
@@ -11,6 +11,7 @@ ocr:
     - name: LAYOUT
     - name: TABLES
     - name: SIGNATURES
+  max_workers: "20"
   image:
     dpi: "150"
     target_width: ""
@@ -809,6 +810,7 @@ classes:
 
 classification:
   maxPagesForClassification: "ALL"
+  max_workers: "20"
   image:
     target_height: ""
     target_width: ""
@@ -863,6 +865,7 @@ classification:
     5. Outputting in the exact JSON format specified in <output-format>
     </final-instructions>
 extraction:
+  max_workers: "20"
   image:
     target_height: ""
     target_width: ""
@@ -963,14 +966,10 @@ extraction:
 assessment:
   enabled: true
   validation_enabled: false
+  max_workers: "20"
   image:
     target_height: ""
     target_width: ""
-  granular:
-    enabled: true
-    max_workers: "20"
-    simple_batch_size: "3"
-    list_batch_size: "1"
   default_confidence_threshold: "0.8"
   top_p: "0.1"
   max_tokens: "10000"
@@ -1023,12 +1022,15 @@ assessment:
     - page: Page number where the field appears (starting from 1)
 
     Coordinate system:
+    - The document images have ruler markings along all edges showing the 0-1000 coordinate scale
+    - Use these ruler markings to determine precise coordinates for bounding boxes
     - Use normalized scale 0-1000 for both x and y axes
     - x1, y1 = top-left corner of bounding box  
     - x2, y2 = bottom-right corner of bounding box
     - Ensure x2 > x1 and y2 > y1
     - Make bounding boxes tight around the actual text content
     - If a field spans multiple lines, create a bounding box that encompasses all relevant text
+    - Reference the ruler markings on the image edges to provide accurate coordinates
     </spatial-localization-guidelines>
 
     <final-instructions>
@@ -1090,6 +1092,9 @@ assessment:
     }
 
     IMPORTANT:  
+    - The <task-schema> contains the raw JSON schema for the attributes you should assess
+    - The <extraction-results> contains the FULL extraction data (you have complete context for cross-referencing)
+    - The <task-instruction> specifies which attributes to focus on
     - For LIST attributes like "Transactions", assess EACH individual item in the list separately with individual bounding boxes
     - Each transaction should be assessed as a separate object in the array with its own spatial coordinates
     - Do NOT provide aggregate assessments for list items - assess each one individually with precise locations
@@ -1110,9 +1115,13 @@ assessment:
 
     <<CACHEPOINT>>
 
-    <attributes-definitions>
-    {ATTRIBUTE_NAMES_AND_DESCRIPTIONS}
-    </attributes-definitions>
+    <task-schema>
+    {TASK_SCHEMA}
+    </task-schema>
+
+    <task-instruction>
+    {TASK_INSTRUCTION}
+    </task-instruction>
 
     <extraction-results>
     {EXTRACTION_RESULTS}
 
@@ -10,6 +10,7 @@ ocr:
     - name: LAYOUT
     - name: TABLES
     - name: SIGNATURES
+  max_workers: "20"
   image:
     dpi: "150"
     target_width: ""
@@ -764,6 +765,7 @@ classes:
           labeled 'notes', 'remarks', or 'comments'.
 classification:
   maxPagesForClassification: "ALL"
+  max_workers: "20"
   image:
     target_height: ""
     target_width: ""
@@ -906,6 +908,7 @@ classification:
     You are a document classification expert who can analyze and classify multiple documents and their page boundaries within a document package from various domains. Your task is to determine the document type based on its content and structure, using the provided document type definitions. Your output must be valid JSON according to the requested format.
   classificationMethod: textbasedHolisticClassification
 extraction:
+  max_workers: "20"
   image:
     target_width: ""
     target_height: ""
@@ -1067,14 +1070,10 @@ summarization:
 assessment:
   enabled: true
   validation_enabled: false
+  max_workers: "20"
   image:
     target_height: ""
     target_width: ""
-  granular:
-    enabled: true
-    max_workers: "20"
-    simple_batch_size: "3"
-    list_batch_size: "1"
   default_confidence_threshold: "0.8"
   top_p: "0.1"
   max_tokens: "10000"
@@ -1127,12 +1126,15 @@ assessment:
     - page: Page number where the field appears (starting from 1)
 
     Coordinate system:
+    - The document images have ruler markings along all edges showing the 0-1000 coordinate scale
+    - Use these ruler markings to determine precise coordinates for bounding boxes
     - Use normalized scale 0-1000 for both x and y axes
     - x1, y1 = top-left corner of bounding box  
     - x2, y2 = bottom-right corner of bounding box
     - Ensure x2 > x1 and y2 > y1
     - Make bounding boxes tight around the actual text content
     - If a field spans multiple lines, create a bounding box that encompasses all relevant text
+    - Reference the ruler markings on the image edges to provide accurate coordinates
     </spatial-localization-guidelines>
 
     <final-instructions>
@@ -1194,6 +1196,9 @@ assessment:
     }
 
     IMPORTANT:  
+    - The <task-schema> contains the raw JSON schema for the attributes you should assess
+    - The <extraction-results> contains the FULL extraction data (you have complete context for cross-referencing)
+    - The <task-instruction> specifies which attributes to focus on
     - For LIST attributes like "Transactions", assess EACH individual item in the list separately with individual bounding boxes
     - Each transaction should be assessed as a separate object in the array with its own spatial coordinates
     - Do NOT provide aggregate assessments for list items - assess each one individually with precise locations
@@ -1214,9 +1219,13 @@ assessment:
 
     <<CACHEPOINT>>
 
-    <attributes-definitions>
-    {ATTRIBUTE_NAMES_AND_DESCRIPTIONS}
-    </attributes-definitions>
+    <task-schema>
+    {TASK_SCHEMA}
+    </task-schema>
+
+    <task-instruction>
+    {TASK_INSTRUCTION}
+    </task-instruction>
 
     <extraction-results>
     {EXTRACTION_RESULTS}
 
@@ -11,6 +11,7 @@ ocr:
     - name: LAYOUT
     - name: TABLES
     - name: SIGNATURES
+  max_workers: "20"
   image:
     dpi: "150"
     target_width: ""
@@ -766,6 +767,7 @@ classes:
 classification:
   model: Custom fine tuned UDOP model
 extraction:
+  max_workers: "20"
   image:
     target_width: ""
     target_height: ""
@@ -926,6 +928,7 @@ summarization:
     You are a document summarization expert who can analyze and summarize documents from various domains including medical, financial, legal, and general business documents. Your task is to create a summary that captures the key information, main points, and important details from the document. Your output must be in valid JSON format. \nSummarization Style: Balanced\\nCreate a balanced summary that provides a moderate level of detail. Include the main points and key supporting information, while maintaining the document's overall structure. Aim for a comprehensive yet concise summary.\n Your output MUST be in valid JSON format with markdown content. You MUST strictly adhere to the output format specified in the instructions.
 assessment:
   enabled: true
+  max_workers: "20"
   image:
     target_height: ""
     target_width: ""
 
@@ -9,32 +9,27 @@ build-backend = "setuptools.build_meta"
 name = "idp-cli"
 version = "0.4.5"
 description = "Command-line interface for IDP Accelerator batch document processing"
-authors = [{name = "AWS"}]
-license = {text = "MIT-0"}
-requires-python = ">=3.10"
+authors = [{ name = "AWS" }]
+license = { text = "MIT-0" }
+requires-python = ">=3.12"
+# Keep the Python version classifiers below in sync with requires-python.
 classifiers = [
-    "Development Status :: 4 - Beta",
-    "Intended Audience :: Developers",
-    "License :: OSI Approved :: MIT License",
-    "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.10",
-    "Programming Language :: Python :: 3.11",
-    "Programming Language :: Python :: 3.12",
-    "Programming Language :: Python :: 3.13",
+  "Development Status :: 4 - Beta",
+  "Intended Audience :: Developers",
+  "License :: OSI Approved :: MIT License",
+  "Programming Language :: Python :: 3",
+  "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: 3.13",
 ]
 dependencies = [
-    "click>=8.1.0",
-    "rich>=13.0.0",
-    "boto3>=1.28.0",
-    "botocore>=1.31.0",
+  "click>=8.1.0",
+  "rich>=13.0.0",
+  "boto3>=1.28.0",
+  "botocore>=1.31.0",
 ]
 
 [project.optional-dependencies]
-test = [
-    "pytest>=7.4.0",
-    "pytest-mock>=3.11.0",
-    "moto>=4.2.0",
-]
+test = ["pytest>=7.4.0", "pytest-mock>=3.11.0", "moto>=4.2.0"]
 
 [project.scripts]
 idp-cli = "idp_cli.cli:main"