Commit d7e2c61

Author: Bob Strahan
Merge branch 'develop' into feature/add-claude4.5-model
2 parents: 1737130 + 111b085

File tree

40 files changed: +4633, -733 lines


.gitlab-ci.yml

Lines changed: 0 additions & 1 deletion

@@ -51,7 +51,6 @@ developer_tests:
 
 integration_tests:
   stage: integration_tests
-  timeout: 2h
   # variables:
   # # In order to run tests in another account, add a AWS_CREDS_TARGET_ROLE variable to the Gitlab pipeline variables.
   #   AWS_CREDS_TARGET_ROLE: ${AWS_CREDS_TARGET_ROLE}

CHANGELOG.md

Lines changed: 17 additions & 0 deletions

@@ -5,6 +5,23 @@ SPDX-License-Identifier: MIT-0
 
 ## [Unreleased]
 
+## [0.3.19]
+
+### Added
+
+- **Error Analyzer (Troubleshooting Tool) for AI-Powered Failure Diagnosis**
+  - Introduced an intelligent AI-powered troubleshooting agent that automatically diagnoses document processing failures using Claude Sonnet 4 with the Strands agent framework
+  - **Key Capabilities**: Natural language query interface, intelligent routing between document-specific and system-wide analysis, multi-source data correlation (CloudWatch Logs, DynamoDB, Step Functions), root cause identification with actionable recommendations, evidence-based analysis with collapsible log details
+  - **Web UI Integration**: Accessible via the "Troubleshoot" button on failed documents, with real-time job status, progress tracking, automatic job resumption, and formatted results (Root Cause, Recommendations, Evidence sections)
+  - **Tool Ecosystem**: 8 specialized tools, including analyze_errors (main router), analyze_document_failure, analyze_recent_system_errors, CloudWatch log search tools, DynamoDB integration tools, and Lambda context retrieval; additional tools will be added as the feature evolves
+  - **Configuration**: Configurable via the Web UI, including model selection (Claude Sonnet 4 recommended), system prompt customization, max_log_events (default: 5), and time_range_hours_default (default: 24)
+  - **Documentation**: Comprehensive guide in `docs/error-analyzer.md` with architecture diagrams, usage examples, best practices, and a troubleshooting guide
+
+### Fixed
+- Problem with setting correctly formatted WAF IPv4 CIDR range - #73
+
+
+
 ## [0.3.18]
 
 ### Added
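Editor's note on the new settings above: max_log_events and time_range_hours_default surface in the pattern config under agents.error_analyzer.parameters (see the config.yaml diff below). As a minimal sketch, assuming PyYAML is available and using a hypothetical config path and helper name, a caller could read them with the documented defaults like this:

import yaml  # PyYAML, assumed available

def load_error_analyzer_params(path="config.yaml"):  # hypothetical path
    # Read agents.error_analyzer.parameters, falling back to the
    # defaults documented in the changelog entry above.
    with open(path) as f:
        cfg = yaml.safe_load(f) or {}
    params = cfg.get("agents", {}).get("error_analyzer", {}).get("parameters", {})
    return {
        "max_log_events": params.get("max_log_events", 5),                       # default: 5
        "time_range_hours_default": params.get("time_range_hours_default", 24),  # default: 24
    }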

config_library/pattern-1/lending-package-sample/config.yaml

Lines changed: 167 additions & 101 deletions

@@ -104,6 +104,173 @@ evaluation:
   model: us.anthropic.claude-3-haiku-20240307-v1:0
   system_prompt: >-
     You are an evaluator that helps determine if the predicted and expected values match for document attribute extraction. You will consider the context and meaning rather than just exact string matching.
+discovery:
+  output_format:
+    sample_json: |-
+      {
+        "document_class" : "Form-1040",
+        "document_description" : "Brief summary of the document",
+        "groups" : [
+          {
+            "name" : "PersonalInformation",
+            "description" : "Personal information of Tax payer",
+            "attributeType" : "group",
+            "groupAttributes" : [
+              {
+                "name": "FirstName",
+                "dataType" : "string",
+                "description" : "First Name of Taxpayer"
+              },
+              {
+                "name": "Age",
+                "dataType" : "number",
+                "description" : "Age of Taxpayer"
+              }
+            ]
+          },
+          {
+            "name" : "Dependents",
+            "description" : "Dependents of taxpayer",
+            "attributeType" : "list",
+            "listItemTemplate": {
+              "itemAttributes" : [
+                {
+                  "name": "FirstName",
+                  "dataType" : "string",
+                  "description" : "Dependent first name"
+                },
+                {
+                  "name": "Age",
+                  "dataType" : "number",
+                  "description" : "Dependent Age"
+                }
+              ]
+            }
+          }
+        ]
+      }
+  with_ground_truth:
+    top_p: '0.1'
+    temperature: '1.0'
+    user_prompt: >-
+      This image contains unstructured data. Analyze the data line by line using the provided ground truth as reference.
+      <GROUND_TRUTH_REFERENCE>
+      {ground_truth_json}
+      </GROUND_TRUTH_REFERENCE>
+      Ground truth reference JSON has the fields we are interested in extracting from the document/image. Use the ground truth to optimize field extraction. Match field names, data types, and groupings from the reference.
+      Image may contain multiple pages, process all pages.
+      Extract all field names including those without values.
+      Do not change the group name and field name from ground truth in the extracted data json.
+      Add field_description field for every field which will contain instruction to LLM to extract the field data from the image/document. Add data_type field for every field.
+      Add two fields document_class and document_description.
+      For document_class generate a short name based on the document content like W4, I-9, Paystub.
+      For document_description generate a description about the document in less than 50 words.
+      If the group repeats and follows table format, update the attributeType as "list".
+      Do not extract the values.
+      Format the extracted data using the below JSON format:
+      Format the extracted groups and fields using the below JSON format:
+
+    model_id: us.amazon.nova-pro-v1:0
+    system_prompt: >-
+      You are an expert in processing forms. Extracting data from images and
+      documents. Use provided ground truth data as reference to optimize field
+      extraction and ensure consistency with expected document structure and
+      field definitions.
+    max_tokens: '10000'
+  without_ground_truth:
+    top_p: '0.1'
+    temperature: '1.0'
+    user_prompt: >-
+      This image contains forms data. Analyze the form line by line.
+      Image may contains multiple pages, process all the pages.
+      Form may contain multiple name value pair in one line.
+      Extract all the names in the form including the name value pair which doesn't have value.
+      Organize them into groups, extract field_name, data_type and field description
+      Field_name should be less than 60 characters, should not have space use '-' instead of space.
+      field_description is a brief description of the field and the location of the field like box number or line number in the form and section of the form.
+      Field_name should be unique within the group.
+      Add two fields document_class and document_description.
+      For document_class generate a short name based on the document content like W4, I-9, Paystub.
+      For document_description generate a description about the document in less than 50 words.
+
+      Group the fields based on the section they are grouped in the form. Group should have attributeType as "group".
+      If the group repeats and follows table format, update the attributeType as "list".
+      Do not extract the values.
+      Return the extracted data in JSON format.
+      Format the extracted data using the below JSON format:
+      Format the extracted groups and fields using the below JSON format:
+    model_id: us.amazon.nova-pro-v1:0
+    system_prompt: >-
+      You are an expert in processing forms. Extracting data from images and
+      documents. Analyze forms line by line to identify field names, data types,
+      and organizational structure. Focus on creating comprehensive blueprints
+      for document processing without extracting actual values.
+    max_tokens: '10000'
+agents:
+  error_analyzer:
+    model_id: us.anthropic.claude-sonnet-4-20250514-v1:0
+
+    system_prompt: |-
+      You are an intelligent error analysis agent for the GenAI IDP system.
+
+      Use the analyze_errors tool to investigate issues. ALWAYS format your response with exactly these three sections in this order:
+
+      ## Root Cause
+      Identify the specific underlying technical reason why the error occurred. Focus on the primary cause, not symptoms.
+
+      ## Recommendations
+      Provide specific, actionable steps to resolve the issue. Limit to top three recommendations only.
+
+      <details>
+      <summary><strong>Evidence</strong></summary>
+
+      Format log entries with their source information. For each log entry, show:
+      **Log Group:**
+      [full log_group name from tool response]
+
+      **Log Stream:**
+      [full log_stream name from tool response]
+      ```
+      [ERROR] timestamp message (from events data)
+      ```
+
+      </details>
+
+      FORMATTING RULES:
+      - Use the exact three-section structure above
+      - Make Evidence section collapsible using HTML details tags
+      - Extract log_group, log_stream, and events data from tool response
+      - Show complete log group and log stream names without truncation
+      - Present actual log messages from events array in code blocks
+
+      RECOMMENDATION GUIDELINES:
+      For code-related issues or system bugs:
+      - Do not suggest code modifications
+      - Include error details, timestamps, and context
+
+      For configuration-related issues:
+      - Direct users to UI configuration panel
+      - Specify exact configuration section and parameter names
+
+      For operational issues:
+      - Provide immediate troubleshooting steps
+      - Include preventive measures
+
+      TIME RANGE PARSING:
+      - recent/recently: 1 hour
+      - last week: 168 hours
+      - last day/yesterday: 24 hours
+      - No time specified: 24 hours (default)
+
+      SPECIAL CASES:
+      If analysis_type is "document_not_found": explain document cannot be located, focus on verification steps and processing issues.
+
+      DO NOT include code suggestions, technical summaries, or multiple paragraphs of explanation. Keep responses concise and actionable.
+
+      IMPORTANT: Do not include any search quality reflections, search quality scores, or meta-analysis sections in your response. Only provide the three required sections: Root Cause, Recommendations, and Evidence.
+    parameters:
+      max_log_events: 5
+      time_range_hours_default: 24
 pricing:
   - name: bda/documents-custom
     units:
@@ -244,105 +411,4 @@ pricing:
     units:
       - name: gb_seconds
         price: '1.66667E-5' # $0.0000166667 per GB-second ($16.67 per 1M GB-seconds)
-discovery:
-  output_format:
-    sample_json: |-
-      {
-        "document_class" : "Form-1040",
-        "document_description" : "Brief summary of the document",
-        "groups" : [
-          {
-            "name" : "PersonalInformation",
-            "description" : "Personal information of Tax payer",
-            "attributeType" : "group",
-            "groupAttributes" : [
-              {
-                "name": "FirstName",
-                "dataType" : "string",
-                "description" : "First Name of Taxpayer"
-              },
-              {
-                "name": "Age",
-                "dataType" : "number",
-                "description" : "Age of Taxpayer"
-              }
-            ]
-          },
-          {
-            "name" : "Dependents",
-            "description" : "Dependents of taxpayer",
-            "attributeType" : "list",
-            "listItemTemplate": {
-              "itemAttributes" : [
-                {
-                  "name": "FirstName",
-                  "dataType" : "string",
-                  "description" : "Dependent first name"
-                },
-                {
-                  "name": "Age",
-                  "dataType" : "number",
-                  "description" : "Dependent Age"
-                }
-              ]
-            }
-          }
-        ]
-      }
-  with_ground_truth:
-    top_p: '0.1'
-    temperature: '1.0'
-    user_prompt: >-
-      This image contains unstructured data. Analyze the data line by line using the provided ground truth as reference.
-      <GROUND_TRUTH_REFERENCE>
-      {ground_truth_json}
-      </GROUND_TRUTH_REFERENCE>
-      Ground truth reference JSON has the fields we are interested in extracting from the document/image. Use the ground truth to optimize field extraction. Match field names, data types, and groupings from the reference.
-      Image may contain multiple pages, process all pages.
-      Extract all field names including those without values.
-      Do not change the group name and field name from ground truth in the extracted data json.
-      Add field_description field for every field which will contain instruction to LLM to extract the field data from the image/document. Add data_type field for every field.
-      Add two fields document_class and document_description.
-      For document_class generate a short name based on the document content like W4, I-9, Paystub.
-      For document_description generate a description about the document in less than 50 words.
-      If the group repeats and follows table format, update the attributeType as "list".
-      Do not extract the values.
-      Format the extracted data using the below JSON format:
-      Format the extracted groups and fields using the below JSON format:
-
-    model_id: us.amazon.nova-pro-v1:0
-    system_prompt: >-
-      You are an expert in processing forms. Extracting data from images and
-      documents. Use provided ground truth data as reference to optimize field
-      extraction and ensure consistency with expected document structure and
-      field definitions.
-    max_tokens: '10000'
-  without_ground_truth:
-    top_p: '0.1'
-    temperature: '1.0'
-    user_prompt: >-
-      This image contains forms data. Analyze the form line by line.
-      Image may contains multiple pages, process all the pages.
-      Form may contain multiple name value pair in one line.
-      Extract all the names in the form including the name value pair which doesn't have value.
-      Organize them into groups, extract field_name, data_type and field description
-      Field_name should be less than 60 characters, should not have space use '-' instead of space.
-      field_description is a brief description of the field and the location of the field like box number or line number in the form and section of the form.
-      Field_name should be unique within the group.
-      Add two fields document_class and document_description.
-      For document_class generate a short name based on the document content like W4, I-9, Paystub.
-      For document_description generate a description about the document in less than 50 words.
 
-      Group the fields based on the section they are grouped in the form. Group should have attributeType as "group".
-      If the group repeats and follows table format, update the attributeType as "list".
-      Do not extract the values.
-      Return the extracted data in JSON format.
-      Format the extracted data using the below JSON format:
-      Format the extracted groups and fields using the below JSON format:
-    model_id: us.amazon.nova-pro-v1:0
-    system_prompt: >-
-      You are an expert in processing forms. Extracting data from images and
-      documents. Analyze forms line by line to identify field names, data types,
-      and organizational structure. Focus on creating comprehensive blueprints
-      for document processing without extracting actual values.
-    max_tokens: '10000'
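
Editor's note: the discovery sample_json relocated above doubles as a blueprint schema. Entries with attributeType "group" carry groupAttributes, while repeating/tabular entries carry attributeType "list" with a listItemTemplate. A hedged sketch of a structural check over that shape (illustrative only, not part of this commit; the function name is hypothetical):

def check_discovery_output(doc):
    # Validate the shape shown in sample_json: "group" entries need
    # groupAttributes; "list" entries need listItemTemplate.itemAttributes.
    problems = []
    for key in ("document_class", "document_description", "groups"):
        if key not in doc:
            problems.append("missing top-level field: " + key)
    for group in doc.get("groups", []):
        name = group.get("name", "<unnamed>")
        kind = group.get("attributeType")
        if kind == "group":
            if "groupAttributes" not in group:
                problems.append(name + " lacks groupAttributes")
        elif kind == "list":
            if "itemAttributes" not in group.get("listItemTemplate", {}):
                problems.append(name + " lacks listItemTemplate.itemAttributes")
        else:
            problems.append(name + " has unexpected attributeType: " + repr(kind))
    return problems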
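The TIME RANGE PARSING rules in the error_analyzer system prompt are instructions to the model, but the same mapping can be written down deterministically. An illustrative sketch of those documented rules (not repository code), with time_range_hours_default supplying the fallback:

import re

def parse_time_range(query, default_hours=24):
    # Phrase-to-hours mapping taken from the system prompt above.
    q = query.lower()
    if re.search(r"\brecent(ly)?\b", q):
        return 1            # recent/recently: 1 hour
    if "last week" in q:
        return 168          # last week: 168 hours
    if "last day" in q or "yesterday" in q:
        return 24           # last day/yesterday: 24 hours
    return default_hours    # no time specified: 24 hours (default)

# Example: parse_time_range("what failed recently?") returns 1.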
