Merge branch 'feature/pipeline-error-logging' into 'develop'

rstrahan · rstrahan · commit 049302965e09 · 2025-11-12T20:13:33.000Z
Feature/pipeline error logging

See merge request genaiic-reusable-assets/engagement-artifacts/genaiic-idp-accelerator!420
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -43,14 +43,15 @@ developer_tests:
     - uv venv .venv
     - source .venv/bin/activate
     # Install Node.js and npm for basedpyright
-    - curl -fsSL https://deb.nodesource.com/setup_20.x | bash -
+    - curl -fsSL https://deb.nodesource.com/setup_22.x | bash -
     - apt-get install -y nodejs
+    - npm install -g npm@11
     - npm install -g basedpyright
     - uv pip install ruff
     # Install dependencies needed by publish.py for test imports
     - uv pip install typer rich boto3
     # Install test dependencies
-    - cd lib/idp_common_pkg && uv pip install -e ".[test]" && cd ../..
+    - cd lib/idp_common_pkg && uv pip install -e ".[test,evaluation]" && cd ../..
 
   script:
     - make lint-cicd
@@ -163,15 +164,20 @@ integration_tests:
           
           # Save full logs to artifact
           echo "$FULL_LOGS" > codebuild_logs.txt
-          echo "📁 Full deployment logs saved to: codebuild_logs.txt"
-          
+          echo -e "\033[1;36m📁 Full deployment logs saved to: \033[1;33mcodebuild_logs.txt\033[0m"
+          echo -e "\033[1;34m🔗 View logs: ${CI_JOB_URL}/artifacts/external_file/codebuild_logs.txt\033[0m"
           # Extract and show deployment summary (AI or manual)
           echo ""
-          SUMMARY=$(echo "$FULL_LOGS" | grep -A 100 "🤖 Generating deployment summary with Bedrock..." | head -100)
+          SUMMARY=$(echo "$FULL_LOGS" | sed -n '/🤖 Generating deployment summary with Bedrock.../,$p' | sed 's/^\t*//' | grep -v '\[Container\]' | grep -v 'Phase complete:' | grep -v 'Phase context status code:' | grep -v 'Entering phase' || true)
           
           if [ -n "$SUMMARY" ]; then
-            echo "🤖 Deployment Analysis:"
-            echo "$SUMMARY"
+            echo "┌─────────────────────────────────────────────────────────────────────────┐"
+            echo "│                        DEPLOYMENT SUMMARY                               │"
+            echo "├─────────────────────────────────────────────────────────────────────────┤"
+            echo "$SUMMARY" | while IFS= read -r line; do
+              printf "│ %-71s │\n" "$line"
+            done
+            echo "└─────────────────────────────────────────────────────────────────────────┘"
           else
             echo "⚠️ No deployment summary found - check codebuild_logs.txt for details"
           fi
diff --git a/docs/deployment.md b/docs/deployment.md
@@ -114,7 +114,7 @@ You need to have the following packages installed on your computer:
 4. python 3.11 or later
 5. A local Docker daemon
 6. Python packages for publish.py: `pip install boto3 rich typer PyYAML botocore setuptools ruff`
-7. **Node.js 18+** and **npm** (required for UI validation in publish script)
+7. **Node.js 22.12+** and **npm** (required for UI validation in publish script)
 
 For guidance on setting up a development environment, see:
 
diff --git a/docs/setup-development-env-WSL.md b/docs/setup-development-env-WSL.md
@@ -44,7 +44,7 @@ cd accelerated-intelligent-document-processing-on-aws
 ```
 This script automatically installs:
 - Git, Python 3, pip, and build tools
-- Node.js 18
+- Node.js 22
 - AWS CLI v2
 - AWS SAM CLI
 - Python dependencies
@@ -77,8 +77,8 @@ Enter your AWS credentials when prompted. Refer to: https://docs.aws.amazon.com/
 python3 --version (Example: Python 3.12.3)
 aws --version (Example: aws-cli/2.28.26)
 sam --version (Example: SAM CLI, version 1.143.0)
-node --version (Example: v18.20.8)
-npm --version (Example: 10.8.2)
+node --version (Example: v22.12.0)
+npm --version (Example: 11.0.0)
 ```
 ### 4.2 Test Build Process
 ```
diff --git a/docs/setup-development-env-macos.md b/docs/setup-development-env-macos.md
@@ -142,7 +142,7 @@ sam --version
 ```
 
 
-## 7) Node.js via nvm (Node 18 LTS)
+## 7) Node.js via nvm (Node 22 LTS)
 
 Use nvm to manage Node versions. This mirrors your Linux section but uses zsh-friendly profile updates.
 
@@ -156,9 +156,9 @@ fi
 export NVM_DIR="$HOME/.nvm"
 [ -s "$NVM_DIR/nvm.sh" ] && . "$NVM_DIR/nvm.sh"
 
-# Install Node 18 LTS
-nvm install 18
-nvm alias default 18
+# Install Node 22 LTS
+nvm install 22
+nvm alias default 22
 
 # Test
 node -v
diff --git a/package.json b/package.json
@@ -17,7 +17,7 @@
     "typecheck:stats": "basedpyright --stats"
   },
   "engines": {
-    "node": ">=18.0.0",
-    "npm": ">=9.0.0"
+    "node": ">=22.12.0",
+    "npm": ">=11.0.0"
   }
 }
diff --git a/publish.sh b/publish.sh
@@ -108,8 +108,8 @@ check_nodejs_dependencies() {
   node_version=$(node --version 2>/dev/null | sed 's/v//')
   node_major=$(echo "$node_version" | cut -d'.' -f1)
 
-  if [[ "$node_major" -lt 18 ]]; then
-    print_error "Node.js $node_version found, but 18+ is required for UI validation"
+  if [[ "$node_major" -lt 22 ]]; then
+    print_error "Node.js $node_version found, but 22+ is required for UI validation"
     print_info "Please upgrade Node.js to version 22 or later"
     exit 1
   else
diff --git a/scripts/codebuild_deployment.py b/scripts/codebuild_deployment.py
@@ -51,16 +51,15 @@ def run_command(cmd, check=True):
         print(result.stderr, file=sys.stderr)
     if check and result.returncode != 0:
         print(f"Command failed with exit code {result.returncode}")
-        sys.exit(1)
+        raise Exception(f"Command failed: {cmd}")
     return result
 
 
 def get_env_var(name, default=None):
     """Get environment variable with optional default"""
     value = os.environ.get(name, default)
     if value is None:
-        print(f"Error: Environment variable {name} is required")
-        sys.exit(1)
+        raise Exception(f"Environment variable {name} is required")
     return value
 
 
@@ -99,7 +98,7 @@ def publish_templates():
         return template_url
     else:
         print("❌ Failed to extract template URL from publish output")
-        sys.exit(1)
+        raise Exception("Failed to extract template URL from publish output")
 
 
 def deploy_test_and_cleanup_pattern(stack_prefix, pattern_config, admin_email, template_url):
@@ -267,6 +266,65 @@ def get_codebuild_logs():
         return f"Failed to retrieve CodeBuild logs: {str(e)}"
 
 
+def generate_publish_failure_summary(publish_error):
+    """Print a Bedrock-generated analysis of a publish/build failure.
+
+    Builds a prompt from the publish error text and the CodeBuild logs
+    (via get_codebuild_logs()), sends it to the Bedrock runtime, and prints
+    the model's reply. This is best-effort: any exception raised along the
+    way is caught and reported as a warning instead of propagating.
+
+    Args:
+        publish_error: Text describing the publish failure; it is embedded
+            verbatim in the prompt.
+
+    Returns:
+        None. The summary is only written to stdout.
+    """
+    try:
+        bedrock = boto3.client('bedrock-runtime')
+
+        # Dedent the static template BEFORE substituting dynamic text.
+        # The logs and error are multi-line and start at column 0, so
+        # dedenting after interpolation finds no common margin and strips
+        # nothing, leaving every template line indented in the prompt.
+        prompt_template = dedent("""
+        You are a build system analyst. Analyze this publish/build failure and provide specific technical guidance.
+
+        Publish Error: {publish_error}
+
+        Build Logs:
+        {build_logs}
+
+        ANALYZE THE LOGS FOR: npm ci errors, package-lock.json sync issues, missing @esbuild packages, UI build failures
+
+        Create a summary focused on BUILD/PUBLISH issues with bullet points:
+
+        🔧 BUILD FAILURE ANALYSIS
+
+        📋 Component Status:
+        • UI Build: FAILED - npm dependency issues
+        • Lambda Build: SUCCESS - All patterns built correctly
+        • Template Publish: FAILED - S3 access denied
+
+        🔍 Technical Root Cause:
+        • Extract exact npm/pip error messages from logs
+        • Identify specific missing packages or version conflicts
+        • Focus on build-time errors, not deployment errors
+        • Check AWS credentials and S3 bucket permissions
+
+        💡 Fix Commands:
+        • Run: cd src/ui && rm package-lock.json && npm install
+        • Check AWS profile: aws configure list --profile <name>
+        • Verify S3 access: aws s3 ls s3://bucket-name --profile <name>
+        • Update package-lock.json and commit changes
+
+        Keep each bullet point under 75 characters. Use sub-bullets for details.
+
+        IMPORTANT: Respond ONLY with the bullet format above. Do not include any text before or after.
+        """)
+        # str.format does not re-parse substituted values, so braces inside
+        # the logs or the error text are inserted literally.
+        prompt = prompt_template.format(
+            publish_error=publish_error,
+            build_logs=get_codebuild_logs(),
+        )
+
+        response = bedrock.invoke_model(
+            modelId='anthropic.claude-3-5-sonnet-20240620-v1:0',
+            body=json.dumps({
+                "anthropic_version": "bedrock-2023-05-31",
+                "max_tokens": 2000,
+                "messages": [{"role": "user", "content": prompt}]
+            })
+        )
+
+        # The reply text is read from the first content entry of the body.
+        response_body = json.loads(response['body'].read())
+        summary = response_body['content'][0]['text']
+
+        print(summary)
+
+    except Exception as e:
+        # Summary generation must never mask the original publish failure.
+        print(f"⚠️ Failed to generate build failure summary: {e}")
+
+
 def generate_deployment_summary(deployment_results, stack_prefix, template_url):
     """
     Generate deployment summary using Bedrock API
@@ -306,38 +364,38 @@ def generate_deployment_summary(deployment_results, stack_prefix, template_url):
         Pattern Results Summary:
         {json.dumps(deployment_results, indent=2)}
 
-        Create a summary with this EXACT format:
-
-        ┌─────────────────────────────────────────────────────────────────────────────┐
-        │                              DEPLOYMENT RESULTS                             │
-        ├─────────────────────────────────────────────────────────────────────────────┤
-        │ Pattern               │ Status    │ Duration  │ Key Metrics               │
-        ├─────────────────────────────────────────────────────────────────────────────┤
-        │ Pattern 1 - BDA       │ SUCCESS   │ 15m 30s   │ 28 functions validated    │
-        │ Pattern 2 - OCR       │ SUCCESS   │ 12m 45s   │ All tests passed          │
-        ├─────────────────────────────────────────────────────────────────────────────┤
-        │                              ROOT CAUSE ANALYSIS                            │
-        ├─────────────────────────────────────────────────────────────────────────────┤
-        │ • CloudWatch log cleanup failed due to concatenated log group names        │
-        │ • AWS CLI text output parsing caused parameter validation errors           │
-        │ • Provide specific error messages, resource names, and failure points      │
-        ├─────────────────────────────────────────────────────────────────────────────┤
-        │                              RECOMMENDATIONS                                │
-        ├─────────────────────────────────────────────────────────────────────────────┤
-        │ • Fix CloudWatch log cleanup to use JSON output instead of text            │
-        │ • Add proper error handling for resource cleanup operations                 │
-        └─────────────────────────────────────────────────────────────────────────────┘
+        Create a summary with clean bullet format:
+
+        🚀 DEPLOYMENT RESULTS
+
+        📋 Pattern Status:
+        • Pattern 1 - BDA: SUCCESS - Stack deployed successfully (120s)
+        • Pattern 2 - OCR: FAILED - CloudFormation CREATE_FAILED (89s)  
+        • Pattern 3 - UDOP: SKIPPED - Not selected for deployment
+
+        🔍 Root Cause Analysis:
+        • Analyze actual deployment results from Pattern Results Summary
+        • Extract specific CloudFormation error messages and resource names
+        • Focus on CREATE_FAILED, UPDATE_FAILED, ROLLBACK events
+        • Check for smoke test failures and their underlying causes
+        • Report Lambda function errors, API Gateway issues, IAM permissions
+
+        💡 Recommendations:
+        • Use actual pattern names and statuses from deployment_results
+        • Include specific CloudFormation stack names and error details
+        • Provide smoke test error details and remediation steps
+
+        Keep each bullet point under 75 characters. Use clean text format.
+        
+        IMPORTANT: Respond ONLY with clean bullet format above. No tables or boxes.
 
         Requirements:
-        - Use EXACT table format above
-        - For failures: provide detailed root cause analysis for ALL failed components (publish, deployments, etc.)
-        - Analyze publish/build logs to determine root cause of template publishing errors
-        - Include specific error messages, resource names, and exact failure points from logs
-        - Include sufficient technical details to understand WHY each component failed
-        - Maximum 2-3 bullet points for recommendations
-        - Keep each line under 75 characters
-        - Extract actual error messages and resource identifiers from logs
-        - For publish failures: check S3 permissions, npm/pip errors, CDK issues, template syntax
+        - Analyze ALL error messages in logs for specific technical details
+        - Include exact CloudFormation/Lambda error messages and specific commands to fix
+        - Extract specific error patterns like "CREATE_FAILED", "UPDATE_FAILED", "ROLLBACK"
+        - Provide detailed technical root cause analysis with specific resource names
+        - Include actionable recommendations with exact terminal commands
+        
         """)
         
         # Call Bedrock API
@@ -359,23 +417,7 @@ def generate_deployment_summary(deployment_results, stack_prefix, template_url):
         response_body = json.loads(response['body'].read())
         summary = response_body['content'][0]['text']
         
-        try:
-            from rich.console import Console
-            from rich.panel import Panel
-            
-            console = Console()
-            console.print(Panel(
-                summary, 
-                title="🤖 AI Deployment Analysis", 
-                border_style="green",
-                padding=(1, 2)
-            ))
-        except ImportError:
-            # Fallback to plain text if Rich not available
-            print("📊 Deployment Summary Generated:")
-            print("=" * 80)
-            print(summary)
-            print("=" * 80)
+        print(summary)
         
         return summary
         
@@ -413,32 +455,6 @@ def generate_deployment_summary(deployment_results, stack_prefix, template_url):
         
         return manual_summary
 
-
-def delete_versioned_bucket(bucket_name):
-    """Delete all versions and delete markers from a versioned S3 bucket, then delete the bucket."""
-    import boto3
-    try:
-        s3 = boto3.client('s3')
-        paginator = s3.get_paginator('list_object_versions')
-        
-        for page in paginator.paginate(Bucket=bucket_name):
-            # Delete object versions
-            if 'Versions' in page:
-                for version in page['Versions']:
-                    s3.delete_object(Bucket=bucket_name, Key=version['Key'], VersionId=version['VersionId'])
-            
-            # Delete delete markers
-            if 'DeleteMarkers' in page:
-                for marker in page['DeleteMarkers']:
-                    s3.delete_object(Bucket=bucket_name, Key=marker['Key'], VersionId=marker['VersionId'])
-        
-        # Delete the bucket
-        s3.delete_bucket(Bucket=bucket_name)
-        return True
-    except Exception:
-        return False
-
-
 def cleanup_stack(stack_name, pattern_name):
     print(f"[{pattern_name}] Cleaning up: {stack_name}")
     try:
@@ -513,10 +529,12 @@ def main():
     try:
         template_url = publish_templates()
         publish_success = True
+        publish_error = None
     except Exception as e:
         print(f"❌ Publish failed: {e}")
         template_url = "N/A - Publish failed"
         publish_success = False
+        publish_error = str(e)
 
     all_success = publish_success
     deployment_results = []
@@ -571,7 +589,10 @@ def main():
     # Step 3: Generate deployment summary using Bedrock (ALWAYS run for analysis)
     print("\n🤖 Generating deployment summary with Bedrock...")
     try:
-        generate_deployment_summary(deployment_results, stack_prefix, template_url)
+        if not publish_success:
+            generate_publish_failure_summary(publish_error)
+        else:
+            generate_deployment_summary(deployment_results, stack_prefix, template_url)
     except Exception as e:
         print(f"⚠️ Failed to generate deployment summary: {e}")
 
diff --git a/scripts/dev_setup_al2023.sh b/scripts/dev_setup_al2023.sh
@@ -48,7 +48,7 @@ wget https://github.com/aws/aws-sam-cli/releases/latest/download/aws-sam-cli-lin
 unzip aws-sam-cli-linux-x86_64.zip -d ./sam-cli
 sudo ./sam-cli/install --update
 
-# node 18
+# node 22
 curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash  # nosemgrep: bash.curl.security.curl-pipe-bash.curl-pipe-bash - Official NVM installation script for development environment only
 source ~/.bashrc
 nvm install 18
diff --git a/scripts/dev_setup_ubuntu.sh b/scripts/dev_setup_ubuntu.sh
@@ -43,7 +43,7 @@ wget -q https://github.com/aws/aws-sam-cli/releases/latest/download/aws-sam-cli-
 unzip -q aws-sam-cli-linux-x86_64.zip -d ./sam-cli
 sudo ./sam-cli/install --update
 
-# node 20
+# node 22
 curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash
 export NVM_DIR="$HOME/.nvm"
 [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"
diff --git a/scripts/sdlc/cfn/codepipeline-s3.yml b/scripts/sdlc/cfn/codepipeline-s3.yml
@@ -198,7 +198,7 @@ Resources:
                 python: 3.12
               commands:
                 - n 22 && node --version || { echo "Node setup failed"; exit 1; }
-                - npm install -g aws-cdk || { echo "CDK installation failed"; exit 1; }
+                - npm install -g npm@11 || { echo "npm upgrade failed"; exit 1; }
                 - export IDP_ADMIN_EMAIL=$(aws s3api head-object --bucket genaiic-sdlc-sourcecode-${AWS_ACCOUNT_ID:-020432867916}-${AWS_DEFAULT_REGION:-us-east-1} --key deploy/code.zip --query 'Metadata."gitlab-user-email"' --output text 2>/dev/null || echo "")
                 - cd idp_cli && pip install -e . && cd .. || { echo "IDP CLI installation failed"; exit 1; }
                 - pip install rich || echo "Rich installation failed, using fallback formatting"
diff --git a/scripts/wsl_setup.sh b/scripts/wsl_setup.sh
@@ -29,7 +29,7 @@ sudo apt install build-essential make -y
 python3 --version
 
 # Install Node.js 22
-curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash -  # nosemgrep: bash.curl.security.curl-pipe-bash.curl-pipe-bash - Official NodeSource repository with HTTPS verification for development environment only
+curl -fsSL https://deb.nodesource.com/setup_22.x | sudo -E bash -  # nosemgrep: bash.curl.security.curl-pipe-bash.curl-pipe-bash - Official NodeSource repository with HTTPS verification for development environment only
 sudo apt-get install -y nodejs
 
 # Install AWS CLI
diff --git a/src/ui/.npmrc b/src/ui/.npmrc
diff --git a/src/ui/README.md b/src/ui/README.md
diff --git a/src/ui/package-lock.json b/src/ui/package-lock.json
diff --git a/src/ui/package.json b/src/ui/package.json

Original file line number	Diff line number	Diff line change
`@@ -17,7 +17,7 @@`
`17`	`17`	`"typecheck:stats": "basedpyright --stats"`
`18`	`18`	`},`
`19`	`19`	`"engines": {`
`20`		`- "node": ">=18.0.0",`
`21`		`- "npm": ">=9.0.0"`
	`20`	`+ "node": ">=22.12.0",`
	`21`	`+ "npm": ">=11.0.0"`
`22`	`22`	`}`
`23`	`23`	`}`