Skip to content

Commit ba18ac3

Browse files
author
Taniya Mathur
committed
feat: improve pipeline AI response handling
1 parent: 38525c1 · commit: ba18ac3

File tree

1 file changed

+130
-90
lines changed

1 file changed

+130
-90
lines changed

scripts/codebuild_deployment.py

Lines changed: 130 additions & 90 deletions
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,7 @@ def publish_templates():
101101
raise Exception("Failed to extract template URL from publish output")
102102

103103

104-
def deploy_test_and_cleanup_pattern(stack_prefix, pattern_config, admin_email, template_url):
104+
def deploy_and_test_pattern(stack_prefix, pattern_config, admin_email, template_url):
105105
"""Deploy and test a specific IDP pattern"""
106106
pattern_name = pattern_config["name"]
107107
pattern_id = pattern_config["id"]
@@ -319,83 +319,83 @@ def generate_publish_failure_summary(publish_error):
319319
response_body = json.loads(response['body'].read())
320320
summary = response_body['content'][0]['text']
321321

322-
print(summary)
322+
return summary
323+
324+
except Exception as e:
325+
return f"⚠️ Failed to generate build failure summary: {e}"
326+
327+
328+
def get_cloudformation_logs(stack_name):
329+
"""Get CloudFormation stack events for error analysis"""
330+
try:
331+
cf_client = boto3.client('cloudformation')
332+
response = cf_client.describe_stack_events(StackName=stack_name)
333+
events = response.get('StackEvents', [])
334+
335+
# Filter for failed events
336+
failed_events = []
337+
for event in events:
338+
status = event.get('ResourceStatus', '')
339+
if 'FAILED' in status or 'ROLLBACK' in status:
340+
failed_events.append({
341+
'timestamp': event.get('Timestamp', '').isoformat() if event.get('Timestamp') else '',
342+
'resource_type': event.get('ResourceType', ''),
343+
'logical_id': event.get('LogicalResourceId', ''),
344+
'status': status,
345+
'reason': event.get('ResourceStatusReason', 'No reason provided')
346+
})
347+
348+
return failed_events
323349

324350
except Exception as e:
325-
print(f"⚠️ Failed to generate build failure summary: {e}")
351+
return [{'error': f"Failed to retrieve CloudFormation logs: {str(e)}"}]
326352

327353

328354
def generate_deployment_summary(deployment_results, stack_prefix, template_url):
329-
"""
330-
Generate deployment summary using Bedrock API
331-
332-
Args:
333-
deployment_results: List of deployment result dictionaries
334-
stack_prefix: Stack prefix used for deployment
335-
template_url: Template URL used for deployment
336-
337-
Returns:
338-
str: Generated summary text
339-
"""
355+
"""Generate deployment summary using Bedrock API with CodeBuild and CloudFormation logs"""
340356
try:
341357
# Get CodeBuild logs
342358
deployment_logs = get_codebuild_logs()
343359

344-
# Check if log retrieval failed
345-
if deployment_logs.startswith("Failed to retrieve CodeBuild logs"):
346-
raise Exception("CodeBuild logs unavailable")
347-
348360
# Initialize Bedrock client
349361
bedrock = boto3.client('bedrock-runtime')
350362

351-
# Create prompt for Bedrock with actual logs
363+
# Create prompt for Bedrock with CodeBuild logs first
352364
prompt = dedent(f"""
353-
You are an AWS deployment analyst. Analyze the following deployment logs and create a concise summary in table format.
365+
You are an AWS deployment analyst. Analyze deployment failures and determine root cause.
354366
355367
Deployment Information:
356-
- Timestamp: {datetime.now().isoformat()}
357368
- Stack Prefix: {stack_prefix}
358369
- Template URL: {template_url}
359370
- Total Patterns: {len(deployment_results)}
360371
361-
Raw Deployment Logs:
372+
Pattern Results:
373+
{json.dumps(deployment_results, indent=2)}
374+
375+
CodeBuild Logs:
362376
{deployment_logs}
363377
364-
Pattern Results Summary:
365-
{json.dumps(deployment_results, indent=2)}
378+
FIRST: Analyze CodeBuild logs for clear error messages. If root cause is unclear from CodeBuild logs, respond with "NEED_CF_LOGS" and list the failed stack names.
366379
367-
Create a summary with clean bullet format:
380+
IF root cause is clear from CodeBuild logs, create summary:
368381
369382
🚀 DEPLOYMENT RESULTS
370383
371384
📋 Pattern Status:
372-
• Pattern 1 - BDA: SUCCESS - Stack deployed successfully (120s)
373-
• Pattern 2 - OCR: FAILED - CloudFormation CREATE_FAILED (89s)
374-
• Pattern 3 - UDOP: SKIPPED - Not selected for deployment
385+
• Pattern 1 - BDA: FAILED - Stack deployment timeout (300s)
386+
• Pattern 2 - OCR: SUCCESS - Stack deployed successfully (120s)
375387
376388
🔍 Root Cause Analysis:
377-
• Analyze actual deployment results from Pattern Results Summary
378-
• Extract specific CloudFormation error messages and resource names
379-
• Focus on CREATE_FAILED, UPDATE_FAILED, ROLLBACK events
380-
• Check for smoke test failures and their underlying causes
381-
• Report Lambda function errors, API Gateway issues, IAM permissions
389+
• Extract specific error messages from CodeBuild logs
390+
• Focus on deployment failures, timeout errors, permission issues
391+
• Check for CLI command failures and their error messages
382392
383-
💡 Recommendations:
384-
• Use actual pattern names and statuses from deployment_results
385-
• Include specific CloudFormation stack names and error details
386-
• Provide smoke test error details and remediation steps
387-
388-
Keep each bullet point under 75 characters. Use clean text format.
389-
390-
IMPORTANT: Respond ONLY with clean bullet format above. No tables or boxes.
393+
💡 Fix Commands:
394+
• Provide specific commands to resolve identified issues
391395
392-
Requirements:
393-
- Analyze ALL error messages in logs for specific technical details
394-
- Include exact CloudFormation/Lambda error messages and specific commands to fix
395-
- Extract specific error patterns like "CREATE_FAILED", "UPDATE_FAILED", "ROLLBACK"
396-
- Provide detailed technical root cause analysis with specific resource names
397-
- Include actionable recommendations with exact terminal commands
396+
Keep each bullet point under 75 characters.
398397
398+
IMPORTANT: If CodeBuild logs don't show clear root cause, respond ONLY with "NEED_CF_LOGS: stack1,stack2"
399399
""")
400400

401401
# Call Bedrock API
@@ -404,56 +404,81 @@ def generate_deployment_summary(deployment_results, stack_prefix, template_url):
404404
body=json.dumps({
405405
"anthropic_version": "bedrock-2023-05-31",
406406
"max_tokens": 4000,
407-
"messages": [
408-
{
409-
"role": "user",
410-
"content": prompt
411-
}
412-
]
407+
"messages": [{"role": "user", "content": prompt}]
413408
})
414409
)
415410

416-
# Parse response
417411
response_body = json.loads(response['body'].read())
418-
summary = response_body['content'][0]['text']
419-
420-
print(summary)
412+
initial_summary = response_body['content'][0]['text']
413+
414+
# Check if we need CloudFormation logs
415+
if initial_summary.startswith("NEED_CF_LOGS"):
416+
# Get CloudFormation logs for failed stacks
417+
cf_logs = {}
418+
for result in deployment_results:
419+
if not result["success"] and result.get("stack_name") and result["stack_name"] != "N/A":
420+
cf_logs[result["stack_name"]] = get_cloudformation_logs(result["stack_name"])
421+
422+
# Second Bedrock call with CloudFormation logs
423+
cf_prompt = dedent(f"""
424+
CodeBuild logs were unclear. Analyze CloudFormation logs for root cause.
425+
426+
Pattern Results:
427+
{json.dumps(deployment_results, indent=2)}
428+
429+
CloudFormation Error Events:
430+
{json.dumps(cf_logs, indent=2)}
431+
432+
Create detailed analysis:
433+
434+
🚀 DEPLOYMENT RESULTS
435+
436+
📋 Pattern Status:
437+
• Pattern 1 - BDA: FAILED - Lambda CREATE_FAILED (IAM permissions)
438+
• Pattern 2 - OCR: SUCCESS - Stack deployed successfully
439+
440+
🔍 CloudFormation Root Cause:
441+
• Extract exact resource names and error messages
442+
• Identify specific failed resources (Lambda, IAM, S3, DynamoDB)
443+
• Focus on CREATE_FAILED, UPDATE_FAILED, ROLLBACK events
444+
• Analyze ResourceStatusReason for technical details
445+
446+
💡 Fix Commands:
447+
• Provide specific AWS CLI commands to fix issues
448+
• Include IAM policy updates, resource cleanup commands
449+
450+
Keep each bullet point under 75 characters.
451+
""")
452+
453+
cf_response = bedrock.invoke_model(
454+
modelId='anthropic.claude-3-5-sonnet-20240620-v1:0',
455+
body=json.dumps({
456+
"anthropic_version": "bedrock-2023-05-31",
457+
"max_tokens": 4000,
458+
"messages": [{"role": "user", "content": cf_prompt}]
459+
})
460+
)
461+
462+
cf_response_body = json.loads(cf_response['body'].read())
463+
return cf_response_body['content'][0]['text']
421464

422-
return summary
465+
return initial_summary
423466

424467
except Exception as e:
425-
print(f"⚠️ Failed to generate Bedrock summary: {e}")
426468
# Manual summary when Bedrock unavailable
427469
successful = sum(1 for r in deployment_results if r["success"])
428470
total = len(deployment_results)
429471

430-
manual_summary = dedent(f"""
431-
DEPLOYMENT SUMMARY REPORT (MANUAL)
432-
==================================
472+
return dedent(f"""
473+
DEPLOYMENT SUMMARY (MANUAL)
433474
434-
Timestamp: {datetime.now().isoformat()}
435-
Stack Prefix: {stack_prefix}
436-
Template URL: {template_url}
437-
438-
Overall Status: {'SUCCESS' if successful == total else 'PARTIAL_FAILURE' if successful > 0 else 'FAILURE'}
439475
Successful Patterns: {successful}/{total}
440476
441477
Pattern Results:
442-
""")
478+
{chr(10).join(f"- {r['pattern_name']}: {'SUCCESS' if r['success'] else 'FAILED'}" for r in deployment_results)}
443479
444-
for result in deployment_results:
445-
status = "✅ SUCCESS" if result["success"] else "❌ FAILED"
446-
manual_summary += f"- {result['pattern_name']}: {status}\n"
447-
448-
if successful < total:
449-
manual_summary += "\nRecommendation: Review failed patterns and retry deployment.\n"
450-
451-
print("📊 Deployment Summary (Manual):")
452-
print("=" * 80)
453-
print(manual_summary)
454-
print("=" * 80)
455-
456-
return manual_summary
480+
Error: Failed to generate AI analysis: {e}
481+
""")
457482

458483
def cleanup_stack(stack_name, pattern_name):
459484
print(f"[{pattern_name}] Cleaning up: {stack_name}")
@@ -539,14 +564,14 @@ def main():
539564
all_success = publish_success
540565
deployment_results = []
541566

542-
# Step 2: Deploy, test, and cleanup patterns concurrently (only if publish succeeded)
567+
# Step 2: Deploy and test patterns concurrently (only if publish succeeded)
543568
if publish_success:
544569
print("🚀 Starting concurrent deployment of all patterns...")
545570
with ThreadPoolExecutor(max_workers=len(DEPLOY_PATTERNS)) as executor:
546-
# Submit all deployment tasks
571+
# Submit all deployment tasks (without cleanup)
547572
future_to_pattern = {
548573
executor.submit(
549-
deploy_test_and_cleanup_pattern,
574+
deploy_and_test_pattern,
550575
stack_prefix,
551576
pattern_config,
552577
admin_email,
@@ -555,7 +580,7 @@ def main():
555580
for pattern_config in DEPLOY_PATTERNS
556581
}
557582

558-
# Collect results as they complete (cleanup happens within each pattern)
583+
# Collect results as they complete
559584
for future in as_completed(future_to_pattern):
560585
pattern_config = future_to_pattern[future]
561586
try:
@@ -586,15 +611,30 @@ def main():
586611
"error": "Failed to publish templates to S3"
587612
})
588613

589-
# Step 3: Generate deployment summary using Bedrock (ALWAYS run for analysis)
590-
print("\n🤖 Generating deployment summary with Bedrock...")
614+
# Step 3: Generate deployment summary using Bedrock (but don't print yet)
615+
print("\n🤖 Analyzing deployment results...")
616+
ai_summary = None
591617
try:
592618
if not publish_success:
593-
generate_publish_failure_summary(publish_error)
619+
ai_summary = generate_publish_failure_summary(publish_error)
594620
else:
595-
generate_deployment_summary(deployment_results, stack_prefix, template_url)
621+
ai_summary = generate_deployment_summary(deployment_results, stack_prefix, template_url)
596622
except Exception as e:
597-
print(f"⚠️ Failed to generate deployment summary: {e}")
623+
ai_summary = f"⚠️ Failed to generate deployment summary: {e}"
624+
625+
# Step 4: Cleanup stacks after analysis
626+
print("\n🧹 Starting cleanup of deployed stacks...")
627+
for result in deployment_results:
628+
if result.get("stack_name") and result["stack_name"] != "N/A":
629+
cleanup_stack(result["stack_name"], result["pattern_name"])
630+
631+
# Step 5: Print AI analysis results at the end
632+
print("\n" + "="*80)
633+
print("🤖 DEPLOYMENT ANALYSIS SUMMARY")
634+
print("="*80)
635+
if ai_summary:
636+
print(ai_summary)
637+
print("="*80)
598638

599639
# Check final status after all cleanups are done
600640
if all_success:

0 commit comments

Comments
 (0)