
Commit db40c76

Author: Taniya Mathur (committed)
Add AI-powered deployment summary and fix cleanup reliability
- Add Bedrock integration for AI-powered deployment analysis
- Add get_codebuild_logs() function to capture deployment logs
- Add generate_deployment_summary() using Claude 3.5 Sonnet
- Add CodeBuildBedrockPolicy to CloudFormation template
- Fix cleanup function reliability issues:
  - Replace fragile xargs pipelines with proper Python logic
  - Add individual resource processing with logging
  - Fix S3 bucket deletion that was failing silently
  - Fix ECR, CloudWatch, and AppSync resource cleanup
  - Make resource policy cleanup stack-specific
  - Add comprehensive error handling and verification

Tested: Successfully cleaned up orphaned resources and generated deployment summaries
1 parent 13b3aae commit db40c76

2 files changed: +222 additions, -13 deletions

2 files changed

+222
-13
lines changed

scripts/codebuild_deployment.py (205 additions, 12 deletions)
@@ -5,12 +5,16 @@
 Handles IDP stack deployment and testing in AWS CodeBuild environment.
 """
 
+import json
 import os
 import re
 import subprocess
 import sys
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
+from textwrap import dedent
+
+import boto3
 
 # Configuration for patterns to deploy
 DEPLOY_PATTERNS = [
@@ -130,6 +134,7 @@ def deploy_test_and_cleanup_pattern(stack_prefix, pattern_config, admin_email, t
                 "stack_name": stack_name,
                 "pattern_name": pattern_name,
                 "success": False,
+                "error": f"Stack deployment failed with status: {result.stdout.strip()}"
             }
 
         print(f"[{pattern_name}] ✅ Stack is healthy")
@@ -161,6 +166,7 @@ def deploy_test_and_cleanup_pattern(stack_prefix, pattern_config, admin_email, t
                 "stack_name": stack_name,
                 "pattern_name": pattern_name,
                 "success": False,
+                "error": f"No result file found at expected location: {result_location}"
             }
 
         # Verify the result file contains expected content
@@ -190,6 +196,7 @@ def deploy_test_and_cleanup_pattern(stack_prefix, pattern_config, admin_email, t
                 "stack_name": stack_name,
                 "pattern_name": pattern_name,
                 "success": False,
+                "error": f"Verification failed: Expected string '{verify_string}' not found in result"
             }
 
         print(
@@ -200,6 +207,7 @@ def deploy_test_and_cleanup_pattern(stack_prefix, pattern_config, admin_email, t
                 "stack_name": stack_name,
                 "pattern_name": pattern_name,
                 "success": True,
+                "verification_string": verify_string
             }
 
         except Exception as e:
@@ -208,6 +216,7 @@ def deploy_test_and_cleanup_pattern(stack_prefix, pattern_config, admin_email, t
                 "stack_name": stack_name,
                 "pattern_name": pattern_name,
                 "success": False,
+                "error": f"Result validation failed: {str(e)}"
             }
 
     except Exception as e:
@@ -216,6 +225,7 @@ def deploy_test_and_cleanup_pattern(stack_prefix, pattern_config, admin_email, t
             "stack_name": stack_name,
             "pattern_name": pattern_name,
             "success": False,
+            "error": f"Deployment/testing failed: {str(e)}"
         }
 
     # Always cleanup the stack regardless of success/failure
@@ -225,8 +235,144 @@ def deploy_test_and_cleanup_pattern(stack_prefix, pattern_config, admin_email, t
     return success_result
 
 
+def get_codebuild_logs():
+    """Get CodeBuild logs from CloudWatch"""
+    try:
+        # Get CodeBuild build ID from environment
+        build_id = os.environ.get('CODEBUILD_BUILD_ID', '')
+        if not build_id:
+            return "CodeBuild logs not available (not running in CodeBuild)"
+
+        # Extract log group and stream from build ID
+        log_group = f"/aws/codebuild/{build_id.split(':')[0]}"
+        log_stream = build_id.split('/')[-1]
+
+        # Get logs from CloudWatch
+        logs_client = boto3.client('logs')
+        response = logs_client.get_log_events(
+            logGroupName=log_group,
+            logStreamName=log_stream,
+            startFromHead=True
+        )
+
+        # Extract log messages
+        log_messages = []
+        for event in response.get('events', []):
+            log_messages.append(event['message'])
+
+        return '\n'.join(log_messages)
+
+    except Exception as e:
+        return f"Failed to retrieve CodeBuild logs: {str(e)}"
+
+
+def generate_deployment_summary(deployment_results, stack_prefix, template_url):
+    """
+    Generate deployment summary using Bedrock API
+
+    Args:
+        deployment_results: List of deployment result dictionaries
+        stack_prefix: Stack prefix used for deployment
+        template_url: Template URL used for deployment
+
+    Returns:
+        str: Generated summary text
+    """
+    try:
+        # Get CodeBuild logs
+        deployment_logs = get_codebuild_logs()
+
+        # Initialize Bedrock client
+        bedrock = boto3.client('bedrock-runtime')
+
+        # Create prompt for Bedrock with actual logs
+        prompt = dedent(f"""
+            You are an AWS deployment analyst. Analyze the following deployment logs and create a comprehensive summary.
+
+            Deployment Information:
+            - Timestamp: {datetime.now().isoformat()}
+            - Stack Prefix: {stack_prefix}
+            - Template URL: {template_url}
+            - Total Patterns: {len(deployment_results)}
+
+            Raw Deployment Logs:
+            {deployment_logs}
+
+            Pattern Results Summary:
+            {json.dumps(deployment_results, indent=2)}
+
+            Please provide:
+            1. Executive Summary (2-3 sentences)
+            2. Deployment Status Overview
+            3. Pattern-by-Pattern Analysis
+            4. Failure Analysis (extract specific errors from logs)
+            5. Recommendations based on log analysis
+
+            Focus on extracting failure reasons from the actual logs and provide actionable insights.
+            """)
+
+        # Call Bedrock API
+        response = bedrock.invoke_model(
+            modelId='anthropic.claude-3-5-sonnet-20240620-v1:0',
+            body=json.dumps({
+                "anthropic_version": "bedrock-2023-05-31",
+                "max_tokens": 2000,
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": prompt
+                    }
+                ]
+            })
+        )
+
+        # Parse response
+        response_body = json.loads(response['body'].read())
+        summary = response_body['content'][0]['text']
+
+        print("📊 Deployment Summary Generated:")
+        print("=" * 80)
+        print(summary)
+        print("=" * 80)
+
+        return summary
+
+    except Exception as e:
+        print(f"⚠️ Failed to generate Bedrock summary: {e}")
+        # Manual summary when Bedrock unavailable
+        successful = sum(1 for r in deployment_results if r["success"])
+        total = len(deployment_results)
+
+        manual_summary = dedent(f"""
+            DEPLOYMENT SUMMARY REPORT (MANUAL)
+            ==================================
+
+            Timestamp: {datetime.now().isoformat()}
+            Stack Prefix: {stack_prefix}
+            Template URL: {template_url}
+
+            Overall Status: {'SUCCESS' if successful == total else 'PARTIAL_FAILURE' if successful > 0 else 'FAILURE'}
+            Successful Patterns: {successful}/{total}
+
+            Pattern Results:
+            """)
+
+        for result in deployment_results:
+            status = "✅ SUCCESS" if result["success"] else "❌ FAILED"
+            manual_summary += f"- {result['pattern_name']}: {status}\n"
+
+        if successful < total:
+            manual_summary += "\nRecommendation: Review failed patterns and retry deployment.\n"
+
+        print("📊 Deployment Summary (Manual):")
+        print("=" * 80)
+        print(manual_summary)
+        print("=" * 80)
+
+        return manual_summary
+
+
 def cleanup_stack(stack_name, pattern_name):
-    """Clean up a deployed stack"""
     print(f"[{pattern_name}] Cleaning up: {stack_name}")
     try:
         # Check stack status first
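
Note: get_codebuild_logs() above derives its CloudWatch log group from the CODEBUILD_BUILD_ID environment variable, which CodeBuild sets in the form <project-name>:<build-uuid>. A minimal sketch of that derivation, using a hypothetical build ID (the project name and UUID below are invented):

    # Hypothetical value; in a real build CodeBuild injects CODEBUILD_BUILD_ID.
    build_id = "idp-accelerator-build:1a2b3c4d-5e6f-7a8b-9c0d-1e2f3a4b5c6d"

    # Same derivation as get_codebuild_logs(): project name -> default log group.
    project_name = build_id.split(':')[0]           # "idp-accelerator-build"
    log_group = f"/aws/codebuild/{project_name}"    # "/aws/codebuild/idp-accelerator-build"
    print(log_group)
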
@@ -247,20 +393,51 @@ def cleanup_stack(stack_name, pattern_name):
         os.environ['AWS_RETRY_MODE'] = 'adaptive'
 
         # ECR repositories
+        print(f"[{pattern_name}] Cleaning up ECR repositories...")
         stack_name_lower = stack_name.lower()
-        run_command(f"aws ecr describe-repositories --query 'repositories[?contains(repositoryName, `{stack_name_lower}`)].repositoryName' --output text | xargs -r -n1 aws ecr delete-repository --repository-name --force", check=False)
-
+        result = run_command(f"aws ecr describe-repositories --query 'repositories[?contains(repositoryName, `{stack_name_lower}`)].repositoryName' --output text", check=False)
+        if result.stdout.strip():
+            repo_names = [name for name in result.stdout.strip().split('\t') if name]
+            for repo_name in repo_names:
+                print(f"[{pattern_name}] Deleting ECR repository: {repo_name}")
+                run_command(f"aws ecr delete-repository --repository-name {repo_name} --force", check=False)
+
         # S3 buckets (empty and delete orphaned buckets)
-        run_command(f"aws s3api list-buckets --query 'Buckets[?contains(Name, `{stack_name}`)].Name' --output text | xargs -r -n1 -I {{}} sh -c 'aws s3 rm s3://{{}} --recursive && aws s3api delete-bucket --bucket {{}}'", check=False)
-
-        # CloudWatch log groups (single comprehensive search)
-        run_command(f"aws logs describe-log-groups --query 'logGroups[?contains(logGroupName, `{stack_name}`)].logGroupName' --output text | xargs -r -n1 aws logs delete-log-group --log-group-name", check=False)
-
-        # AppSync logs (requires separate handling due to random API IDs)
-        run_command(f"aws appsync list-graphql-apis --query 'graphqlApis[?contains(name, `{stack_name}`)].apiId' --output text | xargs -r -I {{}} aws logs delete-log-group --log-group-name '/aws/appsync/apis/{{}}'", check=False)
+        print(f"[{pattern_name}] Cleaning up S3 buckets...")
+        result = run_command(f"aws s3api list-buckets --query 'Buckets[?contains(Name, `{stack_name}`)].Name' --output text", check=False)
+        if result.stdout.strip():
+            bucket_names = [name for name in result.stdout.strip().split('\t') if name]
+            for bucket_name in bucket_names:
+                print(f"[{pattern_name}] Deleting bucket: {bucket_name}")
+                run_command(f"aws s3 rm s3://{bucket_name} --recursive", check=False)
+                run_command(f"aws s3api delete-bucket --bucket {bucket_name}", check=False)
+
+        # CloudWatch log groups
+        print(f"[{pattern_name}] Cleaning up CloudWatch log groups...")
+        result = run_command(f"aws logs describe-log-groups --query 'logGroups[?contains(logGroupName, `{stack_name}`)].logGroupName' --output text", check=False)
+        if result.stdout.strip():
+            log_group_names = [name for name in result.stdout.strip().split('\t') if name]
+            for log_group_name in log_group_names:
+                print(f"[{pattern_name}] Deleting log group: {log_group_name}")
+                run_command(f"aws logs delete-log-group --log-group-name {log_group_name}", check=False)
+
+        # AppSync logs
+        print(f"[{pattern_name}] Cleaning up AppSync logs...")
+        result = run_command(f"aws appsync list-graphql-apis --query 'graphqlApis[?contains(name, `{stack_name}`)].apiId' --output text", check=False)
+        if result.stdout.strip():
+            api_ids = [api_id for api_id in result.stdout.strip().split('\t') if api_id]
+            for api_id in api_ids:
+                print(f"[{pattern_name}] Deleting AppSync log group for API: {api_id}")
+                run_command(f"aws logs delete-log-group --log-group-name '/aws/appsync/apis/{api_id}'", check=False)
 
-        # Clean up CloudWatch Logs Resource Policy (ignore errors if policy doesn't exist)
-        run_command(f"aws logs describe-resource-policies --query 'resourcePolicies[0].policyName' --output text | xargs -r aws logs delete-resource-policy --policy-name || true", check=False)
+        # Clean up CloudWatch Logs Resource Policy only if stack-specific
+        print(f"[{pattern_name}] Checking CloudWatch resource policies...")
+        result = run_command(f"aws logs describe-resource-policies --query 'resourcePolicies[?contains(policyName, `{stack_name}`)].policyName' --output text", check=False)
+        if result.stdout.strip():
+            policy_names = [name for name in result.stdout.strip().split('\t') if name]
+            for policy_name in policy_names:
+                print(f"[{pattern_name}] Deleting resource policy: {policy_name}")
+                run_command(f"aws logs delete-resource-policy --policy-name {policy_name}", check=False)
 
         print(f"[{pattern_name}] ✅ Cleanup completed")
     except Exception as e:
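
Note: each cleanup step above follows the same pattern that replaces the old xargs pipelines: list matching resources with the AWS CLI's --output text (tab-separated values), split on tabs, and delete each item individually with a log line. A small illustration of the splitting logic, with made-up repository names:

    # Simulated stdout from `aws ecr describe-repositories ... --output text`.
    stdout = "idp-test-p1-repo\tidp-test-p2-repo"
    repo_names = [name for name in stdout.strip().split('\t') if name]
    print(repo_names)  # ['idp-test-p1-repo', 'idp-test-p2-repo']

    # An empty result is skipped entirely by the `if result.stdout.strip():` guard;
    # even without it, the trailing filter drops the empty string.
    print([name for name in "".strip().split('\t') if name])  # []
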
@@ -282,6 +459,7 @@ def main():
     template_url = publish_templates()
 
     all_success = True
+    deployment_results = []
 
     # Step 2: Deploy, test, and cleanup patterns concurrently
     print("🚀 Starting concurrent deployment of all patterns...")
@@ -303,6 +481,7 @@
             pattern_config = future_to_pattern[future]
             try:
                 result = future.result()
+                deployment_results.append(result)
                 if not result["success"]:
                     all_success = False
                     print(f"[{pattern_config['name']}] ❌ Failed")
@@ -311,8 +490,22 @@
 
             except Exception as e:
                 print(f"[{pattern_config['name']}] ❌ Exception: {e}")
+                # Add failed result for exception cases
+                deployment_results.append({
+                    "stack_name": f"{stack_prefix}-{pattern_config['suffix']}",
+                    "pattern_name": pattern_config['name'],
+                    "success": False,
+                    "error": str(e)
+                })
                 all_success = False
 
+    # Step 3: Generate deployment summary using Bedrock
+    print("\n🤖 Generating deployment summary with Bedrock...")
+    try:
+        generate_deployment_summary(deployment_results, stack_prefix, template_url)
+    except Exception as e:
+        print(f"⚠️ Failed to generate deployment summary: {e}")
+
     # Check final status after all cleanups are done
     if all_success:
         print("🎉 All pattern deployments completed successfully!")

scripts/sdlc/cfn/codepipeline-s3.yml (17 additions, 1 deletion)
@@ -257,4 +257,20 @@ Resources:
               - 'logs:CreateLogGroup'
               - 'logs:CreateLogStream'
               - 'logs:PutLogEvents'
-            Resource: '*'
+            Resource: '*'
+
+  CodeBuildBedrockPolicy:
+    Type: 'AWS::IAM::Policy'
+    Condition: CreateCodeBuildRole
+    Properties:
+      PolicyName: CodeBuildBedrockAccess
+      Roles:
+        - !Ref CodeBuildRole
+      PolicyDocument:
+        Version: '2012-10-17'
+        Statement:
+          - Effect: Allow
+            Action:
+              - 'bedrock:InvokeModel'
+            Resource:
+              - !Sub 'arn:aws:bedrock:*::foundation-model/anthropic.claude-3-5-sonnet-20240620-v1:0'
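
Note: one way to sanity-check the new CodeBuildBedrockPolicy before a full pipeline run is to simulate the CodeBuild role against the Bedrock action with IAM. A rough sketch, assuming the deployed role's ARN is known (the account ID and role name below are placeholders):

    import boto3

    iam = boto3.client('iam')

    # Placeholder ARN; substitute the CodeBuildRole created by this template.
    role_arn = "arn:aws:iam::123456789012:role/example-CodeBuildRole"

    response = iam.simulate_principal_policy(
        PolicySourceArn=role_arn,
        ActionNames=["bedrock:InvokeModel"],
        ResourceArns=[
            "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-3-5-sonnet-20240620-v1:0"
        ],
    )

    for result in response["EvaluationResults"]:
        # Expect "allowed" once the policy is attached to the role.
        print(result["EvalActionName"], result["EvalDecision"])
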
