Skip to content

Commit cee8c67

Browse files
author
Taniya Mathur
committed
feat: enhance pipeline cleanup and email notifications
- Add comprehensive resource cleanup in codebuild_deployment.py:
  - ECR repositories cleanup
  - CloudWatch log groups cleanup (Step Functions, Lambda, CodeBuild, AppSync)
  - CloudWatch Logs Resource Policy cleanup to prevent size exceeded errors
  - Enhanced stack deletion with --empty-buckets and --wait flags
- Add enhanced GitLab CI email notifications:
  - Capture actual CodeBuild logs using tracked pipeline execution ID
  - Generate detailed pipeline_summary.txt with deployment context
  - Save execution tracking files as artifacts
- Fix CloudWatch Logs Resource Policy size exceeded error:
  - Root cause was accumulated policy entries from deleted log groups
  - Enhanced cleanup now removes both log groups and policy entries
  - Prevents future Step Functions deployment failures

Resolves CloudWatch Logs Resource Policy size limit issues and improves pipeline failure debugging with detailed CodeBuild logs in notifications.
1 parent b5fff3a commit cee8c67

File tree

3 files changed

+73
-3
lines changed

3 files changed

+73
-3
lines changed

.gitlab-ci.yml

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,3 +136,40 @@ integration_tests:
136136

137137
# Run integration test deployment
138138
- python3 scripts/integration_test_deployment.py
139+
140+
after_script:
141+
# Capture CodeBuild logs using the tracked execution ID
142+
- |
143+
echo "=== IDP Pipeline Results ===" > pipeline_summary.txt
144+
echo "Branch: $CI_COMMIT_REF_NAME" >> pipeline_summary.txt
145+
echo "Commit: $CI_COMMIT_SHA" >> pipeline_summary.txt
146+
echo "Status: $CI_JOB_STATUS" >> pipeline_summary.txt
147+
echo "" >> pipeline_summary.txt
148+
149+
# Get CodeBuild logs using the exact execution ID from Python script
150+
if [ -f "pipeline_execution_id.txt" ]; then
151+
EXECUTION_ID=$(cat pipeline_execution_id.txt)
152+
echo "Pipeline Execution: $EXECUTION_ID" >> pipeline_summary.txt
153+
echo "" >> pipeline_summary.txt
154+
155+
# Get CodeBuild ID from the pipeline execution
156+
BUILD_ID=$(aws codepipeline list-action-executions --pipeline-name ${IDP_PIPELINE_NAME:-idp-sdlc-deploy-pipeline} --filter pipelineExecutionId=$EXECUTION_ID --query 'actionExecutionDetails[?actionName==`Deploy`].output.executionResult.externalExecutionId' --output text 2>/dev/null || echo "")
157+
158+
if [ "$BUILD_ID" != "" ] && [ "$BUILD_ID" != "None" ]; then
159+
echo "CodeBuild ID: $BUILD_ID" >> pipeline_summary.txt
160+
echo "" >> pipeline_summary.txt
161+
echo "=== CODEBUILD LOGS ===" >> pipeline_summary.txt
162+
aws logs get-log-events --log-group-name "/aws/codebuild/${IDP_PIPELINE_NAME:-idp-sdlc-deploy-pipeline}" --log-stream-name "$BUILD_ID" --limit 100 --query 'events[].message' --output text 2>/dev/null >> pipeline_summary.txt || echo "Could not retrieve CodeBuild logs" >> pipeline_summary.txt
163+
else
164+
echo "Could not find CodeBuild execution" >> pipeline_summary.txt
165+
fi
166+
else
167+
echo "No pipeline execution ID found" >> pipeline_summary.txt
168+
fi
169+
170+
artifacts:
171+
when: always
172+
paths:
173+
- pipeline_summary.txt
174+
- pipeline_execution_id.txt
175+
expire_in: 1 week

scripts/codebuild_deployment.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@ def get_env_var(name, default=None):
6060

6161

6262
def generate_stack_prefix():
63-
"""Generate unique stack prefix with timestamp"""
64-
timestamp = datetime.now().strftime("%m%d-%H%M") # Shorter format: MMDD-HHMM
63+
"""Generate unique stack prefix with timestamp including seconds"""
64+
timestamp = datetime.now().strftime("%m%d-%H%M%S") # Format: MMDD-HHMMSS
6565
return f"idp-{timestamp}"
6666

6767

@@ -222,7 +222,35 @@ def cleanup_stack(stack_name, pattern_name):
222222
"""Clean up a deployed stack"""
223223
print(f"[{pattern_name}] Cleaning up: {stack_name}")
224224
try:
225-
run_command(f"idp-cli delete --stack-name {stack_name} --force", check=False)
225+
# Check stack status first
226+
result = run_command(f"aws cloudformation describe-stacks --stack-name {stack_name} --query 'Stacks[0].StackStatus' --output text", check=False)
227+
stack_status = result.stdout.strip() if result.returncode == 0 else "NOT_FOUND"
228+
229+
print(f"[{pattern_name}] Stack status: {stack_status}")
230+
231+
# Delete the stack and wait for completion
232+
print(f"[{pattern_name}] Attempting stack deletion...")
233+
run_command(f"idp-cli delete --stack-name {stack_name} --force --empty-buckets --wait", check=False)
234+
235+
# Always clean up orphaned resources after deletion attempt
236+
print(f"[{pattern_name}] Cleaning up orphaned resources...")
237+
238+
# ECR repositories
239+
stack_name_lower = stack_name.lower()
240+
run_command(f"aws ecr describe-repositories --query 'repositories[?contains(repositoryName, `{stack_name_lower}`)].repositoryName' --output text | xargs -r -n1 aws ecr delete-repository --repository-name --force", check=False)
241+
242+
# CloudWatch log groups
243+
run_command(f"aws logs describe-log-groups --log-group-name-prefix '/aws/vendedlogs/states/{stack_name}' --query 'logGroups[].logGroupName' --output text | xargs -r -n1 aws logs delete-log-group --log-group-name", check=False)
244+
run_command(f"aws logs describe-log-groups --log-group-name-prefix '/aws/lambda/{stack_name}' --query 'logGroups[].logGroupName' --output text | xargs -r -n1 aws logs delete-log-group --log-group-name", check=False)
245+
run_command(f"aws logs describe-log-groups --log-group-name-prefix '/{stack_name}' --query 'logGroups[].logGroupName' --output text | xargs -r -n1 aws logs delete-log-group --log-group-name", check=False)
246+
run_command(f"aws logs describe-log-groups --log-group-name-prefix '/aws/codebuild/{stack_name}' --query 'logGroups[].logGroupName' --output text | xargs -r -n1 aws logs delete-log-group --log-group-name", check=False)
247+
# AppSync logs (get API ID first, then delete log group)
248+
run_command(f"aws appsync list-graphql-apis --query 'graphqlApis[?contains(name, `{stack_name}`)].apiId' --output text | xargs -r -I {{}} aws logs delete-log-group --log-group-name '/aws/appsync/apis/{{}}'", check=False)
249+
run_command(f"aws logs describe-log-groups --query 'logGroups[?contains(logGroupName, `{stack_name}`)].logGroupName' --output text | xargs -r -n1 aws logs delete-log-group --log-group-name", check=False)
250+
251+
# Clean up CloudWatch Logs Resource Policy entries for deleted log groups
252+
run_command(f"aws logs describe-resource-policies --query 'resourcePolicies[0].policyName' --output text | xargs -r aws logs delete-resource-policy --policy-name", check=False)
253+
226254
print(f"[{pattern_name}] ✅ Cleanup completed")
227255
except Exception as e:
228256
print(f"[{pattern_name}] ⚠️ Cleanup failed: {e}")

scripts/integration_test_deployment.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,11 @@ def monitor_pipeline(pipeline_name, version_id, max_wait=7200):
173173

174174
if not execution_id:
175175
return False
176+
177+
# Write execution ID to file for GitLab CI to use
178+
with open("pipeline_execution_id.txt", "w") as f:
179+
f.write(execution_id)
180+
print(f"Pipeline execution ID written to file: {execution_id}")
176181

177182
# Then monitor that specific execution
178183
return monitor_pipeline_execution(pipeline_name, execution_id, max_wait)

0 commit comments

Comments
 (0)