Skip to content

Commit def0d03

Browse files
committed
Merge branch 'fix/missing-log-groups' into 'develop'
feat: Add CloudFront verification and auto-created log group discovery to stack cleanup See merge request genaiic-reusable-assets/engagement-artifacts/genaiic-idp-accelerator!419
2 parents 1cd5b06 + 02dedf6 commit def0d03

File tree

1 file changed

+306
-3
lines changed

1 file changed

+306
-3
lines changed

idp_cli/idp_cli/deployer.py

Lines changed: 306 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -663,6 +663,245 @@ def _log_group_exists(self, log_group_name: str) -> bool:
663663
logger.warning(f"Error checking log group {log_group_name}: {e}")
664664
return False
665665

666+
def _get_stack_cloudfront_distributions(self, stack_name: str) -> List[Dict]:
    """
    Collect the CloudFront distributions listed among a stack's resources

    Pages through ``list_stack_resources`` for the stack and keeps every
    resource of type ``AWS::CloudFront::Distribution`` that has a physical ID.

    Args:
        stack_name: Stack name passed to CloudFormation as ``StackName``

    Returns:
        List of dicts with keys ``logical_id``, ``distribution_id`` and
        ``status``. An empty list is returned when nothing matches or when
        CloudFormation raises a ClientError (a warning is logged unless the
        error message says the stack does not exist).
    """
    try:
        resource_pages = self.cfn.get_paginator("list_stack_resources").paginate(
            StackName=stack_name
        )

        found = []
        for resource_page in resource_pages:
            for summary in resource_page.get("StackResourceSummaries", []):
                if summary.get("ResourceType") != "AWS::CloudFront::Distribution":
                    continue

                physical_id = summary.get("PhysicalResourceId")
                if not physical_id:
                    # No physical ID to look the distribution up by
                    continue

                found.append(
                    {
                        "logical_id": summary.get("LogicalResourceId"),
                        "distribution_id": physical_id,
                        "status": summary.get("ResourceStatus"),
                    }
                )

        return found

    except self.cfn.exceptions.ClientError as e:
        # A missing stack is expected after deletion and is not worth a
        # warning; any other client error is logged. Both yield no results.
        if "does not exist" not in str(e):
            logger.warning(f"Error getting CloudFront distributions: {e}")
        return []
703+
704+
def _verify_cloudfront_distributions_deleted(
    self, stack_name: str, max_wait_seconds: int = 300
) -> None:
    """
    Verify CloudFront distributions are deleted before proceeding with S3 deletion

    This prevents orphaned CloudFront distributions pointing to deleted S3 origins.

    The distributions to check come from
    ``_get_stack_cloudfront_distributions``. Each one is polled with
    ``get_distribution`` until CloudFront reports it no longer exists. The
    wait budget is shared across all distributions: the clock starts once,
    before the first distribution is polled.

    Args:
        stack_name: Stack name
        max_wait_seconds: Maximum total time to wait for distributions to be deleted

    Raises:
        TimeoutError: If a distribution still exists after max wait time
            (TimeoutError is an Exception subclass, so callers catching
            Exception are unaffected)
        Exception: Any error from ``get_distribution`` other than
            "no such distribution" is propagated unchanged
    """
    from rich.console import Console

    console = Console()

    # Get CloudFront distributions from stack
    distributions = self._get_stack_cloudfront_distributions(stack_name)

    if not distributions:
        logger.info("No CloudFront distributions found in stack")
        return

    console.print(
        f"[cyan]Verifying {len(distributions)} CloudFront distribution(s) are deleted...[/cyan]"
    )

    cloudfront = boto3.client("cloudfront")
    poll_interval_seconds = 10
    start_time = time.time()

    for dist_info in distributions:
        dist_id = dist_info["distribution_id"]
        logical_id = dist_info["logical_id"]

        while True:
            # Keep the try body to the API call only, so the timeout raised
            # below can never be intercepted by the broad fallback handler.
            try:
                response = cloudfront.get_distribution(Id=dist_id)
            except cloudfront.exceptions.NoSuchDistribution:
                # Distribution deleted - good to proceed
                console.print(f" ✓ {logical_id} ({dist_id}) is deleted")
                break
            except Exception as e:
                # Fallback for errors that carry the error code only in
                # their message rather than as the modeled exception type
                if "NoSuchDistribution" in str(e):
                    console.print(f" ✓ {logical_id} ({dist_id}) is deleted")
                    break
                raise

            # Distribution still exists - report its state and keep waiting
            distribution = response.get("Distribution", {})
            status = distribution.get("Status", "")
            enabled = distribution.get("DistributionConfig", {}).get(
                "Enabled", False
            )

            elapsed = time.time() - start_time

            if elapsed > max_wait_seconds:
                raise TimeoutError(
                    f"CloudFront distribution {logical_id} ({dist_id}) still exists after {max_wait_seconds}s. "
                    f"Status: {status}, Enabled: {enabled}. "
                    f"Cannot proceed with S3 bucket deletion to prevent orphaned distributions. "
                    f"Please disable/delete the distribution manually and retry."
                )

            console.print(
                f" Waiting for {logical_id} ({dist_id}) to be deleted... "
                f"Status: {status}, Enabled: {enabled} ({int(elapsed)}s elapsed)"
            )

            # Do not sleep (much) past the deadline; the 1s floor avoids a
            # busy loop when the remaining budget is close to zero.
            remaining = max_wait_seconds - elapsed
            time.sleep(min(poll_interval_seconds, max(remaining, 1)))
779+
780+
def _discover_auto_created_log_groups(self, stack_name: str) -> List[str]:
    """
    Discover auto-created log groups that match stack name patterns

    These are log groups created automatically by AWS services (Lambda, CodeBuild,
    Glue Crawlers, etc.) that are not tracked by CloudFormation.

    Every prefix below is queried through the ``describe_log_groups``
    paginator, so all matches are returned regardless of how many there are.
    A failure while querying one prefix is logged as a warning and the
    remaining prefixes are still checked.

    Args:
        stack_name: Stack name to match patterns against

    Returns:
        List of log group names matching stack patterns, de-duplicated and in
        discovery order. An empty list is returned if discovery fails outside
        the per-prefix handling (the error is logged).
    """
    logs = boto3.client("logs", region_name=self.region)

    # Define patterns to match - these are auto-created by AWS services
    # Use exact prefixes to avoid inadvertent matches to longer stack names
    # (e.g., "idp1" should not match "idp10")
    patterns_to_check = [
        # Lambda functions - pattern requires hyphen after stack name
        f"/aws/lambda/{stack_name}-DOCUMENTKB",
        f"/aws/lambda/{stack_name}-BDASAMPLEPROJECT",  # BDA sample project
        f"/aws/lambda/{stack_name}-DashboardMergerFunction",
        f"/aws/lambda/{stack_name}-InitializeConcurrencyTableLambda",
        # Nested stacks - pattern requires hyphen after stack name
        f"/{stack_name}-PATTERN1STACK-",  # e.g., /IDPDocker-P1-PATTERN1STACK-ABC123/lambda/...
        f"/{stack_name}-PATTERN2STACK-",
        f"/{stack_name}-PATTERN3STACK-",
        # CodeBuild projects - pattern requires hyphen after stack name
        f"/aws/codebuild/{stack_name}-PATTERN1STACK",  # Nested stack CodeBuild
        f"/aws/codebuild/{stack_name}-PATTERN2STACK",
        f"/aws/codebuild/{stack_name}-PATTERN3STACK",
        f"/aws/codebuild/{stack_name}-webui-build",  # Main stack webui build
        # Glue crawlers - pattern requires hyphen after stack name
        f"/aws-glue/crawlers-role/{stack_name}-DocumentSectionsCrawlerRole",
    ]

    # Also check for explicit log group names (these may or may not be in CFN)
    # These are exact prefixes with hyphens to prevent matching longer stack names
    explicit_patterns = [
        f"{stack_name}-GetDomainLambdaLogGroup-",
        f"{stack_name}-StacknameCheckFunctionLogGroup-",
        f"{stack_name}-ConfigurationCopyFunctionLogGroup-",
        f"{stack_name}-UpdateSettingsFunctionLogGroup-",
    ]

    # (prefixes, label used in the debug message, label used in the warning)
    searches = (
        (patterns_to_check, "auto-created", "pattern"),
        (explicit_patterns, "explicit", "explicit pattern"),
    )

    discovered_log_groups = []
    seen = set()  # O(1) duplicate check; the list keeps discovery order

    try:
        # Use paginator to handle large numbers of log groups
        paginator = logs.get_paginator("describe_log_groups")

        # Track new matches found per pattern for debugging
        pattern_match_count = {}

        for prefixes, found_label, error_label in searches:
            for pattern in prefixes:
                try:
                    matches_for_pattern = 0

                    for page in paginator.paginate(logGroupNamePrefix=pattern):
                        for log_group in page.get("logGroups", []):
                            log_group_name = log_group["logGroupName"]

                            # The API already filters by prefix; re-check
                            # locally so a longer stack name can never slip in
                            if not log_group_name.startswith(pattern):
                                continue
                            if log_group_name in seen:
                                continue

                            seen.add(log_group_name)
                            discovered_log_groups.append(log_group_name)
                            matches_for_pattern += 1
                            logger.debug(
                                f"Discovered {found_label} log group: {log_group_name}"
                            )

                    pattern_match_count[pattern] = matches_for_pattern

                except Exception as e:
                    # Best effort: one failing prefix must not stop the rest
                    logger.warning(f"Error checking {error_label} {pattern}: {e}")
                    continue

        # Log summary with pattern match counts
        if discovered_log_groups:
            logger.info(
                f"Discovered {len(discovered_log_groups)} auto-created log groups for stack {stack_name}"
            )
            logger.debug(f"Pattern match counts: {pattern_match_count}")
        else:
            logger.info(f"No auto-created log groups found for stack {stack_name}")
            logger.debug(
                f"Checked {len(patterns_to_check) + len(explicit_patterns)} patterns"
            )

        return discovered_log_groups

    except Exception as e:
        logger.error(f"Error discovering auto-created log groups: {e}")
        return []
904+
666905
def cleanup_retained_resources(self, stack_identifier: str) -> Dict:
667906
"""
668907
Delete resources that CloudFormation retained
@@ -680,10 +919,44 @@ def cleanup_retained_resources(self, stack_identifier: str) -> Dict:
680919

681920
console.print("\n[bold blue]Analyzing retained resources...[/bold blue]")
682921

683-
# Get resources that weren't deleted
922+
# Get resources that weren't deleted by CloudFormation
684923
retained = self.get_retained_resources_after_deletion(stack_identifier)
685924

686-
# Count resources
925+
# Extract stack name from identifier (handle both stack name and stack ARN/ID)
926+
# Stack ARN format: arn:aws:cloudformation:region:account:stack/stack-name/guid
927+
stack_name = stack_identifier
928+
if stack_identifier.startswith("arn:"):
929+
try:
930+
# Extract stack name from ARN
931+
stack_name = stack_identifier.split("/")[1]
932+
except IndexError:
933+
logger.warning(
934+
f"Could not extract stack name from ARN: {stack_identifier}"
935+
)
936+
937+
# Discover auto-created log groups that CloudFormation doesn't track
938+
console.print("[cyan]Discovering auto-created log groups...[/cyan]")
939+
auto_created_log_groups = self._discover_auto_created_log_groups(stack_name)
940+
941+
# Merge auto-created log groups with CloudFormation-tracked ones
942+
# Use a set to avoid duplicates
943+
cfn_log_group_names = {lg["physical_id"] for lg in retained["log_groups"]}
944+
945+
for log_group_name in auto_created_log_groups:
946+
if log_group_name not in cfn_log_group_names:
947+
# Add to retained log groups list
948+
retained["log_groups"].append(
949+
{
950+
"logical_id": "Auto-created",
951+
"physical_id": log_group_name,
952+
"type": "AWS::Logs::LogGroup",
953+
"status": "AUTO_CREATED",
954+
"status_reason": "Auto-created by AWS service",
955+
"stack": stack_name,
956+
}
957+
)
958+
959+
# Count resources (including newly discovered log groups)
687960
total = (
688961
len(retained["dynamodb_tables"])
689962
+ len(retained["log_groups"])
@@ -760,7 +1033,37 @@ def cleanup_retained_resources(self, stack_identifier: str) -> Dict:
7601033
)
7611034
progress.advance(task)
7621035

763-
# Phase 3: S3 Buckets (LoggingBucket last)
1036+
# Phase 3: Verify CloudFront distributions are deleted before S3 buckets
1037+
# This prevents orphaned CloudFront distributions pointing to deleted S3 origins
1038+
if retained["s3_buckets"]:
1039+
console.print()
1040+
console.print(
1041+
"[cyan]Verifying CloudFront distributions are deleted...[/cyan]"
1042+
)
1043+
try:
1044+
self._verify_cloudfront_distributions_deleted(stack_name)
1045+
console.print(
1046+
"[green]✓ CloudFront distributions verified as deleted[/green]"
1047+
)
1048+
except Exception as e:
1049+
error_msg = str(e)
1050+
console.print(
1051+
f"[red]✗ CloudFront verification failed: {error_msg}[/red]"
1052+
)
1053+
results["errors"].append(
1054+
{
1055+
"resource": "CloudFront Distribution",
1056+
"type": "CloudFront",
1057+
"error": error_msg,
1058+
}
1059+
)
1060+
# Do not proceed with S3 deletion if CloudFront still exists
1061+
console.print(
1062+
"[yellow]Skipping S3 bucket deletion to prevent orphaned CloudFront distributions[/yellow]"
1063+
)
1064+
return results
1065+
1066+
# Phase 4: S3 Buckets (LoggingBucket last)
7641067
if retained["s3_buckets"]:
7651068
# Separate LoggingBucket from others
7661069
logging_bucket = None

0 commit comments

Comments
 (0)