Skip to content

Commit ac34be2

Browse files
committed
Merge branch 'fix/image-race-condition' into 'develop'
Fix/image race condition. See merge request genaiic-reusable-assets/engagement-artifacts/genaiic-idp-accelerator!410
2 parents 6a1d055 + f4a4ba7 commit ac34be2

File tree

4 files changed

+128
-4
lines changed

4 files changed

+128
-4
lines changed

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,18 @@ SPDX-License-Identifier: MIT-0
55

66
## [Unreleased]
77

8+
### Fixed
9+
10+
- **Pattern-2 Intermittent HITLStatusUpdateFunction ECR Access Failure**
11+
- Fixed intermittent "Lambda does not have permission to access the ECR image" (403) errors during Pattern-2 deployment
12+
- **Root Cause**: Race condition where Lambda functions were created before ECR images were fully available and scannable
13+
- **Solution**: Enhanced CodeBuild custom resource to verify ECR image availability before completing, including:
14+
- Verification that all required Lambda images exist in ECR repository
15+
- Check that image scanning is no longer in progress (relevant when the repository has `ScanOnPush: true`)
16+
- **New Parameter**: Added `EnablePattern2ECRImageScanning` parameter (default: true) to allow users to disable ECR vulnerability scanning if experiencing deployment issues
17+
- Recommended: Keep enabled (true) for production to maintain security posture
18+
- Optional: Disable (false) only as temporary workaround for deployment reliability
19+
820
## [0.4.1]
921

1022
### Changed

patterns/pattern-2/template.yaml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,12 @@ Parameters:
110110
AllowedValues: ['true', 'false']
111111
Description: Enable X-Ray tracing
112112

113+
EnableECRImageScanning:
114+
Type: String
115+
Default: 'true'
116+
AllowedValues: ['true', 'false']
117+
Description: Enable automatic vulnerability scanning for Lambda container images in ECR
118+
113119
PermissionsBoundaryArn:
114120
Type: String
115121
Default: ""
@@ -147,6 +153,7 @@ Conditions:
147153
!Not [!Equals [!Ref CustomExtractionModelARN, ""]]
148154
HasPermissionsBoundary: !Not [!Equals [!Ref PermissionsBoundaryArn, ""]]
149155
HasAppSyncApi: !Not [!Equals [!Ref AppSyncApiArn, ""]]
156+
IsECRImageScanningEnabled: !Equals [!Ref EnableECRImageScanning, "true"]
150157

151158
Resources:
152159
Pattern2DockerBuildRole:
@@ -202,6 +209,7 @@ Resources:
202209
Effect: Allow
203210
Action:
204211
- ecr:DescribeImageScanFindings
212+
- ecr:DescribeImages
205213
- ecr:StartImageScan
206214
# Required for Amazon Inspector Enhanced Scanning
207215
# https://docs.aws.amazon.com/AmazonECR/latest/userguide/image-scanning-enhanced-iam.html
@@ -223,7 +231,7 @@ Resources:
223231
# checkov:skip=CKV_AWS_51: "Mutable tags allowed for workflow flexibility and version updates."
224232
Properties:
225233
ImageScanningConfiguration:
226-
ScanOnPush: true
234+
ScanOnPush: !If [IsECRImageScanningEnabled, true, false]
227235
EncryptionConfiguration:
228236
EncryptionType: KMS
229237
KmsKey: !Ref CustomerManagedEncryptionKeyArn

src/lambda/start_codebuild/index.py

Lines changed: 95 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,77 @@ def create_or_update(event, _):
6060
raise ValueError(f"invalid resource type: {resource_type}")
6161

6262

63+
def _verify_ecr_images_available(ecr_uri: str, image_version: str) -> bool:
    """Verify all required Lambda images exist in ECR and are not mid-scan.

    Each required image tag is looked up individually with
    ``describe_images``. The check is deliberately conservative: any failure
    is reported as "not ready" so that the caller can poll again.

    Args:
        ecr_uri: ECR repository URI (e.g.,
            123456789012.dkr.ecr.us-east-1.amazonaws.com/repo-name). The
            repository name may be namespaced (e.g., ``team/repo-name``).
        image_version: Image version tag (e.g., "latest" or "0.3.19")

    Returns:
        True if every required image tag is found and none of them reports a
        scan status of ``IN_PROGRESS``. False if an image is missing, a scan
        is still running, or any error occurs while checking (errors are
        logged, never propagated).
    """
    try:
        # The repository name is everything after the registry host. ECR
        # repository names may contain "/" (namespaces), so only the first
        # separator is used; taking just the last path segment would query
        # the wrong repository for names such as "team/repo".
        registry_path = ecr_uri.split("://", 1)[-1]
        repository_name = registry_path.split("/", 1)[-1]

        # List of all image tags used by Lambda functions in Pattern 2
        required_images = [
            f"ocr-function-{image_version}",
            f"classification-function-{image_version}",
            f"extraction-function-{image_version}",
            f"assessment-function-{image_version}",
            f"processresults-function-{image_version}",
            f"hitl-wait-function-{image_version}",
            f"hitl-status-update-function-{image_version}",
            f"hitl-process-function-{image_version}",
            f"summarization-function-{image_version}",
        ]

        LOGGER.info(
            "verifying %d images in repository %s with version %s",
            len(required_images),
            repository_name,
            image_version,
        )

        # Check each image
        for image_tag in required_images:
            try:
                response = ECR_CLIENT.describe_images(
                    repositoryName=repository_name,
                    imageIds=[{"imageTag": image_tag}],
                )

                images = response.get("imageDetails", [])
                if not images:
                    LOGGER.warning("image %s not found in ECR", image_tag)
                    return False

                # Only an in-flight scan blocks readiness. A missing status
                # (scanning disabled) or any other status is accepted.
                image = images[0]
                scan_status = image.get("imageScanStatus", {}).get("status")

                if scan_status == "IN_PROGRESS":
                    LOGGER.info("image %s scan still in progress", image_tag)
                    return False

                LOGGER.info("image %s verified (scan status: %s)", image_tag, scan_status)

            except ClientError as error:
                if error.response["Error"]["Code"] == "ImageNotFoundException":
                    LOGGER.warning("image %s not found: %s", image_tag, error)
                    return False
                LOGGER.error("error checking image %s: %s", image_tag, error)
                # Re-raised only to reach the outer handler below, which
                # converts it into a False ("not ready") result.
                raise

        LOGGER.info("all %d required images are available in ECR", len(required_images))
        return True

    except Exception as exception:  # pylint: disable=broad-except
        # Best-effort check: never fail the caller, just report "not ready".
        LOGGER.error("error verifying ECR images: %s", exception)
        return False
132+
133+
63134
@HELPER.poll_create
64135
@HELPER.poll_update
65136
def poll_create_or_update(event, _):
@@ -82,7 +153,28 @@ def poll_create_or_update(event, _):
82153
LOGGER.info("build status: [%s]", build_status)
83154

84155
if build_status == "SUCCEEDED":
85-
LOGGER.info("returning True")
156+
# Verify ECR images are available before returning success
157+
# This prevents Lambda functions from being created before images are pullable
158+
env_vars = build.get("environment", {}).get("environmentVariables", [])
159+
160+
# Extract ECR URI and image version from build environment
161+
ecr_uri = next((v["value"] for v in env_vars if v["name"] == "ECR_URI"), None)
162+
image_version = next((v["value"] for v in env_vars if v["name"] == "IMAGE_VERSION"), None)
163+
164+
if ecr_uri and image_version:
165+
LOGGER.info("verifying ECR images are available and pullable...")
166+
if _verify_ecr_images_available(ecr_uri, image_version):
167+
LOGGER.info("ECR image verification complete - returning True")
168+
return True
169+
170+
LOGGER.info("ECR images not yet available - returning None to poll again")
171+
return None
172+
173+
# Fallback: if we can't extract variables, proceed without verification
174+
LOGGER.warning(
175+
"could not extract ECR_URI or IMAGE_VERSION from build environment, "
176+
"proceeding without ECR verification"
177+
)
86178
return True
87179

88180
if build_status == "IN_PROGRESS":
@@ -150,7 +242,7 @@ def _delete_all_ecr_images(repository_name: str) -> None:
150242
if not image_ids:
151243
continue
152244
images_to_delete.extend(image_ids)
153-
LOGGER.debug(
245+
LOGGER.info(
154246
"queued %s images for deletion from repository %s",
155247
len(image_ids),
156248
repository_name,
@@ -162,7 +254,7 @@ def _delete_all_ecr_images(repository_name: str) -> None:
162254

163255
for chunk_start in range(0, len(images_to_delete), 100):
164256
chunk = images_to_delete[chunk_start : chunk_start + 100]
165-
LOGGER.debug(
257+
LOGGER.info(
166258
"deleting %s images from repository %s",
167259
len(chunk),
168260
repository_name,

template.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,17 @@ Parameters:
328328
AllowedPattern: "^(|arn:aws[a-z-]*:iam::[0-9]{12}:policy/.+)$"
329329
ConstraintDescription: Must be empty or a valid IAM policy ARN
330330

331+
EnablePattern2ECRImageScanning:
332+
Type: String
333+
Default: "true"
334+
AllowedValues:
335+
- "true"
336+
- "false"
337+
Description: >-
338+
Enable automatic vulnerability scanning for Pattern-2 Lambda container images in ECR.
339+
Disabling may improve deployment reliability but reduces security posture.
340+
Recommended: true for production, false only if experiencing deployment issues.
341+
331342
# Logging configuration
332343
LogLevel:
333344
Type: String
@@ -960,6 +971,7 @@ Resources:
960971
ArtifactPrefix: "<ARTIFACT_PREFIX_TOKEN>"
961972
Pattern2SourceZipfile: "<PATTERN2_SOURCE_ZIPFILE_TOKEN>"
962973
EvaluationFunctionArn: !GetAtt EvaluationFunction.Arn
974+
EnableECRImageScanning: !Ref EnablePattern2ECRImageScanning
963975

964976
PATTERN3STACK:
965977
DependsOn:

0 commit comments

Comments
 (0)