From 07d21589909c233b86e0c551253b1c6c975dbba0 Mon Sep 17 00:00:00 2001 From: Kazmer Nagy-Betegh Date: Wed, 5 Nov 2025 15:55:10 +0000 Subject: [PATCH 1/3] try fixing buildspec file --- patterns/pattern-2/buildspec.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/patterns/pattern-2/buildspec.yml b/patterns/pattern-2/buildspec.yml index 55c5eddff..f3bfd0a64 100644 --- a/patterns/pattern-2/buildspec.yml +++ b/patterns/pattern-2/buildspec.yml @@ -56,7 +56,7 @@ phases: - echo "All Pattern-2 Docker images successfully built and pushed to ECR" - echo "ECR Repository - $ECR_URI" - echo "Image Version - $IMAGE_VERSION" - - echo "Note: ECR vulnerability scans initiated (ScanOnPush enabled)" + - 'echo "Note: ECR vulnerability scans initiated (ScanOnPush enabled)"' - echo "Scans will complete asynchronously. Check ECR console for results." - echo "For accounts with Amazon Inspector Enhanced Scanning, scans may take 10-30 minutes per image." - | From 0be7d92a665d9bb9f5d9186b41a6e2c8fb502d4c Mon Sep 17 00:00:00 2001 From: Kazmer Nagy-Betegh Date: Wed, 5 Nov 2025 16:01:35 +0000 Subject: [PATCH 2/3] make buildspec validation part of the lint --- .gitignore | 7 +- Makefile | 9 +- scripts/README_validate_buildspec.md | 193 ++++++++++++++++++++ scripts/validate_buildspec.py | 254 +++++++++++++++++++++++++++ 4 files changed, 461 insertions(+), 2 deletions(-) create mode 100644 scripts/README_validate_buildspec.md create mode 100755 scripts/validate_buildspec.py diff --git a/.gitignore b/.gitignore index a66792125..d4d4441f3 100644 --- a/.gitignore +++ b/.gitignore @@ -31,4 +31,9 @@ package-lock.json # Type checking pyrightconfig.temp.json -.pyright/ \ No newline at end of file +.pyright/ + +# Python virtual environments +.venv/ +.venv-*/ +venv/ \ No newline at end of file diff --git a/Makefile b/Makefile index 938d7f1b7..695af9267 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ test: cd idp_cli && python -m pytest -v # Run both linting and formatting in one 
command -lint: ruff-lint format check-arn-partitions ui-lint +lint: ruff-lint format check-arn-partitions validate-buildspec ui-lint # Run linting checks and fix issues automatically ruff-lint: @@ -53,6 +53,13 @@ lint-cicd: @echo -e "$(GREEN)All code quality checks passed!$(NC)" +# Validate AWS CodeBuild buildspec files +validate-buildspec: + @echo "Validating buildspec files..." + @python3 scripts/validate_buildspec.py patterns/*/buildspec.yml || \ + (echo -e "$(RED)ERROR: Buildspec validation failed!$(NC)" && exit 1) + @echo -e "$(GREEN)✅ All buildspec files are valid!$(NC)" + # Check CloudFormation templates for hardcoded AWS partition ARNs and service principals check-arn-partitions: @echo "Checking CloudFormation templates for hardcoded ARN partitions and service principals..." diff --git a/scripts/README_validate_buildspec.md b/scripts/README_validate_buildspec.md new file mode 100644 index 000000000..71562b0ad --- /dev/null +++ b/scripts/README_validate_buildspec.md @@ -0,0 +1,193 @@ +# AWS CodeBuild Buildspec Validator + +A Python script to validate AWS CodeBuild `buildspec.yml` files for syntax errors, structural issues, and best practices. 
+ +## Features + +- **YAML Syntax Validation**: Ensures buildspec files are valid YAML +- **Structure Validation**: Checks for required fields (`version`, `phases`) +- **Type Checking**: Validates that commands are strings, not accidentally parsed as objects +- **Best Practices**: Warns about unknown phases, unknown phase fields, unknown `env` sections, and `artifacts` sections without `files` +- **Multi-file Support**: Can validate multiple buildspec files at once using glob patterns + +## Installation + +The validator requires Python 3.6+ and PyYAML: + +```bash +pip install pyyaml +``` + +For development environments with externally-managed Python (like macOS with Homebrew), create a virtual environment: + +```bash +python3 -m venv .venv +source .venv/bin/activate +pip install pyyaml +``` + +## Usage + +### Validate a single file + +```bash +python3 scripts/validate_buildspec.py patterns/pattern-2/buildspec.yml +``` + +### Validate multiple files with glob patterns + +```bash +python3 scripts/validate_buildspec.py patterns/*/buildspec.yml +``` + +### Using the Makefile target + +```bash +make validate-buildspec +``` + +This is also included in the `make lint` command. + +## Output + +The validator provides clear output with: +- ✅ Success indicators +- ❌ Error messages that name the offending phase and command number +- ⚠️ Warnings for non-critical issues +- 📊 Summary of phases and command counts + +### Example Output + +``` +Validating: patterns/pattern-2/buildspec.yml +====================================================================== +✅ Valid buildspec file + +Summary: + Version: 0.2 + Phases: pre_build, build, post_build + - pre_build: 7 commands + - build: 39 commands + - post_build: 8 commands +``` + +### Example Error Output + +``` +Validating: patterns/pattern-2/buildspec.yml +====================================================================== + +❌ ERRORS (1): + - Phase 'post_build', command #5 must be a string, got dict + +❌ Invalid buildspec file +``` + +## Common Issues Detected + +### 1. 
Colons in Command Strings + +**Problem**: In an unquoted YAML scalar, a colon followed by a space is read as a key-value separator. Double quotes that appear partway through a command do not prevent this, because the command as a whole is still unquoted. + +```yaml +# ❌ BAD - May be parsed as a dictionary +- echo "Note: This is a message" + +# ✅ GOOD - Use single quotes around the entire command +- 'echo "Note: This is a message"' +``` + +### 2. Missing Required Fields + +The validator checks for: +- `version` field (must be 0.1 or 0.2) +- `phases` section (must be a mapping; an empty mapping only produces a warning) + +### 3. Invalid Command Types + +All commands must be strings: + +```yaml +# ❌ BAD - Command is a dictionary +phases: + build: + commands: + - echo: "This is wrong" + +# ✅ GOOD - Command is a string +phases: + build: + commands: + - echo "This is correct" +``` + +## Exit Codes + +- `0`: All buildspec files are valid +- `1`: One or more buildspec files have errors + +This makes it suitable for use in CI/CD pipelines: + +```yaml +- name: Validate Buildspec + run: python3 scripts/validate_buildspec.py patterns/*/buildspec.yml +``` + +## Limitations + +This validator checks for: +- YAML syntax errors +- Required fields and structure +- Data type correctness +- Common mistakes + +It does **not** validate: +- AWS-specific runtime environments +- Environment variable references +- S3 artifact paths +- IAM permissions + +For complete validation, test your buildspec in an actual CodeBuild environment. + +## Integration with CI/CD + +### GitHub Actions + +Already integrated in `.github/workflows/developer-tests.yml` via the `make lint` command. 
+ +### Local Pre-commit Hook + +Add to `.git/hooks/pre-commit`: + +```bash +#!/bin/bash +python3 scripts/validate_buildspec.py patterns/*/buildspec.yml || exit 1 +``` + +## Troubleshooting + +### "ModuleNotFoundError: No module named 'yaml'" + +Install PyYAML: +```bash +pip install pyyaml +``` + +### "externally-managed-environment" + +On macOS with Homebrew Python, use a virtual environment: +```bash +python3 -m venv .venv +source .venv/bin/activate +pip install pyyaml +``` + +## Contributing + +When adding new buildspec files to the repository, ensure they pass validation: + +```bash +make validate-buildspec +``` + +This is automatically checked in CI/CD pipelines. diff --git a/scripts/validate_buildspec.py b/scripts/validate_buildspec.py new file mode 100755 index 000000000..9bf6acf81 --- /dev/null +++ b/scripts/validate_buildspec.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python3 +""" +AWS CodeBuild buildspec.yml validator + +This script validates AWS CodeBuild buildspec files for: +- Valid YAML syntax +- Required fields (version, phases) +- Correct structure and data types +- Common mistakes and best practices + +Dependencies: + PyYAML (install with: pip install pyyaml) + +Usage: + python3 scripts/validate_buildspec.py + python3 scripts/validate_buildspec.py patterns/*/buildspec.yml + +Exit codes: + 0 - All buildspec files are valid + 1 - One or more buildspec files have errors +""" + +import sys +from pathlib import Path +from typing import Any, Dict, List, Tuple +import glob + +try: + import yaml +except ImportError: + print("Error: PyYAML is not installed.") + print("Install it with: pip install pyyaml") + print("Or use the system Python with yaml pre-installed") + sys.exit(1) + + +class BuildspecValidator: + """Validator for AWS CodeBuild buildspec files""" + + SUPPORTED_VERSIONS = [0.1, 0.2] + VALID_PHASES = [ + "install", + "pre_build", + "build", + "post_build", + ] + PHASE_FIELDS = ["commands", "runtime-versions", "finally"] + + def __init__(self, filepath: 
str): + self.filepath = Path(filepath) + self.errors: List[str] = [] + self.warnings: List[str] = [] + self.buildspec: Dict[str, Any] = {} + + def validate(self) -> bool: + """Run all validation checks. Returns True if valid.""" + try: + # Load YAML + with open(self.filepath, "r") as f: + self.buildspec = yaml.safe_load(f) + except yaml.YAMLError as e: + self.errors.append(f"YAML parsing error: {e}") + return False + except Exception as e: + self.errors.append(f"Error reading file: {e}") + return False + + # Run validation checks + self._validate_version() + self._validate_phases() + self._validate_env() + self._validate_artifacts() + + return len(self.errors) == 0 + + def _validate_version(self): + """Validate version field""" + if "version" not in self.buildspec: + self.errors.append("Missing required 'version' field") + return + + version = self.buildspec["version"] + if version not in self.SUPPORTED_VERSIONS: + self.errors.append( + f"Invalid version '{version}'. Supported versions: {self.SUPPORTED_VERSIONS}" + ) + + def _validate_phases(self): + """Validate phases section""" + if "phases" not in self.buildspec: + self.errors.append("Missing required 'phases' field") + return + + phases = self.buildspec["phases"] + if not isinstance(phases, dict): + self.errors.append("'phases' must be a dictionary") + return + + if len(phases) == 0: + self.warnings.append("'phases' section is empty") + + # Validate each phase + for phase_name, phase_content in phases.items(): + if phase_name not in self.VALID_PHASES: + self.warnings.append( + f"Unknown phase '{phase_name}'. 
Valid phases: {self.VALID_PHASES}" + ) + + if not isinstance(phase_content, dict): + self.errors.append(f"Phase '{phase_name}' must be a dictionary") + continue + + # Validate phase content + self._validate_phase_content(phase_name, phase_content) + + def _validate_phase_content(self, phase_name: str, phase_content: Dict): + """Validate content within a phase""" + # Check for commands + if "commands" in phase_content: + commands = phase_content["commands"] + if not isinstance(commands, list): + self.errors.append(f"Phase '{phase_name}': 'commands' must be a list") + else: + # Validate each command is a string + for idx, cmd in enumerate(commands, 1): + if not isinstance(cmd, str): + self.errors.append( + f"Phase '{phase_name}', command #{idx} must be a string, got {type(cmd).__name__}" + ) + + # Check for unknown fields + unknown_fields = set(phase_content.keys()) - set(self.PHASE_FIELDS) + if unknown_fields: + self.warnings.append( + f"Phase '{phase_name}' has unknown fields: {', '.join(unknown_fields)}" + ) + + def _validate_env(self): + """Validate env section if present""" + if "env" not in self.buildspec: + return + + env = self.buildspec["env"] + if not isinstance(env, dict): + self.errors.append("'env' must be a dictionary") + return + + # Validate env subsections + valid_env_sections = [ + "variables", + "parameter-store", + "secrets-manager", + "exported-variables", + "git-credential-helper", + ] + + for section in env.keys(): + if section not in valid_env_sections: + self.warnings.append(f"Unknown env section: '{section}'") + + def _validate_artifacts(self): + """Validate artifacts section if present""" + if "artifacts" not in self.buildspec: + return + + artifacts = self.buildspec["artifacts"] + if not isinstance(artifacts, dict): + self.errors.append("'artifacts' must be a dictionary") + return + + # Check for required fields in artifacts + if "files" not in artifacts: + self.warnings.append("'artifacts' section has no 'files' specified") + + def 
print_results(self): + """Print validation results""" + print(f"\nValidating: {self.filepath}") + print("=" * 70) + + if self.errors: + print(f"\n❌ ERRORS ({len(self.errors)}):") + for error in self.errors: + print(f" - {error}") + + if self.warnings: + print(f"\n⚠️ WARNINGS ({len(self.warnings)}):") + for warning in self.warnings: + print(f" - {warning}") + + if not self.errors and not self.warnings: + print("✅ Valid buildspec file") + elif not self.errors: + print("\n✅ Valid buildspec file (with warnings)") + else: + print("\n❌ Invalid buildspec file") + + # Print summary + if self.buildspec.get("phases"): + print(f"\nSummary:") + print(f" Version: {self.buildspec.get('version', 'N/A')}") + print(f" Phases: {', '.join(self.buildspec['phases'].keys())}") + for phase, content in self.buildspec["phases"].items(): + if isinstance(content, dict) and "commands" in content: + cmd_count = len(content["commands"]) + print(f" - {phase}: {cmd_count} commands") + + +def main(): + if len(sys.argv) < 2: + print("Usage: python3 validate_buildspec.py ") + print(" python3 validate_buildspec.py patterns/*/buildspec.yml") + sys.exit(1) + + # Expand glob patterns + files = [] + for pattern in sys.argv[1:]: + expanded = glob.glob(pattern, recursive=True) + if expanded: + files.extend(expanded) + else: + # Not a glob pattern, treat as regular file + files.append(pattern) + + if not files: + print("❌ No buildspec files found") + sys.exit(1) + + all_valid = True + validators = [] + + for filepath in files: + validator = BuildspecValidator(filepath) + is_valid = validator.validate() + validator.print_results() + validators.append(validator) + + if not is_valid: + all_valid = False + + # Print overall summary + if len(validators) > 1: + print("\n" + "=" * 70) + print("OVERALL SUMMARY") + print("=" * 70) + valid_count = sum(1 for v in validators if not v.errors) + print(f"Total files: {len(validators)}") + print(f"Valid: {valid_count}") + print(f"Invalid: {len(validators) - valid_count}") 
+ + sys.exit(0 if all_valid else 1) + + +if __name__ == "__main__": + main() From d9731bf7b5dd7ce8c8737bd7511dc1a76b17bf87 Mon Sep 17 00:00:00 2001 From: Kazmer Nagy-Betegh Date: Wed, 5 Nov 2025 17:38:07 +0000 Subject: [PATCH 3/3] pymupdf version pin --- lib/idp_common_pkg/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/idp_common_pkg/pyproject.toml b/lib/idp_common_pkg/pyproject.toml index e1d0d7a8f..6e0483ed6 100644 --- a/lib/idp_common_pkg/pyproject.toml +++ b/lib/idp_common_pkg/pyproject.toml @@ -162,7 +162,7 @@ agentic-extraction = [ "jsonpatch==1.33", "strands-agents>=1.7.1 ; python_full_version >= '3.10'", "pandas>=2.2.3", - "pymupdf>=1.25.5", + "pymupdf==1.25.5", # Pinned to 1.25.5 - has pre-built ARM64 wheels, 1.26.x requires compilation "email-validator>=2.3.0", "tabulate>=0.9.0", "aws-lambda-powertools>=3.2.0", # Structured logging and observability