diff --git a/.github/scripts/README.md b/.github/scripts/README.md new file mode 100644 index 00000000..9b6bf678 --- /dev/null +++ b/.github/scripts/README.md @@ -0,0 +1,122 @@ +# Restructuring Script + +This directory contains a script to restructure the glean package from: + +``` +src/glean/ # All implementation files +``` + +To: + +``` +src/glean/ # Implicit namespace package (no __init__.py) +src/glean/api_client/ # All implementation files moved here +``` + +## Usage + +### Analyze what would change (recommended first step) + +```bash +python .github/scripts/restructure_to_namespace.py --dry-run +``` + +This shows you: + +- Which files would be moved +- Which import statements would be updated +- Current state of the transformation + +### Perform the restructuring + +```bash +python .github/scripts/restructure_to_namespace.py +``` + +This script: + +- **Detects Speakeasy regeneration** and automatically handles it +- Creates a backup and moves all files +- Uses implicit namespace packages (no `__init__.py` needed) +- Can be run multiple times safely +- Updates all import statements throughout the codebase + +## Smart Speakeasy Integration + +The script automatically detects when Speakeasy has regenerated files: + +1. **First run**: Moves everything to `api_client/` +2. **After Speakeasy regeneration**: Detects new files in `src/glean/`, removes old `api_client/`, and re-runs the transformation +3. **Subsequent runs**: Detects already-transformed structure and skips + +This means you can safely run the script as part of your build process! 
+ +## Examples + +```bash +# First, see what would be changed +python .github/scripts/restructure_to_namespace.py --dry-run + +# If it looks good, perform the restructuring +python .github/scripts/restructure_to_namespace.py + +# Safe to run multiple times - it will detect and handle various states +python .github/scripts/restructure_to_namespace.py # Skips if already done +python .github/scripts/restructure_to_namespace.py # Auto-detects Speakeasy regeneration +``` + +## What the restructuring does + +1. **Creates a backup** of the current `src/glean` directory +2. **Moves all files** from `src/glean/` to `src/glean/api_client/` +3. **Creates an implicit namespace package** (no `__init__.py` - Python 3.3+ feature) +4. **Updates all import statements** in tests, examples, and internal files +5. **Handles Speakeasy regeneration** automatically + +## After restructuring + +Users will need to update their imports: + +### Before + +```python +from glean import Glean, models, errors +from glean.utils import parse_datetime +``` + +### After + +```python +from glean.api_client import Glean, models, errors +from glean.api_client.utils import parse_datetime +``` + +## Workflow Integration + +You can integrate this into your build process: + +```bash +# In your build script or CI +speakeasy generate # Regenerates files to src/glean/ +python .github/scripts/restructure_to_namespace.py # Automatically detects and re-transforms +``` + +## Recovery + +If something goes wrong, the script provides the path to the backup directory: + +```bash +rm -rf src/glean +cp -r /path/to/backup/glean src/glean +``` + +## Testing after restructuring + +```bash +# Run tests +python -m pytest + +# Try importing +python -c "from glean.api_client import Glean; print('Success!')" +``` + diff --git a/.github/scripts/patch_extend_path.sh b/.github/scripts/patch_extend_path.sh deleted file mode 100755 index 5416cba8..00000000 --- a/.github/scripts/patch_extend_path.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - 
-FILE="${1:-src/glean/__init__.py}" - -# If the lines are already present, exit without modifying. -if grep -Fq "extend_path(__path__, __name__)" "$FILE"; then - if [[ -n "${OUTPUT:-}" ]]; then - echo "patched=false" >> "$OUTPUT" - fi - exit 0 -fi - -# Insert the two lines immediately after the first line. -awk 'NR==1 {print $0; print "from pkgutil import extend_path\n"; print "__path__ = extend_path(__path__, __name__)\n"; next} 1' "$FILE" > "$FILE.new" -mv "$FILE.new" "$FILE" - -if [[ -n "${OUTPUT:-}" ]]; then - echo "patched=true" >> "$OUTPUT" -fi \ No newline at end of file diff --git a/.github/scripts/restructure_to_namespace.py b/.github/scripts/restructure_to_namespace.py new file mode 100755 index 00000000..5c277665 --- /dev/null +++ b/.github/scripts/restructure_to_namespace.py @@ -0,0 +1,269 @@ +#!/usr/bin/env python3 +""" +Codemod script to restructure the glean package into a namespace structure. + +This script moves all API client code from src/glean to src/glean/api_client, +making glean a namespace package and api_client the actual implementation. + +It also detects if Speakeasy has regenerated files and automatically re-runs +the transformation. 
import argparse
import re
import shutil
import sys
import tempfile
from pathlib import Path
from typing import List, Optional


class GleanRestructure:
    """Move the client code from ``src/glean`` into ``src/glean/api_client``.

    After the move ``src/glean`` holds no ``__init__.py`` of its own and acts
    as an implicit namespace package, while ``api_client`` holds the actual
    implementation. Absolute ``glean`` imports in the moved files, in
    ``tests/``, ``examples/``, root-level ``*.py`` files and in Markdown
    files are rewritten to the new ``glean.api_client`` location.

    Args:
        project_root: Directory that contains ``src/glean``.
        dry_run: When true, only report what would be moved or rewritten;
            nothing on disk is created, moved, deleted or modified.
    """

    # Entries directly under src/glean that are never moved.
    _KEPT_IN_PLACE = ("api_client", "__pycache__")

    # (compiled pattern, replacement) pairs, applied in order. Every pattern
    # carries a negative lookahead for ``api_client`` so that running the
    # rewrite on already-converted text is a no-op. That makes a separate
    # "file already mentions api_client" check unnecessary, and lets files
    # that mix old and new imports be converted correctly.
    _TRANSFORMATIONS = [
        # from glean import X, Y, Z -> from glean.api_client import X, Y, Z
        # ``from glean import api_client`` is left alone: it is a valid
        # import of the new subpackage.
        (
            re.compile(r"\bfrom glean import(?!\s+api_client\b)\s+"),
            r"from glean.api_client import ",
        ),
        # from glean.something import ... -> from glean.api_client.something import ...
        (
            re.compile(r"\bfrom glean\.(?!api_client\b)([^.\s]+)"),
            r"from glean.api_client.\1",
        ),
        # import glean.something -> import glean.api_client.something
        (
            re.compile(r"\bimport glean\.(?!api_client\b)([^.\s]+)"),
            r"import glean.api_client.\1",
        ),
        # String-based module paths in data structures
        # (e.g. `_sub_sdk_map` in `sdks.py`).
        (
            re.compile(r'"glean\.(?!api_client\b)([^."]+)"'),
            r'"glean.api_client.\1"',
        ),
        (
            re.compile(r"'glean\.(?!api_client\b)([^.']+)'"),
            r"'glean.api_client.\1'",
        ),
    ]

    def __init__(self, project_root: Path, dry_run: bool = False):
        self.project_root = project_root
        self.src_dir = project_root / "src"
        self.glean_dir = self.src_dir / "glean"
        self.dry_run = dry_run

    def update_imports_in_file(self, file_path: Path) -> bool:
        """Rewrite ``glean`` imports in one Python or Markdown file.

        Args:
            file_path: File to rewrite in place (UTF-8 text).

        Returns:
            True if the file content changed (or, in dry-run mode, would
            change); False if nothing needed rewriting or the file could not
            be read or written.
        """
        try:
            original_content = file_path.read_text(encoding="utf-8")
        except (OSError, UnicodeError) as e:
            # Best effort: one unreadable file must not abort the whole run.
            print(f"Error processing {file_path}: {e}")
            return False

        content = original_content
        for pattern, replacement in self._TRANSFORMATIONS:
            content = pattern.sub(replacement, content)

        # Only touch the file if content changed.
        if content == original_content:
            return False

        if self.dry_run:
            print(f"Would update imports in: {file_path}")
            return True

        try:
            file_path.write_text(content, encoding="utf-8")
        except OSError as e:
            print(f"Error processing {file_path}: {e}")
            return False

        print(f"Updated imports in: {file_path}")
        return True

    def update_imports_in_moved_files(self, api_client_dir: Path):
        """Rewrite imports in every ``*.py`` file below ``api_client_dir``."""
        for file_path in list(api_client_dir.rglob("*.py")):
            self.update_imports_in_file(file_path)

    def get_files_to_move(self) -> List[Path]:
        """Return the entries directly under ``src/glean`` that get moved.

        ``api_client`` itself and ``__pycache__`` are excluded. The result is
        sorted so that output and processing order are deterministic.
        """
        return sorted(
            item
            for item in self.glean_dir.iterdir()
            if item.name not in self._KEPT_IN_PLACE
        )

    def detect_speakeasy_regeneration(self) -> bool:
        """Detect files regenerated next to an existing ``api_client/``.

        Returns:
            True if ``api_client/`` exists and ``src/glean`` also contains
            other entries (besides ``__pycache__``); False otherwise,
            including when the transformation has never been run.
        """
        api_client_dir = self.glean_dir / "api_client"

        if not api_client_dir.exists():
            # No api_client directory means we haven't run the transformation yet
            return False

        return len(self.get_files_to_move()) > 0

    def move_files_to_api_client(self):
        """Move everything from ``glean/`` into ``glean/api_client/``.

        In dry-run mode the planned moves are printed and nothing is created
        or moved.
        """
        api_client_dir = self.glean_dir / "api_client"

        if self.dry_run:
            for item in self.get_files_to_move():
                print(f"Would move {item} -> {api_client_dir / item.name}")
            return

        api_client_dir.mkdir(exist_ok=True)

        print("Moving files to api_client...")
        for item in self.get_files_to_move():
            dest = api_client_dir / item.name
            print(f"Moving {item} -> {dest}")
            shutil.move(str(item), str(dest))

    def update_project_imports(self):
        """Update imports in tests, examples, documentation, and other project files."""
        print("Updating imports in tests, examples, and documentation...")

        # tests/ and examples/ are scanned recursively when they exist.
        for dirname in ("tests", "examples"):
            directory = self.project_root / dirname
            if directory.exists():
                for py_file in directory.rglob("*.py"):
                    self.update_imports_in_file(py_file)

        # Python files directly in the project root (not recursive).
        for py_file in self.project_root.glob("*.py"):
            self.update_imports_in_file(py_file)

        # Markdown files with Python code snippets.
        self.update_markdown_files()

    def update_markdown_files(self):
        """Update Python code snippets in markdown files.

        Scans ``docs/`` recursively, ``*.md`` directly in the project root,
        and ``examples/`` and ``tests/`` recursively.
        """
        print("Updating Python code snippets in markdown files...")

        markdown_files = []

        docs_dir = self.project_root / "docs"
        if docs_dir.exists():
            markdown_files.extend(docs_dir.rglob("*.md"))

        markdown_files.extend(self.project_root.glob("*.md"))

        for dirname in ["examples", "tests"]:
            dir_path = self.project_root / dirname
            if dir_path.exists():
                markdown_files.extend(dir_path.rglob("*.md"))

        for md_file in markdown_files:
            # In dry-run mode update_imports_in_file has already printed the
            # "Would update" line; do not claim the file was updated.
            if self.update_imports_in_file(md_file) and not self.dry_run:
                print(f"Updated markdown file: {md_file}")

    def _report_planned_changes(self):
        """Print what a real run would move and rewrite, changing nothing."""
        print("Dry run - no files will be changed")
        self.move_files_to_api_client()

        # Files are not moved in a dry run, so inspect them where they are.
        for item in self.get_files_to_move():
            candidates = sorted(item.rglob("*.py")) if item.is_dir() else [item]
            for candidate in candidates:
                if candidate.suffix == ".py":
                    self.update_imports_in_file(candidate)

        self.update_project_imports()
        print("\nDry run complete - nothing was modified.")

    def perform_restructure(self):
        """Back up ``src/glean``, move the files and rewrite imports.

        A copy of ``src/glean`` is written to a fresh temporary directory
        first. If any later step raises, ``src/glean`` is restored from that
        copy and the process exits with status 1. In dry-run mode only a
        report is printed.
        """
        if self.dry_run:
            self._report_planned_changes()
            return

        # Create a temporary backup
        temp_backup = tempfile.mkdtemp(prefix="glean_backup_")
        backup_glean = Path(temp_backup) / "glean"

        try:
            print(f"Creating backup at: {temp_backup}")
            shutil.copytree(self.glean_dir, backup_glean)
        except Exception as e:
            print(f"Error creating backup: {e}")
            sys.exit(1)

        try:
            self.move_files_to_api_client()

            # No need to create __init__.py - Python 3.3+ supports implicit namespace packages
            # The absence of __init__.py makes src/glean a namespace package automatically
            print("Using implicit namespace package (no __init__.py needed)")

            api_client_dir = self.glean_dir / "api_client"
            print("Updating imports in moved files...")
            self.update_imports_in_moved_files(api_client_dir)

            self.update_project_imports()

            print("\nRestructuring complete!")
            print(f"Backup created at: {temp_backup}")
            print("New structure:")
            print("  src/glean/ (implicit namespace package)")
            print("  src/glean/api_client/ (actual implementation)")

            print("\nTo use the restructured package:")
            print("  from glean.api_client import Glean")
            print("  # or")
            print("  import glean.api_client as glean")

            print(f"\nIf anything goes wrong, you can restore from: {temp_backup}")

        except Exception as e:
            print(f"Error during restructuring: {e}")
            print(f"Restoring from backup: {temp_backup}")

            # Replace whatever half-moved state exists with the backup copy.
            if self.glean_dir.exists():
                shutil.rmtree(self.glean_dir)
            shutil.copytree(backup_glean, self.glean_dir)

            sys.exit(1)

    def run(self):
        """Main entry point for the restructuring process.

        Exits with status 1 if ``src/glean`` does not exist. If regenerated
        files are found next to ``api_client/``, the old ``api_client/`` is
        removed and the transformation is redone. If ``api_client/`` already
        exists, is non-empty and nothing else is present, nothing is done.
        """
        if not self.glean_dir.exists():
            print(
                "Error: src/glean directory not found. Run this script from the project root."
            )
            sys.exit(1)

        print("Checking for Speakeasy regeneration...")

        speakeasy_regenerated = self.detect_speakeasy_regeneration()
        api_client_dir = self.glean_dir / "api_client"

        if speakeasy_regenerated:
            print(
                "🔄 Detected Speakeasy regeneration - files found outside api_client/"
            )
            print(
                "This means Speakeasy has regenerated the client after our transformation."
            )
            print(
                "Removing old api_client/ and re-running transformation from scratch..."
            )
            if api_client_dir.exists():
                if self.dry_run:
                    print(f"Would remove {api_client_dir}")
                else:
                    shutil.rmtree(api_client_dir)
                    print(f"Removed {api_client_dir}")

        elif api_client_dir.exists() and any(api_client_dir.iterdir()):
            print("✅ Already restructured - api_client/ exists and contains files")
            print(
                "If you want to force re-restructuring, delete src/glean/api_client/ first"
            )
            return

        print("Starting restructure...")
        print(f"Project root: {self.project_root}")
        print(f"Source dir: {self.src_dir}")
        print(f"Glean dir: {self.glean_dir}")
        self.perform_restructure()


def main(argv: Optional[List[str]] = None):
    """Command-line entry point; must be run from the project root.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description="Restructure src/glean into the src/glean/api_client namespace layout."
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Only report which files would be moved or rewritten.",
    )
    args = parser.parse_args(argv)

    # Get the project root (should be run from project root)
    project_root = Path.cwd()

    restructurer = GleanRestructure(project_root, dry_run=args.dry_run)
    restructurer.run()


if __name__ == "__main__":
    main()
+19,19 @@ jobs: ref: ${{ github.head_ref || github.ref_name }} fetch-depth: 0 - - name: Patch glean/__init__.py with extend_path - id: patch + - name: Restructure glean package to namespace structure run: | - OUTPUT=$GITHUB_OUTPUT .github/scripts/patch_extend_path.sh + python .github/scripts/restructure_to_namespace.py - - name: Commit and push if file changed - if: steps.patch.outputs.patched == 'true' + - name: Commit and push if files changed run: | - git config --global user.name "github-actions[bot]" - git config --global user.email "github-actions[bot]@users.noreply.github.com" + if [ -n "$(git status --porcelain)" ]; then + git config --global user.name "github-actions[bot]" + git config --global user.email "github-actions[bot]@users.noreply.github.com" - git add src/glean/__init__.py - git commit -m "ci: ensure extend_path lines in glean/__init__.py" || echo "No changes to commit" - git push origin HEAD:${{ github.head_ref || github.ref_name }} + git add . + git commit -m "ci: restructure glean package to namespace structure" + git push origin HEAD:${{ github.head_ref || github.ref_name }} + else + echo "No changes detected" + fi