# fix(ci): improve GLiNER validation to confirm PyTorch exclusion #84
# Workflow file for this run
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Continuous-integration workflow.
name: CI

# Triggers. Glob patterns are quoted so `*` is never read as a YAML alias.
on:
  # Pushes to main/dev and to the prefixed working branches.
  push:
    branches:
      - main
      - dev
      - "feature/*"
      - "fix/*"
      - "chore/*"
      - "cleanup/*"
  # Pull requests that target main or dev.
  pull_request:
    branches:
      - main
      - dev

# Least privilege for the automatic GITHUB_TOKEN: the jobs in this workflow
# only check out and read the repository, and the Codecov upload authenticates
# with its own secret token rather than GITHUB_TOKEN.
permissions:
  contents: read
jobs:
  # Runs every pre-commit hook against the whole repository.
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read 3.10 as the float 3.1.
          python-version: "3.10"
          cache: "pip"
      - name: Install pre-commit
        run: pip install pre-commit
      - name: Run pre-commit
        run: pre-commit run --all-files --show-diff-on-failure

  # Installs the package with its optional extras and runs the test suite on
  # each Python version in the matrix, then uploads a coverage report.
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.10", "3.11", "3.12"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: "pip"
      - name: Install Tesseract OCR
        run: |
          sudo apt-get update
          sudo apt-get install -y tesseract-ocr libtesseract-dev
      - name: Install dependencies (excluding PyTorch-based extras to prevent segfault)
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[nlp,ocr,distributed,web,cli,crypto,dev]"
          pip install -r requirements-dev.txt
      - name: Run test suite (excluding GLiNER tests to prevent PyTorch segfault)
        run: |
          python -m pytest tests/ -v --ignore=tests/test_gliner_annotator.py
      - name: Validate GLiNER module structure (without PyTorch dependencies)
        # The inline script fails the step only when the import raises
        # something other than ImportError. Both ImportError branches are
        # treated as the expected outcome.
        # NOTE(review): a successful import prints a "❌" line but the step
        # still exits 0 -- confirm whether that case should fail the job.
        run: |
          python -c "
          print('Validating GLiNER module can be imported without PyTorch...')
          try:
              from datafog.processing.text_processing.gliner_annotator import GLiNERAnnotator
              print('❌ GLiNER imported unexpectedly - PyTorch may be installed')
          except ImportError as e:
              if 'GLiNER dependencies not available' in str(e):
                  print('✅ GLiNER properly reports missing dependencies (expected in CI)')
              else:
                  print(f'✅ GLiNER import blocked as expected: {e}')
          except Exception as e:
              print(f'❌ Unexpected GLiNER error: {e}')
              # SystemExit instead of the site-provided exit() helper, which is
              # intended for interactive sessions; the exit status is still 1.
              raise SystemExit(1)
          "
      - name: Run coverage on core modules only
        run: |
          python -m pytest tests/test_text_service.py tests/test_regex_annotator.py tests/test_anonymizer.py --cov=datafog --cov-report=xml --cov-config=.coveragerc
      - name: Upload coverage
        uses: codecov/codecov-action@v4
        with:
          # NOTE(review): later major versions of codecov-action reportedly
          # prefer `files`; revisit this key when bumping past v4.
          file: ./coverage.xml
          token: ${{ secrets.CODECOV_TOKEN }}

  # Installs the package without any extras and checks that the top-level API
  # imports and the regex annotator tests pass on each Python version.
  test-core:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.10", "3.11", "3.12"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: "pip"
      - name: Install core dependencies only
        run: |
          python -m pip install --upgrade pip
          pip install -e .
          pip install pytest pytest-cov
      - name: Test core functionality
        run: |
          python -c "from datafog import detect_pii, anonymize_text; print('Core API works')"
          python -c "from datafog import detect, process; print('Legacy API works')"
          python -m pytest tests/test_regex_annotator.py -v

  # Builds the wheel and hands it to the repository's size-check script.
  wheel-size:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.10"
          cache: "pip"
      - name: Install build dependencies
        run: |
          python -m pip install --upgrade pip
          pip install build wheel
      - name: Build wheel
        run: python -m build --wheel
      - name: Check wheel size
        # NOTE(review): presumably enforces a maximum wheel size; the script
        # itself is not visible here -- confirm its threshold and exit codes.
        run: python scripts/check_wheel_size.py