# feat: GLiNER integration v4.2.0 - Modern NER with 32x performance boost #81
# Workflow file for this run
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
---
# Continuous integration workflow.
#
# Jobs:
#   lint        - runs the pre-commit hooks over the whole repository.
#   build       - installs the package with the "all" extra, runs the test
#                 suite (minus the GLiNER tests) and uploads coverage.
#   test-core   - installs the package without extras and smoke-tests the
#                 public API plus the regex annotator tests.
#   wheel-size  - builds a wheel and runs scripts/check_wheel_size.py on it.
name: CI

# `on` is read as a boolean by YAML 1.1 linters; GitHub's loader treats it as
# the trigger key, so the truthy rule is suppressed for this one line.
# yamllint disable-line rule:truthy
on:
  push:
    branches:
      - main
      - dev
      - "feature/*"
      - "fix/*"
      - "chore/*"
      - "cleanup/*"
  pull_request:
    branches:
      - main
      - dev

# Least privilege: no step in this workflow pushes commits, tags or comments,
# so GITHUB_TOKEN only needs to read the repository for checkout. The Codecov
# upload authenticates with its own CODECOV_TOKEN secret.
permissions:
  contents: read

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          # Quoted so the version is not parsed as the float 3.1.
          python-version: "3.10"
          cache: "pip"
      - name: Install pre-commit
        run: pip install pre-commit
      - name: Run pre-commit
        run: pre-commit run --all-files --show-diff-on-failure

  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.10", "3.11", "3.12"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: "pip"
      - name: Install Tesseract OCR
        run: |
          sudo apt-get update
          sudo apt-get install -y tesseract-ocr libtesseract-dev
      - name: Install all dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[all]"
          pip install -r requirements-dev.txt
      # The GLiNER test module is skipped here; per the step name, running it
      # triggers a PyTorch segfault.
      - name: Run test suite (excluding GLiNER tests to prevent PyTorch segfault)
        run: python -m pytest tests/ -v --ignore=tests/test_gliner_annotator.py
      # Import-only check: an ImportError is reported but tolerated (missing
      # GLiNER dependencies are expected in CI); any other exception fails
      # the step with exit code 1.
      - name: Validate GLiNER imports (without running tests that load PyTorch models)
        run: |
          python -c "
          import sys
          try:
              from datafog.processing.text_processing.gliner_annotator import GLiNERAnnotator
              print('✅ GLiNER imports work')
          except ImportError as e:
              print(f'⚠️ GLiNER dependencies not available (expected in CI): {e}')
          except Exception as e:
              print(f'❌ GLiNER import error: {e}')
              sys.exit(1)
          "
      # Coverage is measured on three core test modules only and written to
      # coverage.xml for the upload step below.
      - name: Run coverage on core modules only
        run: |
          python -m pytest \
            tests/test_text_service.py \
            tests/test_regex_annotator.py \
            tests/test_anonymizer.py \
            --cov=datafog --cov-report=xml --cov-config=.coveragerc
      - name: Upload coverage
        uses: codecov/codecov-action@v4
        with:
          file: ./coverage.xml
          token: ${{ secrets.CODECOV_TOKEN }}

  test-core:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.10", "3.11", "3.12"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: "pip"
      # Installs the package without any extras, plus the test runner.
      - name: Install core dependencies only
        run: |
          python -m pip install --upgrade pip
          pip install -e .
          pip install pytest pytest-cov
      # Smoke-test that both API surfaces import from a core-only install,
      # then run the regex annotator tests.
      - name: Test core functionality
        run: |
          python -c "from datafog import detect_pii, anonymize_text; print('Core API works')"
          python -c "from datafog import detect, process; print('Legacy API works')"
          python -m pytest tests/test_regex_annotator.py -v

  wheel-size:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.10"
          cache: "pip"
      - name: Install build dependencies
        run: |
          python -m pip install --upgrade pip
          pip install build wheel
      - name: Build wheel
        run: python -m build --wheel
      # NOTE(review): presumably enforces a maximum wheel size; the script is
      # not visible here, so confirm its threshold and exit behaviour.
      - name: Check wheel size
        run: python scripts/check_wheel_size.py