# Workflow file for this run:
# feat: GLiNER integration v4.2.0 - Modern NER with 32x performance boost #81

# Workflow display name.
name: CI

# Triggers:
#   - push: main, dev, and any branch matching the listed prefixes
#     (globs are quoted so `*` is never read as a YAML alias sigil).
#   - pull_request: only pull requests targeting main or dev.
on:
  push:
    branches: [main, dev, "feature/*", "fix/*", "chore/*", "cleanup/*"]
  pull_request:
    branches: [main, dev]
jobs:
  # Static checks: runs every configured pre-commit hook over all files and
  # prints the diff when a hook fails.
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          # Quoted so the version is not parsed as the float 3.1.
          python-version: "3.10"
          cache: "pip"
      - name: Install pre-commit
        run: pip install pre-commit
      - name: Run pre-commit
        run: pre-commit run --all-files --show-diff-on-failure

  # Full install (all extras + dev requirements) tested on each Python version
  # in the matrix, followed by a coverage run and upload.
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.10", "3.11", "3.12"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: "pip"
      # System packages for Tesseract, installed before the Python dependencies.
      - name: Install Tesseract OCR
        run: |
          sudo apt-get update
          sudo apt-get install -y tesseract-ocr libtesseract-dev
      - name: Install all dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[all]"
          pip install -r requirements-dev.txt
      # tests/test_gliner_annotator.py is skipped here; per the step name this
      # avoids a PyTorch segfault on the runner.
      - name: Run test suite (excluding GLiNER tests to prevent PyTorch segfault)
        run: |
          python -m pytest tests/ -v --ignore=tests/test_gliner_annotator.py
      # Import-only smoke check: an ImportError is reported but tolerated;
      # any other exception fails the step with exit code 1.
      # The Python lines share the indentation of `python -c "` so that, once
      # the block scalar's common indent is stripped, they start at column 0.
      - name: Validate GLiNER imports (without running tests that load PyTorch models)
        run: |
          python -c "
          import sys
          try:
              from datafog.processing.text_processing.gliner_annotator import GLiNERAnnotator
              print('✅ GLiNER imports work')
          except ImportError as e:
              print(f'⚠️ GLiNER dependencies not available (expected in CI): {e}')
          except Exception as e:
              print(f'❌ GLiNER import error: {e}')
              sys.exit(1)
          "
      # Coverage is measured over three core test modules only and written to
      # coverage.xml for the upload step below.
      - name: Run coverage on core modules only
        run: |
          python -m pytest tests/test_text_service.py tests/test_regex_annotator.py tests/test_anonymizer.py --cov=datafog --cov-report=xml --cov-config=.coveragerc
      - name: Upload coverage
        uses: codecov/codecov-action@v4
        with:
          file: ./coverage.xml
          token: ${{ secrets.CODECOV_TOKEN }}

  # Minimal install (no extras): checks that the public imports resolve and
  # runs the regex annotator tests on each Python version in the matrix.
  test-core:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.10", "3.11", "3.12"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: "pip"
      - name: Install core dependencies only
        run: |
          python -m pip install --upgrade pip
          pip install -e .
          pip install pytest pytest-cov
      - name: Test core functionality
        run: |
          python -c "from datafog import detect_pii, anonymize_text; print('Core API works')"
          python -c "from datafog import detect, process; print('Legacy API works')"
          python -m pytest tests/test_regex_annotator.py -v

  # Builds the wheel and runs the repository's size-check script against it.
  # NOTE(review): scripts/check_wheel_size.py is not visible here; presumably
  # it enforces a maximum wheel size — confirm against the script.
  wheel-size:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.10"
          cache: "pip"
      - name: Install build dependencies
        run: |
          python -m pip install --upgrade pip
          pip install build wheel
      - name: Build wheel
        run: python -m build --wheel
      - name: Check wheel size
        run: python scripts/check_wheel_size.py