
Commit a4322e5

baogorek, claude, and MaxGhenis authored
Add stacked dataset builder and P(county|CD) distributions (#457)
* Add sparse matrix builder for local area calibration

  Core components:
  - sparse_matrix_builder.py: Database-driven approach for building calibration matrices
  - calibration_utils.py: Shared utilities (cache clearing, constraints, geo helpers)
  - matrix_tracer.py: Debugging utility for tracing through sparse matrices
  - create_stratified_cps.py: Create a stratified sample preserving high-income households
  - test_sparse_matrix_builder.py: 6 verification tests for matrix correctness

  Data pipeline changes:
  - Add GEO_STACKING env var to cps.py and puf.py for geo-stacking data generation
  - Add GEO_STACKING_MODE env var to extended_cps.py
  - Add CPS_2024_Full, PUF_2023, and ExtendedCPS_2023 classes
  - Add policy_data.db download to prerequisites
  - Add 'make data-geo' target for the geo-stacking data pipeline

  CI/CD:
  - Add geo-stacking dataset build step to the workflow
  - Add sparse matrix builder test step after geo data generation

* Add changelog entry and format code

* Refactor tests and fix enum encoding, minimize PR scope
  - Move sparse matrix tests to tests/test_local_area_calibration/
  - Split the large test file into focused modules (column indexing, same-state, cross-state, geo masking)
  - Fix small_enhanced_cps.py enum encoding (decode_to_str before astype)
  - Fix create_stratified_cps.py to use local storage instead of HuggingFace
  - Remove CPS_2024_Full to keep the PR minimal
  - Revert ExtendedCPS_2024 to use CPS_2024

* Rename GEO_STACKING to LOCAL_AREA_CALIBRATION and restore tracer functionality
  - Rename GEO_STACKING to LOCAL_AREA_CALIBRATION in cps.py, puf.py, and extended_cps.py
  - Rename data-geo to data-local-area in the Makefile and workflow
  - Add create_target_groups function to calibration_utils.py
  - Enhance MatrixTracer with a get_group_rows method and variable_desc in the row catalog
  - Add a TARGET GROUPS section to print_matrix_structure output
  - Add local_area_calibration_setup.ipynb documentation notebook

* Clear notebook outputs for MyST compatibility

* Pin mystmd>=1.7.0 to fix notebook rendering in docs

* Add population-weighted P(county|CD) distributions and stacked dataset builder
  - Add make_county_cd_distributions.py to compute P(county|CD) from Census block data
  - Add county_cd_distributions.csv with distributions for all 436 CDs
  - Add county_assignment.py module for assigning counties to households
  - Add stacked_dataset_builder.py for creating CD-stacked H5 datasets
  - Add tests for county assignment functionality
  - Update calibration_utils.py with state/CD mapping utilities
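  For context on the population weighting: each Census block carries a population count and lies in exactly one county and one congressional district, so P(county|CD) is just the county's share of the CD's block population. A minimal sketch of that computation and of sampling a county assignment from it (column names and data are illustrative, not the actual make_county_cd_distributions.py or county_assignment.py code):

  ```python
  import numpy as np
  import pandas as pd

  # Illustrative schema: one row per Census block, with its county FIPS,
  # congressional district code, and population count.
  blocks = pd.DataFrame(
      {
          "county_fips": ["36061", "36061", "36047", "36047"],
          "cd": ["NY12", "NY12", "NY12", "NY09"],
          "population": [1200, 800, 2000, 1500],
      }
  )

  # Sum block population within each (CD, county) cell ...
  pop = blocks.groupby(["cd", "county_fips"])["population"].sum()

  # ... then normalize by the CD total to get P(county | CD).
  p_county_given_cd = pop / pop.groupby(level="cd").transform("sum")
  print(p_county_given_cd)
  # cd    county_fips
  # NY09  36047          1.0
  # NY12  36047          0.5
  #       36061          0.5

  # Assigning a county to a household in NY12 then amounts to sampling
  # from this conditional distribution (illustrative):
  dist = p_county_given_cd.loc["NY12"]
  rng = np.random.default_rng(0)
  county = rng.choice(dist.index, p=dist.values)
  ```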
* Add local area H5 publishing workflow
  - New GitHub Actions workflow (local_area_publish.yaml) that:
    - Triggers on local_area_calibration/ changes, repository_dispatch, or manual dispatch
    - Downloads calibration inputs from the HF calibration/ folder
    - Builds 51 state + 436 district H5 files with checkpointing
    - Uploads to GCP and to the HF states/ and districts/ subdirectories
  - New publish_local_area.py script with:
    - Per-state and per-district checkpointing for spot instance resilience
    - Immediate upload after each file is built
    - Support for --states-only, --districts-only, and --skip-download flags
  - Added upload_local_area_file() to data_upload.py for subdirectory uploads
  - Added download_calibration_inputs() to huggingface.py
  - Added publish-local-area Makefile target

* Update paths to use calibration/ subdirectory for policy_data.db
  - download_private_prerequisites.py: Download from calibration/policy_data.db
  - calibration_utils.py: Look for the db in storage/calibration/
  - conftest.py: Update test fixture path
  - huggingface.py: Fix download_calibration_inputs to return correct paths

* Documentation updates

* Add deterministic test fixture and tests for stacked_dataset_builder

  Create a minimal 50-household H5 fixture with known values for stable testing of the stacked dataset builder without relying on sampled stratified CPS data.

* Fix dtype warning in stacked_dataset_builder person ID assignment

  Cast np.arange output to int32 to match the column dtype.

* Format test files with black

* Add spm_unit_tenure_type and fix input_variables detection in stratification
  - Add spm_unit_tenure_type mapping from SPM_TENMORTSTATUS in add_spm_variables
  - Fix create_stratified_cps.py to use the source sim's input_variables instead of an empty sim
  - Fix stacked_dataset_builder.py to use base_sim's input_variables instead of sparse_sim

  The input_variables fix ensures variables like spm_unit_tenure_type are preserved when creating stratified/stacked datasets, since input_variables is only populated from variables that have actual data in the loaded dataset.

* Add real rent-based SPM geographic adjustments to stacked dataset builder
  - Add spm-calculator integration for SPM threshold calculation
  - Replace the random placeholder geoadj with real values from Census ACS rent data
  - Add load_cd_geoadj_values() to compute geoadj from median 2BR rents
  - Add calculate_spm_thresholds_for_cd() to calculate SPM thresholds per CD
  - Add CD rent data CSV and fetch script (requires CENSUS_API_KEY)
  - Update .gitignore to track the rent CSV

* NYC workflow

* Add batched HuggingFace uploads and fix at-large district geoadj lookup
  - Add upload_local_area_batch_to_hf() to batch multiple files per commit
  - Add skip_hf parameter to upload_local_area_file() for GCP-only uploads
  - Modify publish_local_area.py to batch HF uploads (10 files per commit)
  - Fix at-large district geoadj lookup (XX01 -> XX00 mapping for AK, DE, etc.)

* Filter pseudo-input variables from H5 output and add checkpoint files to gitignore

  Pseudo-inputs are variables with adds/subtracts that aggregate formula-based components. Saving their stale pre-computed values corrupts calculations when the dataset is reloaded.
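  A sketch of the pseudo-input filter just described, assuming PolicyEngine-style variable metadata where aggregates declare adds/subtracts lists (the toy objects below are stand-ins, not the real stacked_dataset_builder.py code):

  ```python
  from types import SimpleNamespace

  # Toy stand-ins for variable metadata: a pseudo-input aggregates
  # other variables via `adds`/`subtracts`.
  variables = {
      "employment_income": SimpleNamespace(adds=None, subtracts=None),
      "household_net_income": SimpleNamespace(
          adds=["employment_income"], subtracts=["income_tax"]
      ),
  }

  def is_pseudo_input(var) -> bool:
      # A stored value for an adds/subtracts aggregate goes stale as soon
      # as any component changes, so it must be recomputed, not reloaded.
      return bool(getattr(var, "adds", None) or getattr(var, "subtracts", None))

  # Hypothetical filtering step before writing the H5 file:
  # keep only true inputs, whose stored values are authoritative.
  dataset_arrays = {"employment_income": [1.0], "household_net_income": [2.0]}
  to_save = {
      name: vals
      for name, vals in dataset_arrays.items()
      if not is_pseudo_input(variables[name])
  }
  print(list(to_save))  # ['employment_income']
  ```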
* Add spm-calculator as a dependency

* Fix database path and add download dependency
  - Update the notebook to use the correct db path: storage/calibration/policy_data.db
  - Add download as a dependency of the data target in the Makefile

* Fix test fixture path to use absolute path

  Use os.path.dirname(__file__) instead of a relative path so tests work regardless of the working directory.

* Trigger CI after runner restart

* Add uv.lock to pin dependency versions
  - Add uv.lock file with all pinned dependencies
  - Update all workflows to use `uv sync --dev` instead of pip install
  - Add a lock freshness check to the PR workflow
  - Narrow the Python version to >=3.12 (required by microimpute)

  This prevents stale cached packages on the self-hosted runner from causing test failures (e.g., a missing spm_unit_tenure_type variable).

* Use uv run for all Python commands in workflows

  uv sync creates a virtual environment, but commands were running with the system Python, which still had stale cached packages. All make/python/pytest commands now use `uv run` to execute within the virtual environment where the locked dependencies are installed.

* Fix family_id reindexing and vectorize entity ID assignment
  - Add family_id/person_family_id to the entity reindexing loop (it was missing, causing ID collisions when the same household appears in multiple CDs)
  - Vectorize entity reindexing using groupby().ngroup() instead of O(n²) nested loops (see the sketch after this commit list)
  - Add a comment explaining why a fresh Microsimulation per CD is necessary
  - Add tests for entity ID uniqueness across stacked CDs

* Update uv.lock with latest package versions

* Format with Black

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: Max Ghenis <mghenis@gmail.com>
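A minimal sketch of the vectorized reindexing referenced above (column names are assumptions; the real builder operates on PolicyEngine entity arrays): `groupby(...).ngroup()` labels each distinct (CD, original ID) pair with a dense integer in a single pass, which is what replaces the O(n²) nested loops and prevents cross-CD ID collisions.

```python
import pandas as pd

# Illustrative stacked person table: the same source household (hh 7)
# appears in two congressional districts after stacking.
persons = pd.DataFrame(
    {
        "cd": ["CA01", "CA01", "CA02", "CA02"],
        "household_id": [7, 8, 7, 9],
    }
)

# O(n) vectorized reindex: each distinct (cd, original id) pair gets a
# dense new ID, so cross-CD copies of a household no longer collide.
persons["new_household_id"] = persons.groupby(
    ["cd", "household_id"], sort=False
).ngroup()
print(persons)
#      cd  household_id  new_household_id
# 0  CA01             7                 0
# 1  CA01             8                 1
# 2  CA02             7                 2
# 3  CA02             9                 3
```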
1 parent 17fff13 · commit a4322e5

29 files changed · +11017 −57 lines changed

.github/workflows/code_changes.yaml

Lines changed: 2 additions & 2 deletions
```diff
@@ -37,9 +37,9 @@ jobs:
       - name: Install uv
         uses: astral-sh/setup-uv@v5
       - name: Install package
-        run: uv pip install -e .[dev] --system
+        run: uv sync --dev
       - name: Build package
-        run: python -m build
+        run: uv run python -m build
       - name: Publish a Python distribution to PyPI
         uses: pypa/gh-action-pypi-publish@release/v1
         with:
```
.github/workflows/local_area_publish.yaml

Lines changed: 70 additions & 0 deletions
```diff
@@ -0,0 +1,70 @@
+name: Publish Local Area H5 Files
+
+on:
+  push:
+    branches: [main]
+    paths:
+      - 'policyengine_us_data/datasets/cps/local_area_calibration/**'
+      - '.github/workflows/local_area_publish.yaml'
+  repository_dispatch:
+    types: [calibration-updated]
+  workflow_dispatch:
+
+# Trigger strategy:
+# 1. Automatic: Code changes to local_area_calibration/ pushed to main
+# 2. repository_dispatch: Calibration workflow triggers after uploading new weights
+# 3. workflow_dispatch: Manual trigger when you update weights/data on HF yourself
+
+jobs:
+  publish-local-area:
+    runs-on: self-hosted
+    permissions:
+      contents: read
+      id-token: write
+    env:
+      HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
+
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.13'
+
+      - name: Authenticate to Google Cloud
+        uses: google-github-actions/auth@v2
+        with:
+          workload_identity_provider: "projects/322898545428/locations/global/workloadIdentityPools/policyengine-research-id-pool/providers/prod-github-provider"
+          service_account: "policyengine-research@policyengine-research.iam.gserviceaccount.com"
+
+      - name: Install package
+        run: uv sync --dev
+
+      - name: Download checkpoint (if exists)
+        continue-on-error: true
+        run: |
+          gsutil cp gs://policyengine-us-data/checkpoints/completed_states.txt . || true
+          gsutil cp gs://policyengine-us-data/checkpoints/completed_districts.txt . || true
+          gsutil cp gs://policyengine-us-data/checkpoints/completed_cities.txt . || true
+
+      - name: Build and publish local area H5 files
+        run: uv run make publish-local-area
+
+      - name: Upload checkpoint
+        if: always()
+        run: |
+          gsutil cp completed_states.txt gs://policyengine-us-data/checkpoints/ || true
+          gsutil cp completed_districts.txt gs://policyengine-us-data/checkpoints/ || true
+          gsutil cp completed_cities.txt gs://policyengine-us-data/checkpoints/ || true
+
+      - name: Clean up checkpoints on success
+        if: success()
+        run: |
+          gsutil rm gs://policyengine-us-data/checkpoints/completed_states.txt || true
+          gsutil rm gs://policyengine-us-data/checkpoints/completed_districts.txt || true
+          gsutil rm gs://policyengine-us-data/checkpoints/completed_cities.txt || true
```
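For context, the checkpoint files shuttled to and from GCS here are plain-text lists of completed areas; per the commit message, publish_local_area.py records each area after its file is built and uploaded, so a restarted spot instance skips finished work. A rough sketch of that resume loop (helper names below are hypothetical, not the script's actual API):

```python
from pathlib import Path

CHECKPOINT = Path("completed_districts.txt")

def build_h5(cd: str) -> None:
    print(f"building {cd}.h5")  # stand-in for the real H5 builder

def upload_h5(cd: str) -> None:
    print(f"uploading {cd}.h5")  # stand-in for the GCP/HF upload

def load_completed() -> set[str]:
    # Resume point: anything listed here was already built and uploaded.
    if CHECKPOINT.exists():
        return set(CHECKPOINT.read_text().split())
    return set()

def publish_all(districts: list[str]) -> None:
    done = load_completed()
    for cd in districts:
        if cd in done:
            continue  # already published before the interruption
        build_h5(cd)
        upload_h5(cd)
        # Record progress only after the upload succeeds, so a crash
        # mid-build just redoes the current district.
        with CHECKPOINT.open("a") as f:
            f.write(cd + "\n")
```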

.github/workflows/pr_code_changes.yaml

Lines changed: 21 additions & 1 deletion
```diff
@@ -30,8 +30,28 @@ jobs:
         fi
         echo "✅ PR is from the correct repository"

-  Lint:
+  check-lock-freshness:
+    name: Check uv.lock freshness
+    runs-on: ubuntu-latest
     needs: check-fork
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.13'
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+      - name: Check lock file is up-to-date
+        run: |
+          uv lock --upgrade
+          git diff --exit-code uv.lock || {
+            echo "::error::uv.lock is outdated. Run 'uv lock --upgrade' and commit the changes."
+            exit 1
+          }
+
+  Lint:
+    needs: [check-fork, check-lock-freshness]
     uses: ./.github/workflows/reusable_lint.yaml

   SmokeTestForMultipleVersions:
```
.github/workflows/reusable_test.yaml

Lines changed: 11 additions & 11 deletions
```diff
@@ -57,11 +57,11 @@ jobs:
           service_account: "policyengine-research@policyengine-research.iam.gserviceaccount.com"

       - name: Install package
-        run: uv pip install -e .[dev] --system
+        run: uv sync --dev

       - name: Download data inputs
         if: inputs.full_suite
-        run: make download
+        run: uv run make download

       # Temporarily disabled - database target causing issues
       # - name: Create and load calibration targets database
@@ -70,22 +70,22 @@

       - name: Build datasets
         if: inputs.full_suite
-        run: make data
+        run: uv run make data
         env:
           TEST_LITE: ${{ !inputs.upload_data }}
           PYTHON_LOG_LEVEL: INFO

       - name: Build datasets for local area calibration
         if: inputs.full_suite
         run: |
-          LOCAL_AREA_CALIBRATION=true python policyengine_us_data/datasets/cps/cps.py
-          LOCAL_AREA_CALIBRATION=true python policyengine_us_data/datasets/puf/puf.py
-          LOCAL_AREA_CALIBRATION=true python policyengine_us_data/datasets/cps/extended_cps.py
-          python policyengine_us_data/datasets/cps/local_area_calibration/create_stratified_cps.py 10500
+          LOCAL_AREA_CALIBRATION=true uv run python policyengine_us_data/datasets/cps/cps.py
+          LOCAL_AREA_CALIBRATION=true uv run python policyengine_us_data/datasets/puf/puf.py
+          LOCAL_AREA_CALIBRATION=true uv run python policyengine_us_data/datasets/cps/extended_cps.py
+          uv run python policyengine_us_data/datasets/cps/local_area_calibration/create_stratified_cps.py 10500

       - name: Run local area calibration tests
         if: inputs.full_suite
-        run: pytest policyengine_us_data/tests/test_local_area_calibration/ -v
+        run: uv run pytest policyengine_us_data/tests/test_local_area_calibration/ -v

       - name: Save calibration log
         if: inputs.full_suite
@@ -95,14 +95,14 @@
           path: calibration_log.csv

       - name: Run tests
-        run: pytest
+        run: uv run pytest

       - name: Upload data
         if: inputs.upload_data
-        run: make upload
+        run: uv run make upload

       - name: Test documentation builds
-        run: make documentation
+        run: uv run make documentation
         env:
           BASE_URL: ${{ inputs.deploy_docs && '/policyengine-us-data' || '' }}
```
.gitignore

Lines changed: 8 additions & 0 deletions
```diff
@@ -2,6 +2,7 @@
 **/__pycache__
 **/.DS_STORE
 **/*.h5
+**/*.npy
 **/*.csv
 **/_build
 **/*.pkl
@@ -23,4 +24,11 @@ node_modules
 !soi_targets.csv
 !policyengine_us_data/storage/social_security_aux.csv
 !policyengine_us_data/storage/SSPopJul_TR2024.csv
+!policyengine_us_data/storage/national_and_district_rents_2023.csv
 docs/.ipynb_checkpoints/
+
+## Batch processing checkpoints
+completed_*.txt
+
+## Test fixtures
+!policyengine_us_data/tests/test_local_area_calibration/test_fixture_50hh.h5
```

Makefile

Lines changed: 5 additions & 2 deletions
```diff
@@ -1,4 +1,4 @@
-.PHONY: all format test install download upload docker documentation data clean build paper clean-paper presentations
+.PHONY: all format test install download upload docker documentation data data-local-area publish-local-area clean build paper clean-paper presentations

 all: data test

@@ -62,7 +62,7 @@ database:
 	python policyengine_us_data/db/etl_irs_soi.py
 	python policyengine_us_data/db/validate_database.py

-data:
+data: download
 	python policyengine_us_data/utils/uprating.py
 	python policyengine_us_data/datasets/acs/acs.py
 	python policyengine_us_data/datasets/cps/cps.py
@@ -80,6 +80,9 @@ data-local-area: data
 	LOCAL_AREA_CALIBRATION=true python policyengine_us_data/datasets/cps/extended_cps.py
 	python policyengine_us_data/datasets/cps/local_area_calibration/create_stratified_cps.py 10500

+publish-local-area:
+	python policyengine_us_data/datasets/cps/local_area_calibration/publish_local_area.py
+
 clean:
 	rm -f policyengine_us_data/storage/*.h5
 	rm -f policyengine_us_data/storage/*.db
```

changelog_entry.yaml

Lines changed: 10 additions & 0 deletions
```diff
@@ -0,0 +1,10 @@
+- bump: minor
+  changes:
+    added:
+      - Sparse matrix builder for local area calibration with database-driven constraints
+      - Local area calibration data pipeline (make data-local-area)
+      - ExtendedCPS_2023 and PUF_2023 dataset classes
+      - Stratified CPS sampling to preserve high-income households
+      - Matrix verification tests for local area calibration
+      - Population-weighted P(county|CD) distributions from Census block data
+      - County assignment module for stacked dataset builder
```
