From 30a27a589fc19f41d2227e922a318cb593abe5f4 Mon Sep 17 00:00:00 2001 From: PavelMakarchuk Date: Wed, 24 Sep 2025 18:13:24 -0400 Subject: [PATCH 1/3] Fix immigration status mapping Fixes #439 --- policyengine_us_data/datasets/cps/cps.py | 6 ++- .../tests/test_datasets/test_enhanced_cps.py | 46 +++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py index f932e0d5..a7eeb9a1 100644 --- a/policyengine_us_data/datasets/cps/cps.py +++ b/policyengine_us_data/datasets/cps/cps.py @@ -1410,6 +1410,9 @@ def get_arrival_year_midpoint(peinusyr): len(person), "LEGAL_PERMANENT_RESIDENT", dtype="U32" ) + # Set citizens (SSN card type 1) to CITIZEN status + immigration_status[ssn_card_type == 1] = "CITIZEN" + # 1. Undocumented: SSN card type 0 who arrived 1982 or later arrived_before_1982 = np.isin(person.PEINUSYR, [1, 2, 3, 4, 5, 6, 7]) undoc_mask = (ssn_card_type == 0) & (~arrived_before_1982) @@ -1459,7 +1462,8 @@ def get_arrival_year_midpoint(peinusyr): immigration_status[mask] = "TPS" # Final write (all values now in ImmigrationStatus Enum) - cps["immigration_status"] = immigration_status.astype("S") + # Save as immigration_status_str since that's what PolicyEngine expects + cps["immigration_status_str"] = immigration_status.astype("S") # ============================================================================ # CONVERT TO STRING LABELS AND STORE # ============================================================================ diff --git a/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py b/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py index 8a9d8d2b..47a0b922 100644 --- a/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py +++ b/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py @@ -177,6 +177,52 @@ def test_aca_calibration(): ), f"One or more states exceeded tolerance of {TOLERANCE:.0%}." +def test_immigration_status_diversity(): + """Test that immigration statuses show appropriate diversity (not all citizens).""" + from policyengine_us_data.datasets.cps import EnhancedCPS_2024 + from policyengine_us import Microsimulation + import numpy as np + + sim = Microsimulation(dataset=EnhancedCPS_2024) + + # Get immigration status for all persons (already weighted MicroSeries) + immigration_status = sim.calculate("immigration_status", 2024) + + # Count different statuses + unique_statuses, counts = np.unique(immigration_status, return_counts=True) + + # Calculate percentages using the weights directly + total_population = len(immigration_status) + status_percentages = {} + + for status, count in zip(unique_statuses, counts): + pct = 100 * count / total_population + status_percentages[status] = pct + print(f" {status}: {count:,} ({pct:.1f}%)") + + # Test that not everyone is a citizen (would indicate default value being used) + citizen_pct = status_percentages.get("CITIZEN", 0) + + # Fail if more than 99% are citizens (indicating the default is being used) + assert citizen_pct < 99, ( + f"Too many citizens ({citizen_pct:.1f}%) - likely using default value. " + "Immigration status not being read from data." + ) + + # Also check that we have a reasonable percentage of citizens (should be 85-90%) + assert 80 < citizen_pct < 95, ( + f"Citizen percentage ({citizen_pct:.1f}%) outside expected range (80-95%)" + ) + + # Check that we have some non-citizens + non_citizen_pct = 100 - citizen_pct + assert non_citizen_pct > 5, ( + f"Too few non-citizens ({non_citizen_pct:.1f}%) - expected at least 5%" + ) + + print(f"Immigration status diversity test passed: {citizen_pct:.1f}% citizens") + + def test_medicaid_calibration(): import pandas as pd From d54ed56cf7787a2939a0d934a82b9150cdf81873 Mon Sep 17 00:00:00 2001 From: PavelMakarchuk Date: Wed, 24 Sep 2025 18:18:33 -0400 Subject: [PATCH 2/3] lint --- .../tests/test_datasets/test_enhanced_cps.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py b/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py index 47a0b922..4634f4bd 100644 --- a/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py +++ b/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py @@ -210,17 +210,19 @@ def test_immigration_status_diversity(): ) # Also check that we have a reasonable percentage of citizens (should be 85-90%) - assert 80 < citizen_pct < 95, ( - f"Citizen percentage ({citizen_pct:.1f}%) outside expected range (80-95%)" - ) + assert ( + 80 < citizen_pct < 95 + ), f"Citizen percentage ({citizen_pct:.1f}%) outside expected range (80-95%)" # Check that we have some non-citizens non_citizen_pct = 100 - citizen_pct - assert non_citizen_pct > 5, ( - f"Too few non-citizens ({non_citizen_pct:.1f}%) - expected at least 5%" - ) + assert ( + non_citizen_pct > 5 + ), f"Too few non-citizens ({non_citizen_pct:.1f}%) - expected at least 5%" - print(f"Immigration status diversity test passed: {citizen_pct:.1f}% citizens") + print( + f"Immigration status diversity test passed: {citizen_pct:.1f}% citizens" + ) def test_medicaid_calibration(): From 44fe984e6785a345fd4b3afa2e050f18761b6b6f Mon Sep 17 00:00:00 2001 From: PavelMakarchuk Date: Wed, 24 Sep 2025 18:19:08 -0400 Subject: [PATCH 3/3] cl --- changelog_entry.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29b..90860eaf 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: patch + changes: + fixed: + - Immigration status mapping.