PolicyEngine · MaxGhenis · Oct 5, 2025 · Nov 10, 2025 · Dec 3, 2025 · Dec 3, 2025
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
@@ -0,0 +1,4 @@
+- bump: minor
+  changes:
+    added:
+      - Move all randomness to data package for deterministic country package. Take-up decisions for SNAP, Medicaid, ACA, EITC, DC PTC, Head Start, and Early Head Start are now generated stochastically during dataset creation using take-up rates from YAML parameter files.
diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
@@ -14,6 +14,7 @@
 )
 from microimpute.models.qrf import QRF
 import logging
+from policyengine_us_data.parameters import load_take_up_rate
 
 
 test_lite = os.environ.get("TEST_LITE") == "true"
@@ -207,25 +208,63 @@ def add_takeup(self):
     from policyengine_us import system, Microsimulation
 
     baseline = Microsimulation(dataset=self)
-    parameters = baseline.tax_benefit_system.parameters(self.time_period)
 
+    # Generate all stochastic take-up decisions using take-up rates from parameter files
+    # This keeps the country package purely deterministic
     generator = np.random.default_rng(seed=100)
 
-    eitc_takeup_rates = parameters.gov.irs.credits.eitc.takeup
+    # Load take-up rates from parameter files
+    eitc_rates_by_children = load_take_up_rate("eitc", self.time_period)
+    dc_ptc_rate = load_take_up_rate("dc_ptc", self.time_period)
+    snap_rate = load_take_up_rate("snap", self.time_period)
+    aca_rate = load_take_up_rate("aca", self.time_period)
+    medicaid_rate = load_take_up_rate("medicaid", self.time_period)
+    head_start_rate = load_take_up_rate("head_start", self.time_period)
+    early_head_start_rate = load_take_up_rate(
+        "early_head_start", self.time_period
+    )
+
+    # EITC: varies by number of children
     eitc_child_count = baseline.calculate("eitc_child_count").values
-    eitc_takeup_rate = eitc_takeup_rates.calc(eitc_child_count)
+    eitc_takeup_rate = np.array(
+        [
+            eitc_rates_by_children.get(min(int(c), 3), 0.85)
+            for c in eitc_child_count
+        ]
+    )
     data["takes_up_eitc"] = (
         generator.random(len(data["tax_unit_id"])) < eitc_takeup_rate
     )
-    dc_ptc_takeup_rate = parameters.gov.states.dc.tax.income.credits.ptc.takeup
+
+    # DC Property Tax Credit
     data["takes_up_dc_ptc"] = (
-        generator.random(len(data["tax_unit_id"])) < dc_ptc_takeup_rate
+        generator.random(len(data["tax_unit_id"])) < dc_ptc_rate
+    )
+
+    # SNAP
+    data["takes_up_snap_if_eligible"] = (
+        generator.random(len(data["spm_unit_id"])) < snap_rate
+    )
+
+    # ACA
+    data["takes_up_aca_if_eligible"] = (
+        generator.random(len(data["tax_unit_id"])) < aca_rate
+    )
+
+    # Medicaid
+    data["takes_up_medicaid_if_eligible"] = (
+        generator.random(len(data["person_id"])) < medicaid_rate
+    )
+
+    # Head Start
+    data["takes_up_head_start_if_eligible"] = (
+        generator.random(len(data["person_id"])) < head_start_rate
     )
-    generator = np.random.default_rng(seed=100)
 
-    data["snap_take_up_seed"] = generator.random(len(data["spm_unit_id"]))
-    data["aca_take_up_seed"] = generator.random(len(data["tax_unit_id"]))
-    data["medicaid_take_up_seed"] = generator.random(len(data["person_id"]))
+    # Early Head Start
+    data["takes_up_early_head_start_if_eligible"] = (
+        generator.random(len(data["person_id"])) < early_head_start_rate
+    )
 
     self.save_dataset(data)
 

diff --git a/policyengine_us_data/parameters/__init__.py b/policyengine_us_data/parameters/__init__.py
@@ -0,0 +1,56 @@
+"""
+Take-up rate parameters for stochastic simulation.
+
+These parameters are stored in the data package to keep the country package
+as a purely deterministic rules engine.
+"""
+
+import yaml
+from pathlib import Path
+
+PARAMETERS_DIR = Path(__file__).parent
+
+
+def load_take_up_rate(variable_name: str, year: int = 2018) -> float:
+    """Load take-up rate from YAML parameter files.
+
+    Args:
+        variable_name: Name of the take-up parameter file (without .yaml)
+        year: Year for which to get the rate
+
+    Returns:
+        Take-up rate as a float between 0 and 1
+    """
+    yaml_path = PARAMETERS_DIR / "take_up" / f"{variable_name}.yaml"
+
+    with open(yaml_path) as f:
+        data = yaml.safe_load(f)
+
+    # Handle EITC special case (has rates_by_children instead of values)
+    if "rates_by_children" in data:
+        return data["rates_by_children"]  # Return the dict
+
+    # Find the applicable value for the year
+    values = data["values"]
+    applicable_value = None
+
+    for date_key, value in sorted(values.items()):
+        # Handle both string and datetime.date objects from YAML
+        if hasattr(date_key, "year"):
+            # It's a datetime.date object
+            date_year = date_key.year
+        else:
+            # It's a string
+            date_year = int(date_key.split("-")[0])
+
+        if date_year <= year:
+            applicable_value = value
+        else:
+            break
+
+    if applicable_value is None:
+        raise ValueError(
+            f"No take-up rate found for {variable_name} in {year}"
+        )
+
+    return applicable_value
diff --git a/policyengine_us_data/parameters/take_up/aca.yaml b/policyengine_us_data/parameters/take_up/aca.yaml
@@ -0,0 +1,10 @@
+description: Percentage of eligible people who do enroll in Affordable Care Act coverage, if eligible.
+metadata:
+  label: ACA takeup rate
+  unit: /1
+  period: year
+  reference:
+    - title: KFF "A Closer Look at the Remaining Uninsured Population Eligible for Medicaid and CHIP"
+      href: https://www.kff.org/uninsured/issue-brief/a-closer-look-at-the-remaining-uninsured-population-eligible-for-medicaid-and-chip/#:~:text=the%20uninsured%20rate%20dropped%20to,States%20began%20the
+values:
+  2018-01-01: 0.672
diff --git a/policyengine_us_data/parameters/take_up/dc_ptc.yaml b/policyengine_us_data/parameters/take_up/dc_ptc.yaml
@@ -0,0 +1,11 @@
+description: The share of eligible individuals who claim the DC property tax credit.
+metadata:
+  unit: /1
+  label: DC property tax credit takeup rate
+  period: year
+  reference:
+    - title: District of Columbia Tax Expenditure Report, 2024
+      href: https://ora-cfo.dc.gov/sites/default/files/dc/sites/ora-cfo/publication/attachments/2024%20Tax%20Expenditure%20Report.pdf#page=234
+values:
+  # 37,133 (from 2024 Tax Expenditure Report) / 131,791,388 (PolicyEngine DC PTC value estimate)
+  2021-01-01: 0.32
diff --git a/policyengine_us_data/parameters/take_up/early_head_start.yaml b/policyengine_us_data/parameters/take_up/early_head_start.yaml
@@ -0,0 +1,9 @@
+description: Percentage of eligible infants and toddlers who enroll in Early Head Start.
+metadata:
+  label: Early Head Start take-up rate
+  unit: /1
+  reference:
+    - title: NIEER State(s) of Head Start and Early Head Start Report
+      href: https://nieer.org/research-library/states-head-start-early-head-start
+values:
+  2020-09-01: 0.09
diff --git a/policyengine_us_data/parameters/take_up/eitc.yaml b/policyengine_us_data/parameters/take_up/eitc.yaml
@@ -0,0 +1,12 @@
+description: The share of eligible individuals who claim the EITC (by number of children).
+metadata:
+  label: EITC take-up rate by number of children
+  reference:
+    - title: National Taxpayer Advocate Special Report to Congress 2020 | IRS
+      href: https://www.taxpayeradvocate.irs.gov/wp-content/uploads/2020/08/JRC20_Volume3.pdf#page=62
+# Maps number of children to take-up rate
+rates_by_children:
+  0: 0.65
+  1: 0.86
+  2: 0.85
+  3: 0.85  # Assume same as 2
diff --git a/policyengine_us_data/parameters/take_up/head_start.yaml b/policyengine_us_data/parameters/take_up/head_start.yaml
@@ -0,0 +1,10 @@
+description: Percentage of eligible children who enroll in Head Start.
+metadata:
+  label: Head Start take-up rate
+  unit: /1
+  reference:
+    - title: NIEER State(s) of Head Start and Early Head Start Report
+      href: https://nieer.org/research-library/states-head-start-early-head-start
+values:
+  2020-09-01: 0.40
+  2021-09-01: 0.30
diff --git a/policyengine_us_data/parameters/take_up/medicaid.yaml b/policyengine_us_data/parameters/take_up/medicaid.yaml
@@ -0,0 +1,10 @@
+description: Percentage of people who do enroll in Medicaid, if eligible.
+metadata:
+  label: Medicaid takeup rate
+  unit: /1
+  period: year
+  reference:
+    - title: KFF "A Closer Look at the Remaining Uninsured Population Eligible for Medicaid and CHIP"
+      href: https://www.kff.org/uninsured/issue-brief/a-closer-look-at-the-remaining-uninsured-population-eligible-for-medicaid-and-chip/#:~:text=the%20uninsured%20rate%20dropped%20to,States%20began%20the
+values:
+  2018-01-01: 0.93
diff --git a/policyengine_us_data/parameters/take_up/snap.yaml b/policyengine_us_data/parameters/take_up/snap.yaml
@@ -0,0 +1,9 @@
+description: Percentage of eligible SNAP recipients who claim SNAP.
+metadata:
+  label: SNAP takeup rate
+  unit: /1
+  reference:
+    - title: USDA
+      href: https://www.fns.usda.gov/usamap
+values:
+  2018-01-01: 0.82
diff --git a/policyengine_us_data/tests/test_stochastic_variables.py b/policyengine_us_data/tests/test_stochastic_variables.py
@@ -0,0 +1,110 @@
+"""Tests for stochastic variable generation in the data package.
+
+These tests verify that:
+1. Take-up rate parameters load correctly
+2. Seeded RNG produces deterministic results
+3. Take-up rates produce plausible proportions
+"""
+
+import pytest
+import numpy as np
+from policyengine_us_data.parameters import load_take_up_rate
+
+
+class TestTakeUpRateParameters:
+    """Test that take-up rate parameters load correctly."""
+
+    def test_eitc_rate_loads(self):
+        """EITC take-up rates should load and be plausible."""
+        rates = load_take_up_rate("eitc", 2022)
+        # EITC rates are by number of children: 0, 1, 2, 3+
+        assert isinstance(rates, dict) or isinstance(rates, float)
+        if isinstance(rates, dict):
+            for key, rate in rates.items():
+                assert 0 < rate <= 1
+
+    def test_snap_rate_loads(self):
+        """SNAP take-up rate should load and be plausible."""
+        rate = load_take_up_rate("snap", 2022)
+        assert 0 < rate <= 1
+
+    def test_medicaid_rate_loads(self):
+        """Medicaid take-up rate should load and be plausible."""
+        rate = load_take_up_rate("medicaid", 2022)
+        assert 0 < rate <= 1
+
+    def test_aca_rate_loads(self):
+        """ACA take-up rate should load and be plausible."""
+        rate = load_take_up_rate("aca", 2022)
+        assert 0 < rate <= 1
+
+    def test_head_start_rate_loads(self):
+        """Head Start take-up rate should load and be plausible."""
+        rate = load_take_up_rate("head_start", 2022)
+        assert 0 < rate <= 1
+
+    def test_early_head_start_rate_loads(self):
+        """Early Head Start take-up rate should load and be plausible."""
+        rate = load_take_up_rate("early_head_start", 2022)
+        assert 0 < rate <= 1
+
+    def test_dc_ptc_rate_loads(self):
+        """DC PTC take-up rate should load and be plausible."""
+        rate = load_take_up_rate("dc_ptc", 2022)
+        assert 0 < rate <= 1
+
+
+class TestSeededRandomness:
+    """Test that stochastic generation is deterministic."""
+
+    def test_same_seed_produces_same_results(self):
+        """Using the same seed should produce identical results."""
+        seed = 0
+        n = 1_000
+
+        generator1 = np.random.default_rng(seed=seed)
+        result1 = generator1.random(n)
+
+        generator2 = np.random.default_rng(seed=seed)
+        result2 = generator2.random(n)
+
+        np.testing.assert_array_equal(result1, result2)
+
+    def test_different_seeds_produce_different_results(self):
+        """Different seeds should produce different results."""
+        n = 1_000
+
+        generator1 = np.random.default_rng(seed=0)
+        result1 = generator1.random(n)
+
+        generator2 = np.random.default_rng(seed=1)
+        result2 = generator2.random(n)
+
+        assert not np.array_equal(result1, result2)
+
+
+class TestTakeUpProportions:
+    """Test that take-up rates produce plausible proportions."""
+
+    def test_take_up_produces_expected_proportion(self):
+        """Simulated take-up should match the rate approximately."""
+        rate = 0.7
+        n = 10_000
+        generator = np.random.default_rng(seed=42)
+
+        take_up = generator.random(n) < rate
+        actual_proportion = take_up.mean()
+
+        # Should be within 5 percentage points of the rate
+        assert abs(actual_proportion - rate) < 0.05
+
+    def test_boolean_generation(self):
+        """Take-up decisions should be boolean."""
+        rate = 0.5
+        n = 100
+        generator = np.random.default_rng(seed=42)
+
+        take_up = generator.random(n) < rate
+
+        assert take_up.dtype == bool
+        assert set(take_up).issubset({True, False})