From 9582ecfab65a7e0cec797b8ab32720797f40baec Mon Sep 17 00:00:00 2001 From: David Trimmer Date: Thu, 4 Dec 2025 16:08:39 -0500 Subject: [PATCH 01/10] Use spm-calculator to compute SPM thresholds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add spm-calculator as a dependency (from GitHub) - Create policyengine_us_data/utils/spm.py with threshold calculation utilities - Update CPS dataset to use spm-calculator with Census-provided GEOADJ factors - Update ACS dataset to use spm-calculator with national-level thresholds For CPS: Uses the SPM_GEOADJ values from Census data combined with spm-calculator's base thresholds and equivalence scale, providing geographically-adjusted thresholds. For ACS: Uses national-level thresholds (no geographic adjustment) since ACS doesn't include pre-computed GEOADJ values. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- policyengine_us_data/datasets/acs/acs.py | 51 +++++++++- policyengine_us_data/datasets/cps/cps.py | 23 ++++- policyengine_us_data/utils/spm.py | 122 +++++++++++++++++++++++ pyproject.toml | 1 + 4 files changed, 191 insertions(+), 6 deletions(-) create mode 100644 policyengine_us_data/utils/spm.py diff --git a/policyengine_us_data/datasets/acs/acs.py b/policyengine_us_data/datasets/acs/acs.py index 0ecd3ee7..836c2a1f 100644 --- a/policyengine_us_data/datasets/acs/acs.py +++ b/policyengine_us_data/datasets/acs/acs.py @@ -25,6 +25,7 @@ def generate(self) -> None: self.add_id_variables(acs, person, household) self.add_person_variables(acs, person, household) self.add_household_variables(acs, household) + self.add_spm_variables(acs, person, household, self.time_period) acs.close() raw_data.close() @@ -93,9 +94,53 @@ def add_person_variables( ) @staticmethod - def add_spm_variables(acs: h5py.File, spm_unit: DataFrame) -> None: - acs["spm_unit_net_income_reported"] = spm_unit.SPM_RESOURCES - acs["spm_unit_spm_threshold"] = spm_unit.SPM_POVTHRESHOLD + def add_spm_variables( + acs: h5py.File, + person: DataFrame, + household: DataFrame, + time_period: int, + ) -> None: + from policyengine_us_data.utils.spm import ( + calculate_spm_thresholds_national, + map_tenure_acs_to_spm, + ) + + # In ACS, SPM unit = household + # Calculate number of adults (18+) and children (<18) per household + person_with_hh = person.copy() + person_with_hh["is_adult"] = person_with_hh["AGEP"] >= 18 + person_with_hh["is_child"] = person_with_hh["AGEP"] < 18 + + hh_counts = ( + person_with_hh.groupby("household_id") + .agg({"is_adult": "sum", "is_child": "sum"}) + .rename( + columns={"is_adult": "num_adults", "is_child": "num_children"} + ) + ) + + # Ensure household is indexed properly + household_indexed = household.set_index("household_id") + + # Get counts aligned with household order + num_adults = hh_counts.loc[ + household_indexed.index, "num_adults" + ].values + num_children = hh_counts.loc[ + household_indexed.index, "num_children" + ].values + + # Map ACS tenure to SPM tenure codes + tenure_codes = map_tenure_acs_to_spm(household_indexed["TEN"].values) + + # Calculate SPM thresholds using national-level values + # (ACS doesn't have Census-provided geographic adjustments) + acs["spm_unit_spm_threshold"] = calculate_spm_thresholds_national( + num_adults=num_adults, + num_children=num_children, + tenure_codes=tenure_codes, + year=time_period, + ) @staticmethod def add_household_variables(acs: h5py.File, household: DataFrame) -> None: diff --git a/policyengine_us_data/datasets/cps/cps.py 
b/policyengine_us_data/datasets/cps/cps.py index f932e0d5..90fc72c3 100644 --- a/policyengine_us_data/datasets/cps/cps.py +++ b/policyengine_us_data/datasets/cps/cps.py @@ -78,7 +78,7 @@ def generate(self): undocumented_students_target=0.21 * 1.9e6, ) logging.info("Adding family variables") - add_spm_variables(cps, spm_unit) + add_spm_variables(cps, spm_unit, self.time_period) logging.info("Adding household variables") add_household_variables(cps, household) logging.info("Adding rent") @@ -602,7 +602,15 @@ def add_personal_income_variables( cps[f"{var}_would_be_qualified"] = rng.random(len(person)) < prob -def add_spm_variables(cps: h5py.File, spm_unit: DataFrame) -> None: +def add_spm_variables( + cps: h5py.File, + spm_unit: DataFrame, + time_period: int, +) -> None: + from policyengine_us_data.utils.spm import ( + calculate_spm_thresholds_with_geoadj, + ) + SPM_RENAMES = dict( spm_unit_total_income_reported="SPM_TOTVAL", snap_reported="SPM_SNAPSUB", @@ -616,7 +624,6 @@ def add_spm_variables(cps: h5py.File, spm_unit: DataFrame) -> None: # State tax includes refundable credits. spm_unit_state_tax_reported="SPM_STTAX", spm_unit_capped_work_childcare_expenses="SPM_CAPWKCCXPNS", - spm_unit_spm_threshold="SPM_POVTHRESHOLD", spm_unit_net_income_reported="SPM_RESOURCES", spm_unit_pre_subsidy_childcare_expenses="SPM_CHILDCAREXPNS", ) @@ -625,6 +632,16 @@ def add_spm_variables(cps: h5py.File, spm_unit: DataFrame) -> None: if asec_variable in spm_unit.columns: cps[openfisca_variable] = spm_unit[asec_variable] + # Calculate SPM thresholds using spm-calculator with Census-provided + # geographic adjustment factors (SPM_GEOADJ) + cps["spm_unit_spm_threshold"] = calculate_spm_thresholds_with_geoadj( + num_adults=spm_unit["SPM_NUMADULTS"].values, + num_children=spm_unit["SPM_NUMKIDS"].values, + tenure_codes=spm_unit["SPM_TENMORTSTATUS"].values, + geoadj=spm_unit["SPM_GEOADJ"].values, + year=time_period, + ) + cps["reduced_price_school_meals_reported"] = ( cps["free_school_meals_reported"] * 0 ) diff --git a/policyengine_us_data/utils/spm.py b/policyengine_us_data/utils/spm.py new file mode 100644 index 00000000..05c04234 --- /dev/null +++ b/policyengine_us_data/utils/spm.py @@ -0,0 +1,122 @@ +"""SPM threshold calculation utilities using the spm-calculator package.""" + +import numpy as np +from spm_calculator import SPMCalculator, spm_equivalence_scale + + +# Census CPS SPM_TENMORTSTATUS codes to spm-calculator tenure mapping +# Based on IPUMS SPMMORT documentation: +# 1 = Owner with mortgage +# 2 = Owner without mortgage +# 3 = Renter +TENURE_CODE_MAP = { + 1: "owner_with_mortgage", + 2: "owner_without_mortgage", + 3: "renter", +} + + +def calculate_spm_thresholds_with_geoadj( + num_adults: np.ndarray, + num_children: np.ndarray, + tenure_codes: np.ndarray, + geoadj: np.ndarray, + year: int, +) -> np.ndarray: + """ + Calculate SPM thresholds using Census-provided geographic adjustments. + + This function uses the SPM_GEOADJ values already computed by the Census + Bureau, combined with spm-calculator's base thresholds and equivalence + scale formula. This avoids the need for a Census API key. + + Args: + num_adults: Array of number of adults (18+) in each SPM unit. + num_children: Array of number of children (<18) in each SPM unit. + tenure_codes: Array of Census tenure/mortgage status codes. + 1 = owner with mortgage, 2 = owner without mortgage, 3 = renter. + geoadj: Array of Census SPM_GEOADJ geographic adjustment factors. + year: The year for which to calculate thresholds. 
+ + Returns: + Array of SPM threshold values. + """ + calc = SPMCalculator(year=year) + base_thresholds = calc.get_base_thresholds() + + n = len(num_adults) + thresholds = np.zeros(n) + + for i in range(n): + tenure_str = TENURE_CODE_MAP.get(int(tenure_codes[i]), "renter") + base = base_thresholds[tenure_str] + equiv_scale = spm_equivalence_scale( + int(num_adults[i]), int(num_children[i]) + ) + thresholds[i] = base * equiv_scale * geoadj[i] + + return thresholds + + +def calculate_spm_thresholds_national( + num_adults: np.ndarray, + num_children: np.ndarray, + tenure_codes: np.ndarray, + year: int, +) -> np.ndarray: + """ + Calculate SPM thresholds using national-level thresholds (no geoadj). + + This is used for datasets like ACS that don't have pre-computed + geographic adjustment factors. + + Args: + num_adults: Array of number of adults (18+) in each SPM unit. + num_children: Array of number of children (<18) in each SPM unit. + tenure_codes: Array of Census tenure/mortgage status codes. + 1 = owner with mortgage, 2 = owner without mortgage, 3 = renter. + year: The year for which to calculate thresholds. + + Returns: + Array of SPM threshold values using national averages. + """ + calc = SPMCalculator(year=year) + base_thresholds = calc.get_base_thresholds() + + n = len(num_adults) + thresholds = np.zeros(n) + + for i in range(n): + tenure_str = TENURE_CODE_MAP.get(int(tenure_codes[i]), "renter") + base = base_thresholds[tenure_str] + equiv_scale = spm_equivalence_scale( + int(num_adults[i]), int(num_children[i]) + ) + # No geographic adjustment for national-level thresholds + thresholds[i] = base * equiv_scale + + return thresholds + + +def map_tenure_acs_to_spm(tenure_type: np.ndarray) -> np.ndarray: + """ + Map ACS tenure type values to spm-calculator tenure codes. + + Args: + tenure_type: Array of ACS TEN values. + 1 = Owned with mortgage/loan + 2 = Owned free and clear + 3 = Rented + + Returns: + Array of tenure code integers matching Census SPM format. + """ + # ACS TEN codes map directly to Census SPM codes: + # ACS 1 (owned with mortgage) -> Census 1 (owner_with_mortgage) + # ACS 2 (owned outright) -> Census 2 (owner_without_mortgage) + # ACS 3 (rented) -> Census 3 (renter) + return np.where( + np.isin(tenure_type, [1, 2, 3]), + tenure_type, + 3, # Default to renter for unknown values + ) diff --git a/pyproject.toml b/pyproject.toml index 3d00d389..f80c7c7f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ dependencies = [ "sqlalchemy>=2.0.41", "sqlmodel>=0.0.24", "xlrd>=2.0.2", + "spm-calculator @ git+https://github.com/PolicyEngine/spm-calculator.git", ] [project.optional-dependencies] From e9fa47e0f200926fd2ee9ea7848572984ac5bb7b Mon Sep 17 00:00:00 2001 From: David Trimmer Date: Thu, 4 Dec 2025 16:08:39 -0500 Subject: [PATCH 02/10] Use spm-calculator to compute SPM thresholds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add spm-calculator as a dependency (from GitHub) - Create policyengine_us_data/utils/spm.py with threshold calculation utilities - Update CPS dataset to use spm-calculator with Census-provided GEOADJ factors - Update ACS dataset to use spm-calculator with national-level thresholds For CPS: Uses the SPM_GEOADJ values from Census data combined with spm-calculator's base thresholds and equivalence scale, providing geographically-adjusted thresholds. For ACS: Uses national-level thresholds (no geographic adjustment) since ACS doesn't include pre-computed GEOADJ values. 
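As a reviewer aid (not part of the diff itself), here is a minimal usage sketch of the two new threshold helpers introduced in `policyengine_us_data/utils/spm.py`. The toy inputs and the 2024 year are illustrative; the call signatures mirror the functions defined in the patch below.

```python
# Illustrative only: exercise the new SPM threshold utilities on toy SPM units.
# Assumes this patch is applied and the spm-calculator dependency is installed.
import numpy as np

from policyengine_us_data.utils.spm import (
    calculate_spm_thresholds_with_geoadj,
    calculate_spm_thresholds_national,
)

num_adults = np.array([2, 1, 2])       # adults (18+) per SPM unit
num_children = np.array([2, 0, 1])     # children (<18) per SPM unit
tenure_codes = np.array([1, 3, 2])     # 1 = owner w/ mortgage, 2 = owner w/o, 3 = renter
geoadj = np.array([1.10, 0.95, 1.00])  # Census SPM_GEOADJ factors (toy values)

# CPS path: base threshold x equivalence scale x Census geographic adjustment.
cps_thresholds = calculate_spm_thresholds_with_geoadj(
    num_adults=num_adults,
    num_children=num_children,
    tenure_codes=tenure_codes,
    geoadj=geoadj,
    year=2024,
)

# ACS path: national-level thresholds, no geographic adjustment.
acs_thresholds = calculate_spm_thresholds_national(
    num_adults=num_adults,
    num_children=num_children,
    tenure_codes=tenure_codes,
    year=2024,
)

print(cps_thresholds, acs_thresholds)
```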
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- changelog_entry.yaml | 8 ++ policyengine_us_data/datasets/acs/acs.py | 51 +++++++++- policyengine_us_data/datasets/cps/cps.py | 23 ++++- policyengine_us_data/utils/spm.py | 122 +++++++++++++++++++++++ pyproject.toml | 1 + 5 files changed, 199 insertions(+), 6 deletions(-) create mode 100644 policyengine_us_data/utils/spm.py diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29b..c3d09844 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,8 @@ +- bump: minor + changes: + added: + - SPM threshold calculation using policyengine/spm-calculator package + - New utility module (policyengine_us_data/utils/spm.py) for SPM calculations + changed: + - CPS datasets now calculate SPM thresholds using spm-calculator with Census-provided geographic adjustments + - ACS datasets now calculate SPM thresholds using spm-calculator with national-level thresholds diff --git a/policyengine_us_data/datasets/acs/acs.py b/policyengine_us_data/datasets/acs/acs.py index 0ecd3ee7..836c2a1f 100644 --- a/policyengine_us_data/datasets/acs/acs.py +++ b/policyengine_us_data/datasets/acs/acs.py @@ -25,6 +25,7 @@ def generate(self) -> None: self.add_id_variables(acs, person, household) self.add_person_variables(acs, person, household) self.add_household_variables(acs, household) + self.add_spm_variables(acs, person, household, self.time_period) acs.close() raw_data.close() @@ -93,9 +94,53 @@ def add_person_variables( ) @staticmethod - def add_spm_variables(acs: h5py.File, spm_unit: DataFrame) -> None: - acs["spm_unit_net_income_reported"] = spm_unit.SPM_RESOURCES - acs["spm_unit_spm_threshold"] = spm_unit.SPM_POVTHRESHOLD + def add_spm_variables( + acs: h5py.File, + person: DataFrame, + household: DataFrame, + time_period: int, + ) -> None: + from policyengine_us_data.utils.spm import ( + calculate_spm_thresholds_national, + map_tenure_acs_to_spm, + ) + + # In ACS, SPM unit = household + # Calculate number of adults (18+) and children (<18) per household + person_with_hh = person.copy() + person_with_hh["is_adult"] = person_with_hh["AGEP"] >= 18 + person_with_hh["is_child"] = person_with_hh["AGEP"] < 18 + + hh_counts = ( + person_with_hh.groupby("household_id") + .agg({"is_adult": "sum", "is_child": "sum"}) + .rename( + columns={"is_adult": "num_adults", "is_child": "num_children"} + ) + ) + + # Ensure household is indexed properly + household_indexed = household.set_index("household_id") + + # Get counts aligned with household order + num_adults = hh_counts.loc[ + household_indexed.index, "num_adults" + ].values + num_children = hh_counts.loc[ + household_indexed.index, "num_children" + ].values + + # Map ACS tenure to SPM tenure codes + tenure_codes = map_tenure_acs_to_spm(household_indexed["TEN"].values) + + # Calculate SPM thresholds using national-level values + # (ACS doesn't have Census-provided geographic adjustments) + acs["spm_unit_spm_threshold"] = calculate_spm_thresholds_national( + num_adults=num_adults, + num_children=num_children, + tenure_codes=tenure_codes, + year=time_period, + ) @staticmethod def add_household_variables(acs: h5py.File, household: DataFrame) -> None: diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py index f932e0d5..90fc72c3 100644 --- a/policyengine_us_data/datasets/cps/cps.py +++ b/policyengine_us_data/datasets/cps/cps.py @@ -78,7 +78,7 @@ def generate(self): undocumented_students_target=0.21 * 1.9e6, ) 
logging.info("Adding family variables") - add_spm_variables(cps, spm_unit) + add_spm_variables(cps, spm_unit, self.time_period) logging.info("Adding household variables") add_household_variables(cps, household) logging.info("Adding rent") @@ -602,7 +602,15 @@ def add_personal_income_variables( cps[f"{var}_would_be_qualified"] = rng.random(len(person)) < prob -def add_spm_variables(cps: h5py.File, spm_unit: DataFrame) -> None: +def add_spm_variables( + cps: h5py.File, + spm_unit: DataFrame, + time_period: int, +) -> None: + from policyengine_us_data.utils.spm import ( + calculate_spm_thresholds_with_geoadj, + ) + SPM_RENAMES = dict( spm_unit_total_income_reported="SPM_TOTVAL", snap_reported="SPM_SNAPSUB", @@ -616,7 +624,6 @@ def add_spm_variables(cps: h5py.File, spm_unit: DataFrame) -> None: # State tax includes refundable credits. spm_unit_state_tax_reported="SPM_STTAX", spm_unit_capped_work_childcare_expenses="SPM_CAPWKCCXPNS", - spm_unit_spm_threshold="SPM_POVTHRESHOLD", spm_unit_net_income_reported="SPM_RESOURCES", spm_unit_pre_subsidy_childcare_expenses="SPM_CHILDCAREXPNS", ) @@ -625,6 +632,16 @@ def add_spm_variables(cps: h5py.File, spm_unit: DataFrame) -> None: if asec_variable in spm_unit.columns: cps[openfisca_variable] = spm_unit[asec_variable] + # Calculate SPM thresholds using spm-calculator with Census-provided + # geographic adjustment factors (SPM_GEOADJ) + cps["spm_unit_spm_threshold"] = calculate_spm_thresholds_with_geoadj( + num_adults=spm_unit["SPM_NUMADULTS"].values, + num_children=spm_unit["SPM_NUMKIDS"].values, + tenure_codes=spm_unit["SPM_TENMORTSTATUS"].values, + geoadj=spm_unit["SPM_GEOADJ"].values, + year=time_period, + ) + cps["reduced_price_school_meals_reported"] = ( cps["free_school_meals_reported"] * 0 ) diff --git a/policyengine_us_data/utils/spm.py b/policyengine_us_data/utils/spm.py new file mode 100644 index 00000000..05c04234 --- /dev/null +++ b/policyengine_us_data/utils/spm.py @@ -0,0 +1,122 @@ +"""SPM threshold calculation utilities using the spm-calculator package.""" + +import numpy as np +from spm_calculator import SPMCalculator, spm_equivalence_scale + + +# Census CPS SPM_TENMORTSTATUS codes to spm-calculator tenure mapping +# Based on IPUMS SPMMORT documentation: +# 1 = Owner with mortgage +# 2 = Owner without mortgage +# 3 = Renter +TENURE_CODE_MAP = { + 1: "owner_with_mortgage", + 2: "owner_without_mortgage", + 3: "renter", +} + + +def calculate_spm_thresholds_with_geoadj( + num_adults: np.ndarray, + num_children: np.ndarray, + tenure_codes: np.ndarray, + geoadj: np.ndarray, + year: int, +) -> np.ndarray: + """ + Calculate SPM thresholds using Census-provided geographic adjustments. + + This function uses the SPM_GEOADJ values already computed by the Census + Bureau, combined with spm-calculator's base thresholds and equivalence + scale formula. This avoids the need for a Census API key. + + Args: + num_adults: Array of number of adults (18+) in each SPM unit. + num_children: Array of number of children (<18) in each SPM unit. + tenure_codes: Array of Census tenure/mortgage status codes. + 1 = owner with mortgage, 2 = owner without mortgage, 3 = renter. + geoadj: Array of Census SPM_GEOADJ geographic adjustment factors. + year: The year for which to calculate thresholds. + + Returns: + Array of SPM threshold values. 
+ """ + calc = SPMCalculator(year=year) + base_thresholds = calc.get_base_thresholds() + + n = len(num_adults) + thresholds = np.zeros(n) + + for i in range(n): + tenure_str = TENURE_CODE_MAP.get(int(tenure_codes[i]), "renter") + base = base_thresholds[tenure_str] + equiv_scale = spm_equivalence_scale( + int(num_adults[i]), int(num_children[i]) + ) + thresholds[i] = base * equiv_scale * geoadj[i] + + return thresholds + + +def calculate_spm_thresholds_national( + num_adults: np.ndarray, + num_children: np.ndarray, + tenure_codes: np.ndarray, + year: int, +) -> np.ndarray: + """ + Calculate SPM thresholds using national-level thresholds (no geoadj). + + This is used for datasets like ACS that don't have pre-computed + geographic adjustment factors. + + Args: + num_adults: Array of number of adults (18+) in each SPM unit. + num_children: Array of number of children (<18) in each SPM unit. + tenure_codes: Array of Census tenure/mortgage status codes. + 1 = owner with mortgage, 2 = owner without mortgage, 3 = renter. + year: The year for which to calculate thresholds. + + Returns: + Array of SPM threshold values using national averages. + """ + calc = SPMCalculator(year=year) + base_thresholds = calc.get_base_thresholds() + + n = len(num_adults) + thresholds = np.zeros(n) + + for i in range(n): + tenure_str = TENURE_CODE_MAP.get(int(tenure_codes[i]), "renter") + base = base_thresholds[tenure_str] + equiv_scale = spm_equivalence_scale( + int(num_adults[i]), int(num_children[i]) + ) + # No geographic adjustment for national-level thresholds + thresholds[i] = base * equiv_scale + + return thresholds + + +def map_tenure_acs_to_spm(tenure_type: np.ndarray) -> np.ndarray: + """ + Map ACS tenure type values to spm-calculator tenure codes. + + Args: + tenure_type: Array of ACS TEN values. + 1 = Owned with mortgage/loan + 2 = Owned free and clear + 3 = Rented + + Returns: + Array of tenure code integers matching Census SPM format. + """ + # ACS TEN codes map directly to Census SPM codes: + # ACS 1 (owned with mortgage) -> Census 1 (owner_with_mortgage) + # ACS 2 (owned outright) -> Census 2 (owner_without_mortgage) + # ACS 3 (rented) -> Census 3 (renter) + return np.where( + np.isin(tenure_type, [1, 2, 3]), + tenure_type, + 3, # Default to renter for unknown values + ) diff --git a/pyproject.toml b/pyproject.toml index 3d00d389..f80c7c7f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ dependencies = [ "sqlalchemy>=2.0.41", "sqlmodel>=0.0.24", "xlrd>=2.0.2", + "spm-calculator @ git+https://github.com/PolicyEngine/spm-calculator.git", ] [project.optional-dependencies] From d8a5beeb28d991f2f413f5c1d17f9ccc27c9a0b8 Mon Sep 17 00:00:00 2001 From: David Trimmer Date: Fri, 5 Dec 2025 10:33:20 -0500 Subject: [PATCH 03/10] Clear notebook outputs to fix MyST documentation build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The long_term_projections.ipynb notebook outputs were causing a "useOutputsContext must be used within a OutputsContextProvider" error in MyST's book-theme during documentation build. Clearing the outputs resolves this issue. 
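For reference, one way to clear the outputs locally (assuming `jupyter nbconvert` is available) is:

```bash
# Illustrative command: strip all cell outputs in place before rebuilding the docs.
jupyter nbconvert --clear-output --inplace docs/long_term_projections.ipynb
```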
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- docs/long_term_projections.ipynb | 495 +------------------------------ 1 file changed, 16 insertions(+), 479 deletions(-) diff --git a/docs/long_term_projections.ipynb b/docs/long_term_projections.ipynb index 10b07b72..b99ab2ac 100644 --- a/docs/long_term_projections.ipynb +++ b/docs/long_term_projections.ipynb @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-11-19T19:56:10.232617Z", @@ -33,139 +33,7 @@ "shell.execute_reply": "2025-11-19T19:57:25.892382Z" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "TEST_LITE == False\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "======================================================================\r\n", - "HOUSEHOLD-LEVEL INCOME TAX PROJECTION: 2025-2027\r\n", - "======================================================================\r\n", - "\r\n", - "Configuration:\r\n", - " Base year: 2024 (CPS microdata)\r\n", - " Projection: 2025-2027\r\n", - " Calculation level: HOUSEHOLD ONLY (simplified)\r\n", - " Calibration method: GREG\r\n", - " Including Social Security benefits constraint: Yes\r\n", - " Including taxable payroll constraint: Yes\r\n", - " Saving year-specific .h5 files: Yes (to ./projected_datasets/)\r\n", - " Years to process: 3\r\n", - " Estimated time: ~9 minutes\r\n", - "\r\n", - "======================================================================\r\n", - "STEP 1: DEMOGRAPHIC PROJECTIONS\r\n", - "======================================================================\r\n", - "\r\n", - "Loaded SSA projections: 86 ages x 3 years\r\n", - "\r\n", - "Population projections:\r\n", - " 2025: 346.6M\r\n", - " 2027: 350.9M\r\n", - "\r\n", - "======================================================================\r\n", - "STEP 2: BUILDING HOUSEHOLD AGE COMPOSITION\r\n", - "======================================================================\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r\n", - "Loaded 21,532 households\r\n", - "Household age matrix shape: (21532, 86)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r\n", - "======================================================================\r\n", - "STEP 3: HOUSEHOLD-LEVEL PROJECTION\r\n", - "======================================================================\r\n", - "\r\n", - "Methodology (SIMPLIFIED):\r\n", - " 1. PolicyEngine uprates to each projection year\r\n", - " 2. Calculate all values at household level (map_to='household')\r\n", - " 3. IPF/GREG adjusts weights to match SSA demographics\r\n", - " 4. 
Apply calibrated weights directly (no aggregation needed)\r\n", - "\r\n", - "Initial memory usage: 1.13 GB\r\n", - "\r\n", - "Year Population Income Tax Baseline Tax Memory\r\n", - "-----------------------------------------------------------------\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " [DEBUG 2025] SS baseline: $1424.6B, target: $1609.0B\r\n", - " [DEBUG 2025] Payroll baseline: $8950.9B, target: $10621.0B\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " [DEBUG 2025] SS achieved: $1609.0B (error: -0.0%)\r\n", - " [DEBUG 2025] Payroll achieved: $10621.0B (error: -0.0%)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Saved 2025.h5\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2025 346.6M $ 2543.1B $ 1882.9B 4.32GB\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " [DEBUG 2027] SS baseline: $1495.1B, target: $1799.9B\r\n", - " [DEBUG 2027] Payroll baseline: $9718.4B, target: $11627.0B\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " [DEBUG 2027] SS achieved: $1799.9B (error: -0.0%)\r\n", - " [DEBUG 2027] Payroll achieved: $11627.0B (error: 0.0%)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Saved 2027.h5\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2027 350.9M $ 2873.8B $ 2125.0B 4.62GB\r\n" - ] - } - ], + "outputs": [], "source": [ "# Save calibrated datasets as .h5 files for each year\n", "!python ../policyengine_us_data/datasets/cps/long_term/run_household_projection.py 2027 --greg --use-ss --use-payroll --save-h5" @@ -227,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-11-19T19:57:25.894248Z", @@ -236,23 +104,7 @@ "shell.execute_reply": "2025-11-19T19:57:28.259297Z" } }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/baogorek/envs/pe/lib/python3.13/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "TEST_LITE == False\n" - ] - } - ], + "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", @@ -264,7 +116,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-11-19T19:57:28.260599Z", @@ -273,149 +125,7 @@ "shell.execute_reply": "2025-11-19T19:57:28.275040Z" } }, - "outputs": [ - { - "data": { - "text/html": [ - "
[text/html table output of ssa_pop.head(); the same rows appear in the text/plain output below]
" - ], - "text/plain": [ - " Year Age Total M Tot M Sin M Mar M Wid M Div F Tot \\\n", - "0 1941 0 2492508 1276328 1276328 0 0 0 1216180 \n", - "1 1941 1 2384290 1218524 1218524 0 0 0 1165766 \n", - "2 1941 2 2445054 1246078 1246078 0 0 0 1198976 \n", - "3 1941 3 2395999 1219543 1219543 0 0 0 1176456 \n", - "4 1941 4 2275425 1157612 1157612 0 0 0 1117813 \n", - "\n", - " F Sin F Mar F Wid F Div \n", - "0 1216180 0 0 0 \n", - "1 1165766 0 0 0 \n", - "2 1198976 0 0 0 \n", - "3 1176456 0 0 0 \n", - "4 1117813 0 0 0 " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Load SSA population data\n", "ssa_pop = pd.read_csv(STORAGE_FOLDER / 'SSPopJul_TR2024.csv')\n", @@ -424,7 +134,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-11-19T19:57:28.276266Z", @@ -433,101 +143,7 @@ "shell.execute_reply": "2025-11-19T19:57:28.279840Z" } }, - "outputs": [ - { - "data": { - "text/html": [ - "
[text/html table output of ss_aux.head(); the same rows appear in the text/plain output below]
" - ], - "text/plain": [ - " year oasdi_cost_in_billion_2025_usd cpi_w_intermediate \\\n", - "0 2025 1609 100.00 \n", - "1 2026 1660 102.49 \n", - "2 2027 1715 104.95 \n", - "3 2028 1763 107.47 \n", - "4 2029 1810 110.05 \n", - "\n", - " oasdi_cost_in_billion_nominal_usd taxable_payroll_in_billion_nominal_usd \n", - "0 1609.0000 10621.0 \n", - "1 1701.3340 11129.0 \n", - "2 1799.8925 11627.0 \n", - "3 1894.6961 12159.0 \n", - "4 1991.9050 12696.0 " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Load Social Security auxiliary data\n", "ss_aux = pd.read_csv(STORAGE_FOLDER / 'social_security_aux.csv')\n", @@ -590,7 +206,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-11-19T19:57:28.280998Z", @@ -609,7 +225,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-11-19T19:57:28.283285Z", @@ -618,18 +234,7 @@ "shell.execute_reply": "2025-11-19T19:57:28.296600Z" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Age distribution targets for 2025:\n", - "Shape: (86, 1)\n", - "Total population: 346577.3M\n" - ] - } - ], + "outputs": [], "source": [ "# Get SSA population targets for a specific year\n", "year = 2025\n", @@ -641,7 +246,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-11-19T19:57:28.297698Z", @@ -650,16 +255,7 @@ "shell.execute_reply": "2025-11-19T19:57:28.299938Z" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Social Security benefit target for 2025: $1609.0B\n" - ] - } - ], + "outputs": [], "source": [ "# Get Social Security benefit target\n", "ss_target = load_ssa_benefit_projections(year)\n", @@ -707,7 +303,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-11-19T19:57:28.300920Z", @@ -716,66 +312,7 @@ "shell.execute_reply": "2025-11-19T19:59:04.747122Z" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "================================================================================\n", - "WHARTON COMPARISON PIPELINE - YEAR 2054\n", - "================================================================================\n", - "\n", - "Running PolicyEngine analysis...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loading dataset: hf://policyengine/test/2054.h5\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Ranking according to quantiles with: household_net_income\n", - "✓ Analysis complete\n", - " Revenue impact: $-579.1B\n", - "\n", - "Generating comparison table...\n", - "\n", - "================================================================================\n", - "COMPARISON TABLE: 2054\n", - "================================================================================\n", - "\n", - "Average Tax Change (per household):\n", - " Income Group PolicyEngine Wharton Difference\n", - " First quintile $-95 $-5 $-90\n", - "Second quintile $-1,054 $-275 $-779\n", - "Middle quintile $-2,241 $-1,730 $-511\n", - "Fourth quintile $-4,633 $-3,560 $-1,073\n", - " 80-90% $-6,737 $-4,075 $-2,662\n", - " 90-95% $-12,121 $-4,385 $-7,736\n", - " 95-99% $-8,066 $-4,565 $-3,501\n", - " 
99-99.9% $-7,257 $-4,820 $-2,437\n", - " Top 0.1% $-8,615 $-5,080 $-3,535\n", - "\n", - "Percent Change in Income:\n", - " Income Group PE % Wharton %\n", - " First quintile -0.10000000149011612% 0.0%\n", - "Second quintile 0.800000011920929% 0.3%\n", - "Middle quintile 1.100000023841858% 1.3%\n", - "Fourth quintile 1.399999976158142% 1.6%\n", - " 80-90% 1.399999976158142% 1.2%\n", - " 90-95% 1.7000000476837158% 0.9%\n", - " 95-99% 0.699999988079071% 0.6%\n", - " 99-99.9% 0.30000001192092896% 0.2%\n", - " Top 0.1% 0.10000000149011612% 0.0%\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "import sys\n", "import os\n", @@ -969,7 +506,7 @@ "# Run analysis\n", "print(\"Running PolicyEngine analysis...\")\n", "pe_results, revenue_impact = run_analysis(dataset_path, year, income_rank_variable)\n", - "print(f\"✓ Analysis complete\")\n", + "print(f\"\u2713 Analysis complete\")\n", "print(f\" Revenue impact: ${revenue_impact:.1f}B\")\n", "print()\n", "\n", From 74286690d914df83251f631297348eb847dbe071 Mon Sep 17 00:00:00 2001 From: David Trimmer Date: Fri, 5 Dec 2025 11:08:52 -0500 Subject: [PATCH 04/10] Convert long_term_projections.ipynb to markdown to fix MyST build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Jupyter notebook was causing "useOutputsContext must be used within a OutputsContextProvider" errors in the MyST book-theme. Converting to markdown preserves all content while avoiding the rendering bug. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- docs/long_term_projections.ipynb | 552 ------------------------------- docs/long_term_projections.md | 348 +++++++++++++++++++ docs/myst.yml | 2 +- 3 files changed, 349 insertions(+), 553 deletions(-) delete mode 100644 docs/long_term_projections.ipynb create mode 100644 docs/long_term_projections.md diff --git a/docs/long_term_projections.ipynb b/docs/long_term_projections.ipynb deleted file mode 100644 index b99ab2ac..00000000 --- a/docs/long_term_projections.ipynb +++ /dev/null @@ -1,552 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": "# Long Term Projections\n## Integrating Economic Uprating with Demographic Reweighting" - }, - { - "cell_type": "markdown", - "source": "## Executive Summary\n\nThis document outlines an innovative approach for projecting federal income tax revenue through 2100 that uniquely combines sophisticated economic microsimulation with demographic reweighting. 
By harmonizing PolicyEngine's state-of-the-art tax modeling with Social Security Administration demographic projections, we can isolate and quantify the fiscal impact of population aging while preserving the full complexity of the tax code.", - "metadata": {} - }, - { - "cell_type": "markdown", - "source": "## The Challenge\n\nProjecting tax revenue over a 75-year horizon requires simultaneously modeling two distinct but interrelated dynamics:\n\n**Economic Evolution**: How incomes, prices, and tax parameters change over time\n- Wage growth and income distribution shifts\n- Inflation affecting brackets and deductions\n- Legislative changes and indexing rules\n- Behavioral responses to tax policy\n\n**Demographic Transformation**: How the population structure evolves\n- Baby boom generation aging through retirement\n- Declining birth rates reducing working-age population\n- Increasing longevity extending retirement duration\n- Shifting household composition patterns\n\nTraditional approaches typically sacrifice either economic sophistication (using simplified tax calculations) or demographic realism (holding age distributions constant). Our methodology preserves both.", - "metadata": {} - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Run projections using `run_household_projection.py`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2025-11-19T19:56:10.232617Z", - "iopub.status.busy": "2025-11-19T19:56:10.232523Z", - "iopub.status.idle": "2025-11-19T19:57:25.892674Z", - "shell.execute_reply": "2025-11-19T19:57:25.892382Z" - } - }, - "outputs": [], - "source": [ - "# Save calibrated datasets as .h5 files for each year\n", - "!python ../policyengine_us_data/datasets/cps/long_term/run_household_projection.py 2027 --greg --use-ss --use-payroll --save-h5" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Arguments:**\n", - "- `END_YEAR`: Target year for projection (default: 2035)\n", - "- `--greg`: Use GREG calibration instead of IPF (optional)\n", - "- `--use-ss`: Include Social Security benefit totals as calibration target (requires --greg)\n", - "- `--use-payroll`: Include taxable payroll as calibration target (requires --greg)\n", - "- `--save-h5`: Save year-specific .h5 files to `./projected_datasets/` directory" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## The Challenge\n", - "\n", - "Projecting tax revenue over a 75-year horizon requires simultaneously modeling two distinct but interrelated dynamics:\n", - "\n", - "**Economic Evolution**: How incomes, prices, and tax parameters change over time\n", - "- Wage growth and income distribution shifts\n", - "- Inflation affecting brackets and deductions\n", - "- Legislative changes and indexing rules\n", - "- Behavioral responses to tax policy\n", - "\n", - "**Demographic Transformation**: How the population structure evolves\n", - "- Baby boom generation aging through retirement\n", - "- Declining birth rates reducing working-age population\n", - "- Increasing longevity extending retirement duration\n", - "- Shifting household composition patterns\n", - "\n", - "Traditional approaches typically sacrifice either economic sophistication (using simplified tax calculations) or demographic realism (holding age distributions constant). Our methodology preserves both." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data Sources\n", - "\n", - "The long-term projections use two key SSA datasets:\n", - "\n", - "1. **SSA Population Projections** (`SSPopJul_TR2024.csv`)\n", - " - Source: [SSA 2024 Trustees Report - Single Year Age Demographic Projections](https://www.ssa.gov/oact/HistEst/Population/2024/Population2024.html)\n", - " - Contains age-specific population projections through 2100\n", - " - Used for demographic reweighting to match future population structure\n", - "\n", - "2. **Social Security Cost Projections** (`social_security_aux.csv`)\n", - " - Source: [SSA 2025 Trustees Report, Table VI.G9](https://www.ssa.gov/oact/TR/2025/index.html)\n", - " - Contains OASDI benefit cost projections in CPI-indexed 2025 dollars\n", - " - Used as calibration target in GREG method to ensure fiscal consistency" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2025-11-19T19:57:25.894248Z", - "iopub.status.busy": "2025-11-19T19:57:25.894165Z", - "iopub.status.idle": "2025-11-19T19:57:28.259561Z", - "shell.execute_reply": "2025-11-19T19:57:28.259297Z" - } - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "from pathlib import Path\n", - "\n", - "from policyengine_us_data.storage import STORAGE_FOLDER" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2025-11-19T19:57:28.260599Z", - "iopub.status.busy": "2025-11-19T19:57:28.260502Z", - "iopub.status.idle": "2025-11-19T19:57:28.275248Z", - "shell.execute_reply": "2025-11-19T19:57:28.275040Z" - } - }, - "outputs": [], - "source": [ - "# Load SSA population data\n", - "ssa_pop = pd.read_csv(STORAGE_FOLDER / 'SSPopJul_TR2024.csv')\n", - "ssa_pop.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2025-11-19T19:57:28.276266Z", - "iopub.status.busy": "2025-11-19T19:57:28.276171Z", - "iopub.status.idle": "2025-11-19T19:57:28.280120Z", - "shell.execute_reply": "2025-11-19T19:57:28.279840Z" - } - }, - "outputs": [], - "source": [ - "# Load Social Security auxiliary data\n", - "ss_aux = pd.read_csv(STORAGE_FOLDER / 'social_security_aux.csv')\n", - "ss_aux.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Core Innovation\n", - "\n", - "Our approach operates in two complementary stages:\n", - "\n", - "### Stage 1: Economic Uprating\n", - "\n", - "PolicyEngine's microsimulation engine projects each household's economic circumstances forward using:\n", - "\n", - "**Sophisticated Income Modeling**\n", - "\n", - "The system models 17 distinct income categories, each uprated according to its economic fundamentals:\n", - "\n", - "*Primary Categories with Specific Projections:*\n", - "- Employment income (wages) - follows CBO wage growth projections\n", - "- Self-employment income - follows CBO business income projections\n", - "- Capital gains - follows CBO asset appreciation projections\n", - "- Interest income - follows CBO interest rate projections\n", - "- Dividend income - follows CBO corporate profit projections\n", - "- Pension income - follows CBO retirement income projections\n", - "- Social Security - follows SSA COLA projections (available through 2100)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Stage 2: Demographic 
Reweighting\n", - "\n", - "We offer two calibration methods for adjusting household weights to match SSA projections:\n", - "\n", - "**Method 1: Iterative Proportional Fitting (IPF)**\n", - "- Traditional raking approach using Kullback-Leibler divergence\n", - "- Iteratively adjusts weights to match marginal distributions\n", - "- Robust to specification and always produces non-negative weights\n", - "- Default method for backward compatibility\n", - "\n", - "**Method 2: Generalized Regression (GREG) Calibration**\n", - "- Modern calibration using chi-squared distance minimization\n", - "- Enables simultaneous calibration to categorical AND continuous variables\n", - "- Direct solution via matrix operations (no iteration needed)\n", - "- Required for incorporating Social Security benefit constraints" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Demonstrating the Calibration Methods" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2025-11-19T19:57:28.280998Z", - "iopub.status.busy": "2025-11-19T19:57:28.280918Z", - "iopub.status.idle": "2025-11-19T19:57:28.282636Z", - "shell.execute_reply": "2025-11-19T19:57:28.282472Z" - } - }, - "outputs": [], - "source": [ - "from policyengine_us_data.datasets.cps.long_term.ssa_data import (\n", - " load_ssa_age_projections,\n", - " load_ssa_benefit_projections\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2025-11-19T19:57:28.283285Z", - "iopub.status.busy": "2025-11-19T19:57:28.283217Z", - "iopub.status.idle": "2025-11-19T19:57:28.296829Z", - "shell.execute_reply": "2025-11-19T19:57:28.296600Z" - } - }, - "outputs": [], - "source": [ - "# Get SSA population targets for a specific year\n", - "year = 2025\n", - "age_targets = load_ssa_age_projections(end_year=year)\n", - "print(f\"\\nAge distribution targets for {year}:\")\n", - "print(f\"Shape: {age_targets.shape}\")\n", - "print(f\"Total population: {age_targets[:, 0].sum() / 1000:.1f}M\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2025-11-19T19:57:28.297698Z", - "iopub.status.busy": "2025-11-19T19:57:28.297611Z", - "iopub.status.idle": "2025-11-19T19:57:28.300109Z", - "shell.execute_reply": "2025-11-19T19:57:28.299938Z" - } - }, - "outputs": [], - "source": [ - "# Get Social Security benefit target\n", - "ss_target = load_ssa_benefit_projections(year)\n", - "print(f\"\\nSocial Security benefit target for {year}: ${ss_target / 1e9:.1f}B\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### PWBM Analysis: Eliminating Income Taxes on Social Security Benefits\n", - "\n", - "**Source:** [Eliminating Income Taxes on Social Security Benefits](https://budgetmodel.wharton.upenn.edu/issues/2025/2/10/eliminating-income-taxes-on-social-security-benefits) (Penn Wharton Budget Model, February 10, 2025)\n", - "\n", - "---\n", - "\n", - "#### Policy Analyzed\n", - "The Penn Wharton Budget Model (PWBM) analyzed a policy proposal to permanently eliminate all income taxes on Social Security benefits, effective January 1, 2025.\n", - "\n", - "#### Key Findings\n", - "\n", - "* **Budgetary Impact:** The policy is projected to reduce federal revenues by **$1.45 trillion** over the 10-year budget window (2025-2034). 
Over the long term, it is projected to increase federal debt by 7 percent by 2054, relative to the current baseline.\n", - "\n", - "* **Macroeconomic Impact:** The analysis finds the policy would have negative long-term effects on the economy.\n", - " * It reduces incentives for households to save for retirement and to work.\n", - " * This leads to a smaller capital stock (projected to be 4.2% lower by 2054).\n", - " * The smaller capital stock results in lower average wages (1.8% lower by 2054) and lower GDP (2.1% lower by 2054).\n", - "\n", - "* **Conventional Distributional Impact (Your Table):** The table you shared shows the annual \"conventional\" effects on household after-tax income.\n", - " * The largest average *dollar* tax cuts go to households in the top 20 percent of the income distribution (quintiles 80-100%).\n", - " * The largest *relative* gains (as a percentage of income) go to households in the fourth quintile (60-80%), who see a 1.6% increase in after-tax income by 2054.\n", - " * The dollar amounts shown are in **nominal dollars** for each specified year, not adjusted to a single base year.\n", - "\n", - "* **Dynamic (Lifetime) Impact:** When analyzing the policy's effects over a household's entire lifetime, PWBM finds:\n", - " * The policy primarily benefits high-income households who are nearing or in retirement.\n", - " * It negatively impacts all households under the age of 30 and all future generations, who would experience a net welfare loss due to the long-term effects of lower wages and higher federal debt." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### PolicyEngine's Analysis of Eliminating Income Taxes on Social Security Benefits" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2025-11-19T19:57:28.300920Z", - "iopub.status.busy": "2025-11-19T19:57:28.300844Z", - "iopub.status.idle": "2025-11-19T19:59:04.747344Z", - "shell.execute_reply": "2025-11-19T19:59:04.747122Z" - } - }, - "outputs": [], - "source": [ - "import sys\n", - "import os\n", - "import pandas as pd\n", - "import numpy as np\n", - "import gc\n", - "\n", - "from policyengine_us import Microsimulation\n", - "from policyengine_core.reforms import Reform\n", - "\n", - "WHARTON_BENCHMARKS = {\n", - " 2026: {\n", - " 'First quintile': {'tax_change': 0, 'pct_change': 0.0},\n", - " 'Second quintile': {'tax_change': -15, 'pct_change': 0.0},\n", - " 'Middle quintile': {'tax_change': -340, 'pct_change': 0.5},\n", - " 'Fourth quintile': {'tax_change': -1135, 'pct_change': 1.1},\n", - " '80-90%': {'tax_change': -1625, 'pct_change': 1.0},\n", - " '90-95%': {'tax_change': -1590, 'pct_change': 0.7},\n", - " '95-99%': {'tax_change': -2020, 'pct_change': 0.5},\n", - " '99-99.9%': {'tax_change': -2205, 'pct_change': 0.2},\n", - " 'Top 0.1%': {'tax_change': -2450, 'pct_change': 0.0},\n", - " },\n", - " 2034: {\n", - " 'First quintile': {'tax_change': 0, 'pct_change': 0.0},\n", - " 'Second quintile': {'tax_change': -45, 'pct_change': 0.1},\n", - " 'Middle quintile': {'tax_change': -615, 'pct_change': 0.8},\n", - " 'Fourth quintile': {'tax_change': -1630, 'pct_change': 1.2},\n", - " '80-90%': {'tax_change': -2160, 'pct_change': 1.1},\n", - " '90-95%': {'tax_change': -2160, 'pct_change': 0.7},\n", - " '95-99%': {'tax_change': -2605, 'pct_change': 0.6},\n", - " '99-99.9%': {'tax_change': -2715, 'pct_change': 0.2},\n", - " 'Top 0.1%': {'tax_change': -2970, 'pct_change': 0.0},\n", - " },\n", - " 2054: {\n", - " 
'First quintile': {'tax_change': -5, 'pct_change': 0.0},\n", - " 'Second quintile': {'tax_change': -275, 'pct_change': 0.3},\n", - " 'Middle quintile': {'tax_change': -1730, 'pct_change': 1.3},\n", - " 'Fourth quintile': {'tax_change': -3560, 'pct_change': 1.6},\n", - " '80-90%': {'tax_change': -4075, 'pct_change': 1.2},\n", - " '90-95%': {'tax_change': -4385, 'pct_change': 0.9},\n", - " '95-99%': {'tax_change': -4565, 'pct_change': 0.6},\n", - " '99-99.9%': {'tax_change': -4820, 'pct_change': 0.2},\n", - " 'Top 0.1%': {'tax_change': -5080, 'pct_change': 0.0},\n", - " },\n", - "}\n", - "\n", - "def run_analysis(dataset_path, year, income_rank_var = \"household_net_income\"):\n", - " \"\"\"Run Option 1 analysis for given dataset and year\"\"\"\n", - "\n", - " option1_reform = Reform.from_dict(\n", - " {\n", - " # Base rate parameters (0-50% bracket)\n", - " \"gov.irs.social_security.taxability.rate.base.benefit_cap\": {\n", - " \"2026-01-01.2100-12-31\": 0\n", - " },\n", - " \"gov.irs.social_security.taxability.rate.base.excess\": {\n", - " \"2026-01-01.2100-12-31\": 0\n", - " },\n", - " # Additional rate parameters (50-85% bracket)\n", - " \"gov.irs.social_security.taxability.rate.additional.benefit_cap\": {\n", - " \"2026-01-01.2100-12-31\": 0\n", - " },\n", - " \"gov.irs.social_security.taxability.rate.additional.bracket\": {\n", - " \"2026-01-01.2100-12-31\": 0\n", - " },\n", - " \"gov.irs.social_security.taxability.rate.additional.excess\": {\n", - " \"2026-01-01.2100-12-31\": 0\n", - " }\n", - " }, country_id=\"us\"\n", - " )\n", - " reform = Microsimulation(dataset=dataset_path, reform=option1_reform)\n", - "\n", - " # Get household data\n", - " household_net_income_reform = reform.calculate(\"household_net_income\", period=year, map_to=\"household\")\n", - " household_agi_reform = reform.calculate(\"adjusted_gross_income\", period=year, map_to=\"household\")\n", - " income_tax_reform = reform.calculate(\"income_tax\", period=year, map_to=\"household\")\n", - "\n", - " del reform\n", - " gc.collect()\n", - "\n", - " print(f\"Loading dataset: {dataset_path}\")\n", - " baseline = Microsimulation(dataset=dataset_path)\n", - " household_weight = baseline.calculate(\"household_weight\", period=year)\n", - " household_net_income_baseline = baseline.calculate(\"household_net_income\", period=year, map_to=\"household\")\n", - " household_agi_baseline = baseline.calculate(\"adjusted_gross_income\", period=year, map_to=\"household\")\n", - " income_tax_baseline = baseline.calculate(\"income_tax\", period=year, map_to=\"household\")\n", - "\n", - " # Calculate changes\n", - " tax_change = income_tax_reform - income_tax_baseline\n", - " income_change_pct = (\n", - " (household_net_income_reform - household_net_income_baseline) / household_net_income_baseline\n", - " ) * 100\n", - "\n", - " # Create DataFrame\n", - " df = pd.DataFrame({\n", - " 'household_net_income': household_net_income_baseline,\n", - " 'weight': household_weight,\n", - " 'tax_change': tax_change,\n", - " 'income_change_pct': income_change_pct,\n", - " 'income_rank_var': baseline.calculate(income_rank_var, year, map_to=\"household\")\n", - " })\n", - " \n", - " # Calculate percentiles\n", - "\n", - " print(f\"Ranking according to quantiles with: {income_rank_var}\")\n", - " df['income_percentile'] = df['income_rank_var'].rank(pct=True) * 100\n", - "\n", - " # Assign income groups\n", - " def assign_income_group(percentile):\n", - " if percentile <= 20:\n", - " return 'First quintile'\n", - " elif percentile <= 40:\n", - " 
return 'Second quintile'\n", - " elif percentile <= 60:\n", - " return 'Middle quintile'\n", - " elif percentile <= 80:\n", - " return 'Fourth quintile'\n", - " elif percentile <= 90:\n", - " return '80-90%'\n", - " elif percentile <= 95:\n", - " return '90-95%'\n", - " elif percentile <= 99:\n", - " return '95-99%'\n", - " elif percentile <= 99.9:\n", - " return '99-99.9%'\n", - " else:\n", - " return 'Top 0.1%'\n", - "\n", - " df['income_group'] = df['income_percentile'].apply(assign_income_group)\n", - "\n", - " # Calculate aggregate revenue\n", - " revenue_impact = (income_tax_reform.sum() - income_tax_baseline.sum()) / 1e9\n", - "\n", - " # Calculate by group\n", - " results = []\n", - " for group in ['First quintile', 'Second quintile', 'Middle quintile', 'Fourth quintile',\n", - " '80-90%', '90-95%', '95-99%', '99-99.9%', 'Top 0.1%']:\n", - " group_data = df[df['income_group'] == group]\n", - " if len(group_data) == 0:\n", - " continue\n", - "\n", - " total_weight = group_data['weight'].sum()\n", - " avg_tax_change = (group_data['tax_change'] * group_data['weight']).sum() / total_weight\n", - " avg_income_change_pct = (group_data['income_change_pct'] * group_data['weight']).sum() / total_weight\n", - "\n", - " results.append({\n", - " 'group': group,\n", - " 'pe_tax_change': round(avg_tax_change),\n", - " 'pe_pct_change': round(avg_income_change_pct, 1),\n", - " })\n", - "\n", - " return pd.DataFrame(results), revenue_impact\n", - "\n", - "def generate_comparison_table(pe_results, year):\n", - " \"\"\"Generate comparison table with Wharton benchmark\"\"\"\n", - "\n", - " if year not in WHARTON_BENCHMARKS:\n", - " print(f\"Warning: No Wharton benchmark available for year {year}\")\n", - " return pe_results\n", - "\n", - " wharton_data = WHARTON_BENCHMARKS[year]\n", - "\n", - " comparison = []\n", - " for _, row in pe_results.iterrows():\n", - " group = row['group']\n", - " wharton = wharton_data.get(group, {'tax_change': None, 'pct_change': None})\n", - "\n", - " pe_tax = row['pe_tax_change']\n", - " wh_tax = wharton['tax_change']\n", - "\n", - " comparison.append({\n", - " 'Income Group': group,\n", - " 'PolicyEngine': f\"${pe_tax:,}\",\n", - " 'Wharton': f\"${wh_tax:,}\" if wh_tax is not None else 'N/A',\n", - " 'Difference': f\"${(pe_tax - wh_tax):,}\" if wh_tax is not None else 'N/A',\n", - " 'PE %': f\"{row['pe_pct_change']}%\",\n", - " 'Wharton %': f\"{wharton['pct_change']}%\" if wharton['pct_change'] is not None else 'N/A',\n", - " })\n", - "\n", - " return pd.DataFrame(comparison)\n", - "\n", - "dataset_path = 'hf://policyengine/test/2054.h5'\n", - "year = 2054\n", - "income_rank_variable = \"household_net_income\"\n", - "\n", - "print(\"=\"*80)\n", - "print(f\"WHARTON COMPARISON PIPELINE - YEAR {year}\")\n", - "print(\"=\"*80)\n", - "print()\n", - "\n", - "# Run analysis\n", - "print(\"Running PolicyEngine analysis...\")\n", - "pe_results, revenue_impact = run_analysis(dataset_path, year, income_rank_variable)\n", - "print(f\"\u2713 Analysis complete\")\n", - "print(f\" Revenue impact: ${revenue_impact:.1f}B\")\n", - "print()\n", - "\n", - "# Generate comparison table\n", - "print(\"Generating comparison table...\")\n", - "comparison_table = generate_comparison_table(pe_results, year)\n", - "\n", - "print()\n", - "print(\"=\"*80)\n", - "print(f\"COMPARISON TABLE: {year}\")\n", - "print(\"=\"*80)\n", - "print()\n", - "print(\"Average Tax Change (per household):\")\n", - "print(comparison_table[['Income Group', 'PolicyEngine', 'Wharton', 
'Difference']].to_string(index=False))\n", - "print()\n", - "print(\"Percent Change in Income:\")\n", - "print(comparison_table[['Income Group', 'PE %', 'Wharton %']].to_string(index=False))\n", - "print()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.6" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} \ No newline at end of file diff --git a/docs/long_term_projections.md b/docs/long_term_projections.md new file mode 100644 index 00000000..cc5d14c0 --- /dev/null +++ b/docs/long_term_projections.md @@ -0,0 +1,348 @@ +# Long Term Projections +## Integrating Economic Uprating with Demographic Reweighting + +## Executive Summary + +This document outlines an innovative approach for projecting federal income tax revenue through 2100 that uniquely combines sophisticated economic microsimulation with demographic reweighting. By harmonizing PolicyEngine's state-of-the-art tax modeling with Social Security Administration demographic projections, we can isolate and quantify the fiscal impact of population aging while preserving the full complexity of the tax code. + +## The Challenge + +Projecting tax revenue over a 75-year horizon requires simultaneously modeling two distinct but interrelated dynamics: + +**Economic Evolution**: How incomes, prices, and tax parameters change over time +- Wage growth and income distribution shifts +- Inflation affecting brackets and deductions +- Legislative changes and indexing rules +- Behavioral responses to tax policy + +**Demographic Transformation**: How the population structure evolves +- Baby boom generation aging through retirement +- Declining birth rates reducing working-age population +- Increasing longevity extending retirement duration +- Shifting household composition patterns + +Traditional approaches typically sacrifice either economic sophistication (using simplified tax calculations) or demographic realism (holding age distributions constant). Our methodology preserves both. + +## Running Projections + +Run projections using `run_household_projection.py`: + +```bash +# Save calibrated datasets as .h5 files for each year +python ../policyengine_us_data/datasets/cps/long_term/run_household_projection.py 2027 --greg --use-ss --use-payroll --save-h5 +``` + +**Arguments:** +- `END_YEAR`: Target year for projection (default: 2035) +- `--greg`: Use GREG calibration instead of IPF (optional) +- `--use-ss`: Include Social Security benefit totals as calibration target (requires --greg) +- `--use-payroll`: Include taxable payroll as calibration target (requires --greg) +- `--save-h5`: Save year-specific .h5 files to `./projected_datasets/` directory + +## Data Sources + +The long-term projections use two key SSA datasets: + +1. **SSA Population Projections** (`SSPopJul_TR2024.csv`) + - Source: [SSA 2024 Trustees Report - Single Year Age Demographic Projections](https://www.ssa.gov/oact/HistEst/Population/2024/Population2024.html) + - Contains age-specific population projections through 2100 + - Used for demographic reweighting to match future population structure + +2. 
**Social Security Cost Projections** (`social_security_aux.csv`) + - Source: [SSA 2025 Trustees Report, Table VI.G9](https://www.ssa.gov/oact/TR/2025/index.html) + - Contains OASDI benefit cost projections in CPI-indexed 2025 dollars + - Used as calibration target in GREG method to ensure fiscal consistency + +### Loading SSA Data + +```python +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +from pathlib import Path + +from policyengine_us_data.storage import STORAGE_FOLDER + +# Load SSA population data +ssa_pop = pd.read_csv(STORAGE_FOLDER / 'SSPopJul_TR2024.csv') +ssa_pop.head() + +# Load Social Security auxiliary data +ss_aux = pd.read_csv(STORAGE_FOLDER / 'social_security_aux.csv') +ss_aux.head() +``` + +## Core Innovation + +Our approach operates in two complementary stages: + +### Stage 1: Economic Uprating + +PolicyEngine's microsimulation engine projects each household's economic circumstances forward using: + +**Sophisticated Income Modeling** + +The system models 17 distinct income categories, each uprated according to its economic fundamentals: + +*Primary Categories with Specific Projections:* +- Employment income (wages) - follows CBO wage growth projections +- Self-employment income - follows CBO business income projections +- Capital gains - follows CBO asset appreciation projections +- Interest income - follows CBO interest rate projections +- Dividend income - follows CBO corporate profit projections +- Pension income - follows CBO retirement income projections +- Social Security - follows SSA COLA projections (available through 2100) + +### Stage 2: Demographic Reweighting + +We offer two calibration methods for adjusting household weights to match SSA projections: + +**Method 1: Iterative Proportional Fitting (IPF)** +- Traditional raking approach using Kullback-Leibler divergence +- Iteratively adjusts weights to match marginal distributions +- Robust to specification and always produces non-negative weights +- Default method for backward compatibility + +**Method 2: Generalized Regression (GREG) Calibration** +- Modern calibration using chi-squared distance minimization +- Enables simultaneous calibration to categorical AND continuous variables +- Direct solution via matrix operations (no iteration needed) +- Required for incorporating Social Security benefit constraints + +## Demonstrating the Calibration Methods + +```python +from policyengine_us_data.datasets.cps.long_term.ssa_data import ( + load_ssa_age_projections, + load_ssa_benefit_projections +) + +# Get SSA population targets for a specific year +year = 2025 +age_targets = load_ssa_age_projections(end_year=year) +print(f"\nAge distribution targets for {year}:") +print(f"Shape: {age_targets.shape}") +print(f"Total population: {age_targets[:, 0].sum() / 1000:.1f}M") + +# Get Social Security benefit target +ss_target = load_ssa_benefit_projections(year) +print(f"\nSocial Security benefit target for {year}: ${ss_target / 1e9:.1f}B") +``` + +## PWBM Analysis: Eliminating Income Taxes on Social Security Benefits + +**Source:** [Eliminating Income Taxes on Social Security Benefits](https://budgetmodel.wharton.upenn.edu/issues/2025/2/10/eliminating-income-taxes-on-social-security-benefits) (Penn Wharton Budget Model, February 10, 2025) + +--- + +### Policy Analyzed +The Penn Wharton Budget Model (PWBM) analyzed a policy proposal to permanently eliminate all income taxes on Social Security benefits, effective January 1, 2025. 
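+
+To make the mechanics concrete, the sketch below illustrates the simplified two-tier rule that currently determines how much of a household's Social Security benefit is included in taxable income. It is an illustration added for this document, not PWBM's or PolicyEngine's code, and the dollar thresholds shown are the current-law joint-filer base amounts. The reform specification in the next section zeroes the corresponding base (50%) and additional (85%) rate parameters.
+
+```python
+# Illustrative sketch only: a simplified version of the two-tier inclusion
+# rule (IRC Section 86) that the proposal would repeal. The $32,000/$44,000
+# base amounts are the current-law joint-filer thresholds (single filers use
+# $25,000/$34,000); they are not indexed for inflation.
+def taxable_social_security(benefits, other_income, base=32_000, adjusted_base=44_000):
+    provisional = other_income + 0.5 * benefits
+    if provisional <= base:
+        return 0.0
+    if provisional <= adjusted_base:
+        # First tier: up to 50% of benefits becomes taxable
+        return min(0.5 * (provisional - base), 0.5 * benefits)
+    # Second tier: up to 85% of benefits becomes taxable
+    tier_1 = min(0.5 * (adjusted_base - base), 0.5 * benefits)
+    return min(0.85 * (provisional - adjusted_base) + tier_1, 0.85 * benefits)
+
+# Under the proposal both inclusion rates are set to zero, so taxable
+# Social Security benefits fall to zero for every household.
+```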
+
+### Key Findings
+
+* **Budgetary Impact:** The policy is projected to reduce federal revenues by **$1.45 trillion** over the 10-year budget window (2025-2034). Over the long term, it is projected to increase federal debt by 7 percent by 2054, relative to the current baseline.
+
+* **Macroeconomic Impact:** The analysis finds the policy would have negative long-term effects on the economy.
+    * It reduces incentives for households to save for retirement and to work.
+    * This leads to a smaller capital stock (projected to be 4.2% lower by 2054).
+    * The smaller capital stock results in lower average wages (1.8% lower by 2054) and lower GDP (2.1% lower by 2054).
+
+* **Conventional Distributional Impact:** PWBM's distributional table reports the annual "conventional" effects on household after-tax income.
+    * The largest average *dollar* tax cuts go to households in the top 20 percent of the income distribution (80-100%).
+    * The largest *relative* gains (as a percentage of income) go to households in the fourth quintile (60-80%), who see a 1.6% increase in after-tax income by 2054.
+    * The dollar amounts shown are in **nominal dollars** for each specified year, not adjusted to a single base year.
+
+* **Dynamic (Lifetime) Impact:** When analyzing the policy's effects over a household's entire lifetime, PWBM finds:
+    * The policy primarily benefits high-income households who are nearing or in retirement.
+    * It negatively impacts all households under the age of 30 and all future generations, who would experience a net welfare loss due to the long-term effects of lower wages and higher federal debt.
+
+## PolicyEngine's Analysis of Eliminating Income Taxes on Social Security Benefits
+
+```python
+import sys
+import os
+import pandas as pd
+import numpy as np
+import gc
+
+from policyengine_us import Microsimulation
+from policyengine_core.reforms import Reform
+
+WHARTON_BENCHMARKS = {
+    2026: {
+        'First quintile': {'tax_change': 0, 'pct_change': 0.0},
+        'Second quintile': {'tax_change': -15, 'pct_change': 0.0},
+        'Middle quintile': {'tax_change': -340, 'pct_change': 0.5},
+        'Fourth quintile': {'tax_change': -1135, 'pct_change': 1.1},
+        '80-90%': {'tax_change': -1625, 'pct_change': 1.0},
+        '90-95%': {'tax_change': -1590, 'pct_change': 0.7},
+        '95-99%': {'tax_change': -2020, 'pct_change': 0.5},
+        '99-99.9%': {'tax_change': -2205, 'pct_change': 0.2},
+        'Top 0.1%': {'tax_change': -2450, 'pct_change': 0.0},
+    },
+    2034: {
+        'First quintile': {'tax_change': 0, 'pct_change': 0.0},
+        'Second quintile': {'tax_change': -45, 'pct_change': 0.1},
+        'Middle quintile': {'tax_change': -615, 'pct_change': 0.8},
+        'Fourth quintile': {'tax_change': -1630, 'pct_change': 1.2},
+        '80-90%': {'tax_change': -2160, 'pct_change': 1.1},
+        '90-95%': {'tax_change': -2160, 'pct_change': 0.7},
+        '95-99%': {'tax_change': -2605, 'pct_change': 0.6},
+        '99-99.9%': {'tax_change': -2715, 'pct_change': 0.2},
+        'Top 0.1%': {'tax_change': -2970, 'pct_change': 0.0},
+    },
+    2054: {
+        'First quintile': {'tax_change': -5, 'pct_change': 0.0},
+        'Second quintile': {'tax_change': -275, 'pct_change': 0.3},
+        'Middle quintile': {'tax_change': -1730, 'pct_change': 1.3},
+        'Fourth quintile': {'tax_change': -3560, 'pct_change': 1.6},
+        '80-90%': {'tax_change': -4075, 'pct_change': 1.2},
+        '90-95%': {'tax_change': -4385, 'pct_change': 0.9},
+        '95-99%': {'tax_change': -4565, 'pct_change': 0.6},
+        '99-99.9%': {'tax_change': -4820, 'pct_change': 0.2},
+        'Top 0.1%': {'tax_change': -5080, 'pct_change': 0.0},
+    },
+}
+
+def 
run_analysis(dataset_path, year, income_rank_var = "household_net_income"): + """Run Option 1 analysis for given dataset and year""" + + option1_reform = Reform.from_dict( + { + # Base rate parameters (0-50% bracket) + "gov.irs.social_security.taxability.rate.base.benefit_cap": { + "2026-01-01.2100-12-31": 0 + }, + "gov.irs.social_security.taxability.rate.base.excess": { + "2026-01-01.2100-12-31": 0 + }, + # Additional rate parameters (50-85% bracket) + "gov.irs.social_security.taxability.rate.additional.benefit_cap": { + "2026-01-01.2100-12-31": 0 + }, + "gov.irs.social_security.taxability.rate.additional.bracket": { + "2026-01-01.2100-12-31": 0 + }, + "gov.irs.social_security.taxability.rate.additional.excess": { + "2026-01-01.2100-12-31": 0 + } + }, country_id="us" + ) + reform = Microsimulation(dataset=dataset_path, reform=option1_reform) + + # Get household data + household_net_income_reform = reform.calculate("household_net_income", period=year, map_to="household") + household_agi_reform = reform.calculate("adjusted_gross_income", period=year, map_to="household") + income_tax_reform = reform.calculate("income_tax", period=year, map_to="household") + + del reform + gc.collect() + + print(f"Loading dataset: {dataset_path}") + baseline = Microsimulation(dataset=dataset_path) + household_weight = baseline.calculate("household_weight", period=year) + household_net_income_baseline = baseline.calculate("household_net_income", period=year, map_to="household") + household_agi_baseline = baseline.calculate("adjusted_gross_income", period=year, map_to="household") + income_tax_baseline = baseline.calculate("income_tax", period=year, map_to="household") + + # Calculate changes + tax_change = income_tax_reform - income_tax_baseline + income_change_pct = ( + (household_net_income_reform - household_net_income_baseline) / household_net_income_baseline + ) * 100 + + # Create DataFrame + df = pd.DataFrame({ + 'household_net_income': household_net_income_baseline, + 'weight': household_weight, + 'tax_change': tax_change, + 'income_change_pct': income_change_pct, + 'income_rank_var': baseline.calculate(income_rank_var, year, map_to="household") + }) + + # Calculate percentiles + + print(f"Ranking according to quantiles with: {income_rank_var}") + df['income_percentile'] = df['income_rank_var'].rank(pct=True) * 100 + + # Assign income groups + def assign_income_group(percentile): + if percentile <= 20: + return 'First quintile' + elif percentile <= 40: + return 'Second quintile' + elif percentile <= 60: + return 'Middle quintile' + elif percentile <= 80: + return 'Fourth quintile' + elif percentile <= 90: + return '80-90%' + elif percentile <= 95: + return '90-95%' + elif percentile <= 99: + return '95-99%' + elif percentile <= 99.9: + return '99-99.9%' + else: + return 'Top 0.1%' + + df['income_group'] = df['income_percentile'].apply(assign_income_group) + + # Calculate aggregate revenue + revenue_impact = (income_tax_reform.sum() - income_tax_baseline.sum()) / 1e9 + + # Calculate by group + results = [] + for group in ['First quintile', 'Second quintile', 'Middle quintile', 'Fourth quintile', + '80-90%', '90-95%', '95-99%', '99-99.9%', 'Top 0.1%']: + group_data = df[df['income_group'] == group] + if len(group_data) == 0: + continue + + total_weight = group_data['weight'].sum() + avg_tax_change = (group_data['tax_change'] * group_data['weight']).sum() / total_weight + avg_income_change_pct = (group_data['income_change_pct'] * group_data['weight']).sum() / total_weight + + results.append({ + 'group': 
group, + 'pe_tax_change': round(avg_tax_change), + 'pe_pct_change': round(avg_income_change_pct, 1), + }) + + return pd.DataFrame(results), revenue_impact + +def generate_comparison_table(pe_results, year): + """Generate comparison table with Wharton benchmark""" + + if year not in WHARTON_BENCHMARKS: + print(f"Warning: No Wharton benchmark available for year {year}") + return pe_results + + wharton_data = WHARTON_BENCHMARKS[year] + + comparison = [] + for _, row in pe_results.iterrows(): + group = row['group'] + wharton = wharton_data.get(group, {'tax_change': None, 'pct_change': None}) + + pe_tax = row['pe_tax_change'] + wh_tax = wharton['tax_change'] + + comparison.append({ + 'Income Group': group, + 'PolicyEngine': f"${pe_tax:,}", + 'Wharton': f"${wh_tax:,}" if wh_tax is not None else 'N/A', + 'Difference': f"${(pe_tax - wh_tax):,}" if wh_tax is not None else 'N/A', + 'PE %': f"{row['pe_pct_change']}%", + 'Wharton %': f"{wharton['pct_change']}%" if wharton['pct_change'] is not None else 'N/A', + }) + + return pd.DataFrame(comparison) + +# Example usage: +# dataset_path = 'hf://policyengine/test/2054.h5' +# year = 2054 +# income_rank_variable = "household_net_income" +# pe_results, revenue_impact = run_analysis(dataset_path, year, income_rank_variable) +# comparison_table = generate_comparison_table(pe_results, year) +``` diff --git a/docs/myst.yml b/docs/myst.yml index a39b3cfb..fbbdcbd9 100644 --- a/docs/myst.yml +++ b/docs/myst.yml @@ -24,7 +24,7 @@ project: - file: background.md - file: data.md - file: methodology.md - - file: long_term_projections.ipynb + - file: long_term_projections.md - file: discussion.md - file: conclusion.md - file: appendix.md From 88749b0405dbed9b6b0384ced929fbd501ec6835 Mon Sep 17 00:00:00 2001 From: David Trimmer Date: Tue, 9 Dec 2025 13:18:56 -0500 Subject: [PATCH 05/10] Use PUMA-level geographic adjustments for ACS SPM thresholds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, ACS SPM thresholds used national-level values only (GEOADJ=1.0). This update uses spm-calculator's PUMA-level geographic adjustment lookup to compute thresholds with local housing cost adjustments. Changes: - Add calculate_spm_thresholds_by_puma() function to spm.py - Update ACS dataset to pass state FIPS and PUMA codes to the new function - PUMA identifiers are constructed as 7-digit codes (2-digit state + 5-digit PUMA) Note: Requires CENSUS_API_KEY environment variable for PUMA lookups. 
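Example of the identifier construction (illustrative only, not part of the
committed code): a 2-digit zero-padded state code is concatenated with a
5-digit zero-padded PUMA code, e.g.

    >>> f"{6:02d}{101:05d}"   # state FIPS 6, PUMA 101
    '0600101'
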
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- policyengine_us_data/datasets/acs/acs.py | 13 ++++-- policyengine_us_data/utils/spm.py | 55 ++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 4 deletions(-) diff --git a/policyengine_us_data/datasets/acs/acs.py b/policyengine_us_data/datasets/acs/acs.py index 836c2a1f..d5ae6402 100644 --- a/policyengine_us_data/datasets/acs/acs.py +++ b/policyengine_us_data/datasets/acs/acs.py @@ -101,7 +101,7 @@ def add_spm_variables( time_period: int, ) -> None: from policyengine_us_data.utils.spm import ( - calculate_spm_thresholds_national, + calculate_spm_thresholds_by_puma, map_tenure_acs_to_spm, ) @@ -133,12 +133,17 @@ def add_spm_variables( # Map ACS tenure to SPM tenure codes tenure_codes = map_tenure_acs_to_spm(household_indexed["TEN"].values) - # Calculate SPM thresholds using national-level values - # (ACS doesn't have Census-provided geographic adjustments) - acs["spm_unit_spm_threshold"] = calculate_spm_thresholds_national( + # Get geographic identifiers for PUMA-level adjustments + state_fips = household_indexed["ST"].values.astype(int) + puma_codes = household_indexed["PUMA"].values.astype(int) + + # Calculate SPM thresholds using PUMA-level geographic adjustments + acs["spm_unit_spm_threshold"] = calculate_spm_thresholds_by_puma( num_adults=num_adults, num_children=num_children, tenure_codes=tenure_codes, + state_fips=state_fips, + puma_codes=puma_codes, year=time_period, ) diff --git a/policyengine_us_data/utils/spm.py b/policyengine_us_data/utils/spm.py index 05c04234..a8c552cb 100644 --- a/policyengine_us_data/utils/spm.py +++ b/policyengine_us_data/utils/spm.py @@ -98,6 +98,61 @@ def calculate_spm_thresholds_national( return thresholds +def calculate_spm_thresholds_by_puma( + num_adults: np.ndarray, + num_children: np.ndarray, + tenure_codes: np.ndarray, + state_fips: np.ndarray, + puma_codes: np.ndarray, + year: int, +) -> np.ndarray: + """ + Calculate SPM thresholds using PUMA-level geographic adjustments. + + This function uses the spm-calculator's PUMA-level geographic adjustment + lookup to compute thresholds with local housing cost adjustments. + Requires CENSUS_API_KEY environment variable to be set. + + Args: + num_adults: Array of number of adults (18+) in each SPM unit. + num_children: Array of number of children (<18) in each SPM unit. + tenure_codes: Array of Census tenure/mortgage status codes. + 1 = owner with mortgage, 2 = owner without mortgage, 3 = renter. + state_fips: Array of 2-digit state FIPS codes (integers). + puma_codes: Array of 5-digit PUMA codes (integers). + year: The year for which to calculate thresholds. + + Returns: + Array of SPM threshold values with PUMA-level geographic adjustments. 
+ """ + calc = SPMCalculator(year=year) + + # Build 7-digit PUMA identifiers: 2-digit state + 5-digit PUMA + # Format as strings with proper zero-padding + puma_ids = np.array( + [ + f"{int(state):02d}{int(puma):05d}" + for state, puma in zip(state_fips, puma_codes) + ] + ) + + # Map tenure codes to strings + tenure_strs = np.array( + [TENURE_CODE_MAP.get(int(t), "renter") for t in tenure_codes] + ) + + # Use vectorized calculation from spm-calculator + thresholds = calc.calculate_thresholds( + num_adults=num_adults.astype(int), + num_children=num_children.astype(int), + tenure=tenure_strs, + geography_type="puma", + geography_ids=puma_ids, + ) + + return thresholds + + def map_tenure_acs_to_spm(tenure_type: np.ndarray) -> np.ndarray: """ Map ACS tenure type values to spm-calculator tenure codes. From 23fce035a345fa5cd45c040556afeba210c86e45 Mon Sep 17 00:00:00 2001 From: David Trimmer Date: Wed, 10 Dec 2025 13:52:26 -0500 Subject: [PATCH 06/10] Use national SPM thresholds for ACS (no geographic adjustment) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Switch from calculate_spm_thresholds_by_puma to calculate_spm_thresholds_national to avoid Census API dependency that was causing CI failures - Geographic adjustments will be applied later in the pipeline when households are assigned to specific areas (per discussion with Max) - Remove obsolete long_term_projections.ipynb reference from myst.yml 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- docs/myst.yml | 1 - policyengine_us_data/datasets/acs/acs.py | 14 +++++--------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/docs/myst.yml b/docs/myst.yml index a07556cd..c38666af 100644 --- a/docs/myst.yml +++ b/docs/myst.yml @@ -26,7 +26,6 @@ project: - file: methodology.md - file: long_term_projections.md - file: local_area_calibration_setup.ipynb - - file: long_term_projections.ipynb - file: discussion.md - file: conclusion.md - file: appendix.md diff --git a/policyengine_us_data/datasets/acs/acs.py b/policyengine_us_data/datasets/acs/acs.py index d5ae6402..df227bd9 100644 --- a/policyengine_us_data/datasets/acs/acs.py +++ b/policyengine_us_data/datasets/acs/acs.py @@ -101,7 +101,7 @@ def add_spm_variables( time_period: int, ) -> None: from policyengine_us_data.utils.spm import ( - calculate_spm_thresholds_by_puma, + calculate_spm_thresholds_national, map_tenure_acs_to_spm, ) @@ -133,17 +133,13 @@ def add_spm_variables( # Map ACS tenure to SPM tenure codes tenure_codes = map_tenure_acs_to_spm(household_indexed["TEN"].values) - # Get geographic identifiers for PUMA-level adjustments - state_fips = household_indexed["ST"].values.astype(int) - puma_codes = household_indexed["PUMA"].values.astype(int) - - # Calculate SPM thresholds using PUMA-level geographic adjustments - acs["spm_unit_spm_threshold"] = calculate_spm_thresholds_by_puma( + # Calculate SPM thresholds using national-level values (no geographic + # adjustment). Geographic adjustments will be applied later in the + # pipeline when households are assigned to specific areas. 
+ acs["spm_unit_spm_threshold"] = calculate_spm_thresholds_national( num_adults=num_adults, num_children=num_children, tenure_codes=tenure_codes, - state_fips=state_fips, - puma_codes=puma_codes, year=time_period, ) From ea3391dd7b9d6a27128f93eea9d9b777fc7ebf01 Mon Sep 17 00:00:00 2001 From: David Trimmer Date: Fri, 12 Dec 2025 12:02:57 -0500 Subject: [PATCH 07/10] Address PR review feedback: revert ACS changes and simplify CPS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Revert all ACS changes - no longer touching ACS since SPM_POVTHRESHOLD is already available and we don't need to calculate thresholds there - Remove time_period argument from add_spm_variables, use self.time_period instead to match pattern of other functions like add_rent 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- policyengine_us_data/datasets/acs/acs.py | 52 ++---------------------- policyengine_us_data/datasets/cps/cps.py | 10 ++--- 2 files changed, 6 insertions(+), 56 deletions(-) diff --git a/policyengine_us_data/datasets/acs/acs.py b/policyengine_us_data/datasets/acs/acs.py index df227bd9..0ecd3ee7 100644 --- a/policyengine_us_data/datasets/acs/acs.py +++ b/policyengine_us_data/datasets/acs/acs.py @@ -25,7 +25,6 @@ def generate(self) -> None: self.add_id_variables(acs, person, household) self.add_person_variables(acs, person, household) self.add_household_variables(acs, household) - self.add_spm_variables(acs, person, household, self.time_period) acs.close() raw_data.close() @@ -94,54 +93,9 @@ def add_person_variables( ) @staticmethod - def add_spm_variables( - acs: h5py.File, - person: DataFrame, - household: DataFrame, - time_period: int, - ) -> None: - from policyengine_us_data.utils.spm import ( - calculate_spm_thresholds_national, - map_tenure_acs_to_spm, - ) - - # In ACS, SPM unit = household - # Calculate number of adults (18+) and children (<18) per household - person_with_hh = person.copy() - person_with_hh["is_adult"] = person_with_hh["AGEP"] >= 18 - person_with_hh["is_child"] = person_with_hh["AGEP"] < 18 - - hh_counts = ( - person_with_hh.groupby("household_id") - .agg({"is_adult": "sum", "is_child": "sum"}) - .rename( - columns={"is_adult": "num_adults", "is_child": "num_children"} - ) - ) - - # Ensure household is indexed properly - household_indexed = household.set_index("household_id") - - # Get counts aligned with household order - num_adults = hh_counts.loc[ - household_indexed.index, "num_adults" - ].values - num_children = hh_counts.loc[ - household_indexed.index, "num_children" - ].values - - # Map ACS tenure to SPM tenure codes - tenure_codes = map_tenure_acs_to_spm(household_indexed["TEN"].values) - - # Calculate SPM thresholds using national-level values (no geographic - # adjustment). Geographic adjustments will be applied later in the - # pipeline when households are assigned to specific areas. 
- acs["spm_unit_spm_threshold"] = calculate_spm_thresholds_national( - num_adults=num_adults, - num_children=num_children, - tenure_codes=tenure_codes, - year=time_period, - ) + def add_spm_variables(acs: h5py.File, spm_unit: DataFrame) -> None: + acs["spm_unit_net_income_reported"] = spm_unit.SPM_RESOURCES + acs["spm_unit_spm_threshold"] = spm_unit.SPM_POVTHRESHOLD @staticmethod def add_household_variables(acs: h5py.File, household: DataFrame) -> None: diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py index 8bbf5746..9b09a38f 100644 --- a/policyengine_us_data/datasets/cps/cps.py +++ b/policyengine_us_data/datasets/cps/cps.py @@ -78,7 +78,7 @@ def generate(self): undocumented_students_target=0.21 * 1.9e6, ) logging.info("Adding family variables") - add_spm_variables(cps, spm_unit, self.time_period) + add_spm_variables(self, cps, spm_unit) logging.info("Adding household variables") add_household_variables(cps, household) logging.info("Adding rent") @@ -602,11 +602,7 @@ def add_personal_income_variables( cps[f"{var}_would_be_qualified"] = rng.random(len(person)) < prob -def add_spm_variables( - cps: h5py.File, - spm_unit: DataFrame, - time_period: int, -) -> None: +def add_spm_variables(self, cps: h5py.File, spm_unit: DataFrame) -> None: from policyengine_us_data.utils.spm import ( calculate_spm_thresholds_with_geoadj, ) @@ -639,7 +635,7 @@ def add_spm_variables( num_children=spm_unit["SPM_NUMKIDS"].values, tenure_codes=spm_unit["SPM_TENMORTSTATUS"].values, geoadj=spm_unit["SPM_GEOADJ"].values, - year=time_period, + year=self.time_period, ) cps["reduced_price_school_meals_reported"] = ( From 0addc175e0838d35ba40090104abea54a951725d Mon Sep 17 00:00:00 2001 From: David Trimmer Date: Fri, 12 Dec 2025 12:08:38 -0500 Subject: [PATCH 08/10] Remove redundant comments and unused functions from spm.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove redundant comment above TENURE_CODE_MAP (self-explanatory) - Remove calculate_spm_thresholds_national (unused after ACS revert) - Remove map_tenure_acs_to_spm (unused after ACS revert) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- policyengine_us_data/utils/spm.py | 69 ------------------------------- 1 file changed, 69 deletions(-) diff --git a/policyengine_us_data/utils/spm.py b/policyengine_us_data/utils/spm.py index a8c552cb..6211489c 100644 --- a/policyengine_us_data/utils/spm.py +++ b/policyengine_us_data/utils/spm.py @@ -4,11 +4,6 @@ from spm_calculator import SPMCalculator, spm_equivalence_scale -# Census CPS SPM_TENMORTSTATUS codes to spm-calculator tenure mapping -# Based on IPUMS SPMMORT documentation: -# 1 = Owner with mortgage -# 2 = Owner without mortgage -# 3 = Renter TENURE_CODE_MAP = { 1: "owner_with_mortgage", 2: "owner_without_mortgage", @@ -58,46 +53,6 @@ def calculate_spm_thresholds_with_geoadj( return thresholds -def calculate_spm_thresholds_national( - num_adults: np.ndarray, - num_children: np.ndarray, - tenure_codes: np.ndarray, - year: int, -) -> np.ndarray: - """ - Calculate SPM thresholds using national-level thresholds (no geoadj). - - This is used for datasets like ACS that don't have pre-computed - geographic adjustment factors. - - Args: - num_adults: Array of number of adults (18+) in each SPM unit. - num_children: Array of number of children (<18) in each SPM unit. - tenure_codes: Array of Census tenure/mortgage status codes. 
- 1 = owner with mortgage, 2 = owner without mortgage, 3 = renter. - year: The year for which to calculate thresholds. - - Returns: - Array of SPM threshold values using national averages. - """ - calc = SPMCalculator(year=year) - base_thresholds = calc.get_base_thresholds() - - n = len(num_adults) - thresholds = np.zeros(n) - - for i in range(n): - tenure_str = TENURE_CODE_MAP.get(int(tenure_codes[i]), "renter") - base = base_thresholds[tenure_str] - equiv_scale = spm_equivalence_scale( - int(num_adults[i]), int(num_children[i]) - ) - # No geographic adjustment for national-level thresholds - thresholds[i] = base * equiv_scale - - return thresholds - - def calculate_spm_thresholds_by_puma( num_adults: np.ndarray, num_children: np.ndarray, @@ -151,27 +106,3 @@ def calculate_spm_thresholds_by_puma( ) return thresholds - - -def map_tenure_acs_to_spm(tenure_type: np.ndarray) -> np.ndarray: - """ - Map ACS tenure type values to spm-calculator tenure codes. - - Args: - tenure_type: Array of ACS TEN values. - 1 = Owned with mortgage/loan - 2 = Owned free and clear - 3 = Rented - - Returns: - Array of tenure code integers matching Census SPM format. - """ - # ACS TEN codes map directly to Census SPM codes: - # ACS 1 (owned with mortgage) -> Census 1 (owner_with_mortgage) - # ACS 2 (owned outright) -> Census 2 (owner_without_mortgage) - # ACS 3 (rented) -> Census 3 (renter) - return np.where( - np.isin(tenure_type, [1, 2, 3]), - tenure_type, - 3, # Default to renter for unknown values - ) From ecde9650576aecc2235c2d12fe6780920ccf4588 Mon Sep 17 00:00:00 2001 From: David Trimmer Date: Fri, 12 Dec 2025 12:09:54 -0500 Subject: [PATCH 09/10] Remove unused calculate_spm_thresholds_by_puma function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- policyengine_us_data/utils/spm.py | 55 ------------------------------- 1 file changed, 55 deletions(-) diff --git a/policyengine_us_data/utils/spm.py b/policyengine_us_data/utils/spm.py index 6211489c..070db533 100644 --- a/policyengine_us_data/utils/spm.py +++ b/policyengine_us_data/utils/spm.py @@ -51,58 +51,3 @@ def calculate_spm_thresholds_with_geoadj( thresholds[i] = base * equiv_scale * geoadj[i] return thresholds - - -def calculate_spm_thresholds_by_puma( - num_adults: np.ndarray, - num_children: np.ndarray, - tenure_codes: np.ndarray, - state_fips: np.ndarray, - puma_codes: np.ndarray, - year: int, -) -> np.ndarray: - """ - Calculate SPM thresholds using PUMA-level geographic adjustments. - - This function uses the spm-calculator's PUMA-level geographic adjustment - lookup to compute thresholds with local housing cost adjustments. - Requires CENSUS_API_KEY environment variable to be set. - - Args: - num_adults: Array of number of adults (18+) in each SPM unit. - num_children: Array of number of children (<18) in each SPM unit. - tenure_codes: Array of Census tenure/mortgage status codes. - 1 = owner with mortgage, 2 = owner without mortgage, 3 = renter. - state_fips: Array of 2-digit state FIPS codes (integers). - puma_codes: Array of 5-digit PUMA codes (integers). - year: The year for which to calculate thresholds. - - Returns: - Array of SPM threshold values with PUMA-level geographic adjustments. 
- """ - calc = SPMCalculator(year=year) - - # Build 7-digit PUMA identifiers: 2-digit state + 5-digit PUMA - # Format as strings with proper zero-padding - puma_ids = np.array( - [ - f"{int(state):02d}{int(puma):05d}" - for state, puma in zip(state_fips, puma_codes) - ] - ) - - # Map tenure codes to strings - tenure_strs = np.array( - [TENURE_CODE_MAP.get(int(t), "renter") for t in tenure_codes] - ) - - # Use vectorized calculation from spm-calculator - thresholds = calc.calculate_thresholds( - num_adults=num_adults.astype(int), - num_children=num_children.astype(int), - tenure=tenure_strs, - geography_type="puma", - geography_ids=puma_ids, - ) - - return thresholds From 3c000981a112883c5386040fc4ef8e67eaf9eb13 Mon Sep 17 00:00:00 2001 From: David Trimmer Date: Fri, 19 Dec 2025 12:17:34 -0500 Subject: [PATCH 10/10] switch to PyPi --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 06a7af91..14acb68e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ dependencies = [ "sqlalchemy>=2.0.41", "sqlmodel>=0.0.24", "xlrd>=2.0.2", - "spm-calculator @ git+https://github.com/PolicyEngine/spm-calculator.git", + "spm-calculator>=0.1.0", ] [project.optional-dependencies]