From e6a40a38c731fd03c8c00a1b3705d4820457dd5d Mon Sep 17 00:00:00 2001
From: "baogorek@gmail.com" <baogorek@gmail.com>
Date: Thu, 20 Nov 2025 07:22:49 -0500
Subject: [PATCH 01/12] Fix documentation build timeout in CI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove 10-second timeout and || true from make documentation target.
The timeout was causing builds to fail silently, resulting in missing
docs/_build/site directory and GitHub Pages deployment failures.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Makefile b/Makefile
index 994173d1..7bcad6c2 100644
--- a/Makefile
+++ b/Makefile
@@ -33,8 +33,8 @@ documentation:
 	rm -rf _build .jupyter_cache && \
 	rm -f _toc.yml && \
 	myst clean && \
-	timeout 10 myst build --html || true
-	cd docs && test -d _build/site && touch _build/site/.nojekyll || true
+	myst build --html
+	cd docs && test -d _build/site && touch _build/site/.nojekyll
 
 documentation-build:
 	cd docs && \

From 1378d94b510231a1258510ebf17427dd58991073 Mon Sep 17 00:00:00 2001
From: "baogorek@gmail.com" <baogorek@gmail.com>
Date: Thu, 20 Nov 2025 08:45:03 -0500
Subject: [PATCH 02/12] Add Node.js setup to workflow for MyST documentation
 builds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

MyST requires Node.js to build documentation. Added setup-node@v4
step to install Node.js 20 before package installation.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .github/workflows/reusable_test.yaml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/workflows/reusable_test.yaml b/.github/workflows/reusable_test.yaml
index 401efd17..87dccec6 100644
--- a/.github/workflows/reusable_test.yaml
+++ b/.github/workflows/reusable_test.yaml
@@ -45,6 +45,11 @@ jobs:
         with:
           python-version: '3.13'
 
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+
       - uses: "google-github-actions/auth@v2"
         if: inputs.upload_data
         with:

From b556492364c9c756b7f13a809be91e95a92dd0b8 Mon Sep 17 00:00:00 2001
From: "baogorek@gmail.com" <baogorek@gmail.com>
Date: Thu, 20 Nov 2025 10:33:48 -0500
Subject: [PATCH 03/12] Add index.html bootstrap for MyST GitHub Pages
 deployment

---
 Makefile        |  1 +
 docs/index.html | 12 ++++++++++++
 2 files changed, 13 insertions(+)
 create mode 100644 docs/index.html

diff --git a/Makefile b/Makefile
index 7bcad6c2..89f473db 100644
--- a/Makefile
+++ b/Makefile
@@ -35,6 +35,7 @@ documentation:
 	myst clean && \
 	myst build --html
 	cd docs && test -d _build/site && touch _build/site/.nojekyll
+	cd docs && test ! -f _build/site/index.html && cp index.html _build/site/index.html || true
 
 documentation-build:
 	cd docs && \
diff --git a/docs/index.html b/docs/index.html
new file mode 100644
index 00000000..b6aff700
--- /dev/null
+++ b/docs/index.html
@@ -0,0 +1,12 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <title>PolicyEngine US Data</title>
+  <script type="module" crossorigin src="https://unpkg.com/@myst-theme/site@latest/dist/myst-theme-site.js"></script>
+</head>
+<body>
+  <myst-site></myst-site>
+</body>
+</html>

From 515a8c25d15065b61805e9c93bfcd5c9dc4fadae Mon Sep 17 00:00:00 2001
From: "baogorek@gmail.com" <baogorek@gmail.com>
Date: Thu, 20 Nov 2025 12:54:31 -0500
Subject: [PATCH 04/12] Fix GitHub Pages deployment to use _build/html instead
 of _build/site
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

MyST generates static HTML files in _build/html for static hosting,
while _build/site contains dynamic content for MyST server. GitHub
Pages requires the static HTML files.

Changes:
- Deploy docs/_build/html instead of docs/_build/site
- Update Makefile to touch .nojekyll in correct directory
- Remove manual index.html (MyST generates this automatically)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .github/workflows/reusable_test.yaml |  2 +-
 Makefile                             |  3 +--
 docs/index.html                      | 12 ------------
 3 files changed, 2 insertions(+), 15 deletions(-)
 delete mode 100644 docs/index.html

diff --git a/.github/workflows/reusable_test.yaml b/.github/workflows/reusable_test.yaml
index 87dccec6..e09e6d94 100644
--- a/.github/workflows/reusable_test.yaml
+++ b/.github/workflows/reusable_test.yaml
@@ -99,5 +99,5 @@ jobs:
         uses: JamesIves/github-pages-deploy-action@v4
         with:
           branch: gh-pages
-          folder: docs/_build/site
+          folder: docs/_build/html
           clean: true
diff --git a/Makefile b/Makefile
index 89f473db..78d0904d 100644
--- a/Makefile
+++ b/Makefile
@@ -34,8 +34,7 @@ documentation:
 	rm -f _toc.yml && \
 	myst clean && \
 	myst build --html
-	cd docs && test -d _build/site && touch _build/site/.nojekyll
-	cd docs && test ! -f _build/site/index.html && cp index.html _build/site/index.html || true
+	cd docs && test -d _build/html && touch _build/html/.nojekyll || true
 
 documentation-build:
 	cd docs && \
diff --git a/docs/index.html b/docs/index.html
deleted file mode 100644
index b6aff700..00000000
--- a/docs/index.html
+++ /dev/null
@@ -1,12 +0,0 @@
-<!DOCTYPE html>
-<html>
-<head>
-  <meta charset="utf-8">
-  <meta name="viewport" content="width=device-width, initial-scale=1">
-  <title>PolicyEngine US Data</title>
-  <script type="module" crossorigin src="https://unpkg.com/@myst-theme/site@latest/dist/myst-theme-site.js"></script>
-</head>
-<body>
-  <myst-site></myst-site>
-</body>
-</html>

From 7f4579e731ae5f47ea8237c10c30a9ed14041298 Mon Sep 17 00:00:00 2001
From: "baogorek@gmail.com" <baogorek@gmail.com>
Date: Thu, 20 Nov 2025 13:58:55 -0500
Subject: [PATCH 05/12] Add docs/README.md documenting MyST build outputs
 pitfall
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Documents critical distinction between _build/html/ (for static hosting)
and _build/site/ (for development server) to prevent future deployment
mistakes.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 docs/README.md | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 docs/README.md

diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 00000000..d6ab94c3
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,46 @@
+# Documentation
+
+This project uses [MyST Markdown](https://mystmd.org/) for documentation.
+
+## Building Locally
+
+### Requirements
+- Python 3.13+ with dev dependencies: `uv pip install -e .[dev] --system`
+- Node.js 20+ (required by MyST)
+
+### Commands
+```bash
+make documentation        # Build static HTML files
+make documentation-serve  # Serve locally on http://localhost:8080
+```
+
+## Important: MyST Build Outputs
+
+**MyST creates two different outputs - DO NOT confuse them:**
+
+- `_build/html/` - **Static HTML files (use for GitHub Pages deployment)**
+- `_build/site/` - Dynamic content for `myst start` development server only
+
+**GitHub Pages must deploy `_build/html/`**, not `_build/site/`. The `_build/site/` directory contains JSON files for MyST's development server and will result in a blank page on GitHub Pages.
+
+## GitHub Pages Deployment
+
+- Site URL: https://policyengine.github.io/policyengine-us-data/
+- Deployed from: `docs/_build/html/` directory
+- Propagation time: 5-10 minutes after push to gh-pages branch
+- Workflow: `.github/workflows/code_changes.yaml` (on main branch only)
+
+## Troubleshooting
+
+**Blank page after deployment:**
+- Check that workflow deploys `folder: docs/_build/html` (not `_build/site`)
+- Wait 5-10 minutes for GitHub Pages propagation
+- Hard refresh browser (Ctrl+Shift+R / Cmd+Shift+R)
+
+**Build fails in CI:**
+- Ensure Node.js setup step exists in workflow (MyST requires Node.js)
+- Never add timeouts or `|| true` to build commands - they mask failures
+
+**Missing index.html:**
+- MyST auto-generates index.html in `_build/html/`
+- Do not create manual index.html in docs/

From 559397cbee46f72e42b77a091dedf3e673f53a1b Mon Sep 17 00:00:00 2001
From: "baogorek@gmail.com" <baogorek@gmail.com>
Date: Thu, 20 Nov 2025 19:26:41 -0500
Subject: [PATCH 06/12] Add changelog entry for documentation deployment fixes

---
 changelog_entry.yaml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index e69de29b..4aad48fa 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -0,0 +1,10 @@
+- bump: patch
+  changes:
+    fixed:
+    - GitHub Pages documentation deployment (was deploying wrong directory causing blank pages)
+    - Removed timeout and error suppression from documentation build
+    added:
+    - Node.js setup to CI workflow for MyST builds
+    - start_year parameter to load_ssa_age_projections function
+    - MAX_SINGLE_AGE constant to replace hardcoded values
+    - docs/README.md documenting MyST build output pitfall

From 22e4e9fada2ee40e2e115d250898dcec07f5e9c0 Mon Sep 17 00:00:00 2001
From: "baogorek@gmail.com" <baogorek@gmail.com>
Date: Fri, 21 Nov 2025 19:23:46 -0500
Subject: [PATCH 07/12] Add H6 Social Security reform calibration to long-term
 projections
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add load_h6_income_rate_change() to ssa_data.py to load reform ratio targets
- Extend GREG calibration to support H6 revenue impact constraints
- Add --use-h6-reform flag to run_household_projection.py
- Implement H6 reform that phases out SS benefit taxation (2045-2054)
- Use absolute revenue targets (ratio × payroll) for linear GREG constraints
- Skip H6 computation for years with zero reform effect (2025-2044)

The H6 reform calibration ensures microsimulation results match SSA Trustee
Report projections for the revenue impact of phasing out Social Security
benefit taxation.

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
---
 docs/abstract.md                              |  11 +-
 docs/appendix.md                              |  93 ++++++++++-
 docs/conclusion.md                            |   2 +-
 docs/discussion.md                            |   4 +-
 docs/introduction.md                          |   6 +-
 ...2100.ipynb => long_term_projections.ipynb} |  35 ++--
 docs/methodology.md                           |  17 +-
 docs/myst.yml                                 |   2 +-
 .../datasets/cps/long_term/calibration.py     |  25 ++-
 .../cps/long_term/run_household_projection.py | 116 ++++++++++++-
 .../datasets/cps/long_term/ssa_data.py        |  18 ++
 .../storage/social_security_aux.csv           | 154 +++++++++---------
 12 files changed, 357 insertions(+), 126 deletions(-)
 rename docs/{pwbm_ss_comparison_2025_2100.ipynb => long_term_projections.ipynb} (96%)

diff --git a/docs/abstract.md b/docs/abstract.md
index 6741cd2d..b6963c11 100644
--- a/docs/abstract.md
+++ b/docs/abstract.md
@@ -6,15 +6,6 @@ quantile regression forests to impute 67 tax variables from the PUF onto CPS rec
 preserving distributional characteristics while maintaining household composition and member
 relationships. The imputation process alone does not guarantee consistency with official
 statistics, necessitating a reweighting step to align the combined dataset with known
-population totals and administrative benchmarks. We apply a reweighting algorithm that
- calibrates the dataset to 2,813 targets from
-the IRS Statistics of Income, Census population projections, Congressional Budget
-Office benefit program estimates, Treasury
-expenditure data, Joint Committee on Taxation tax expenditure estimates, healthcare
-spending patterns, and other benefit program costs. The reweighting employs dropout-regularized
- gradient descent optimization
-to ensure consistency with administrative benchmarks. Validation shows the enhanced dataset
-reduces error in key tax components by [TO BE CALCULATED]% relative to the baseline CPS.
-The dataset maintains the CPS's demographic detail and geographic granularity while
+population totals and administrative benchmarks. We apply a reweighting algorithm that calibrates the dataset to 2,813 targets from the IRS Statistics of Income, Census population projections, Congressional Budget Office benefit program estimates, Treasury expenditure data, Joint Committee on Taxation tax expenditure estimates, healthcare spending patterns, and other benefit program costs. The reweighting employs dropout-regularized gradient descent optimization to ensure consistency with administrative benchmarks. The dataset maintains the CPS's demographic detail and geographic granularity while
 incorporating tax reporting data from administrative sources. We release the enhanced
 dataset, source code, and documentation to support policy analysis.
diff --git a/docs/appendix.md b/docs/appendix.md
index 2d9d0062..7bc41f73 100644
--- a/docs/appendix.md
+++ b/docs/appendix.md
@@ -46,4 +46,95 @@ for iteration in range(5000):
 
 ### Table A1: Complete List of Imputed Variables
 
-[TO BE GENERATED - Complete list of 72 imputed variables from PUF organized by category]
\ No newline at end of file
+#### Variables Imputed from IRS Public Use File (67 variables)
+
+**Income Variables:**
+- employment_income
+- partnership_s_corp_income
+- social_security
+- taxable_pension_income
+- tax_exempt_pension_income
+- long_term_capital_gains
+- short_term_capital_gains
+- taxable_ira_distributions
+- self_employment_income
+- qualified_dividend_income
+- non_qualified_dividend_income
+- rental_income
+- taxable_unemployment_compensation
+- taxable_interest_income
+- tax_exempt_interest_income
+- estate_income
+- miscellaneous_income
+- farm_income
+- alimony_income
+- farm_rent_income
+- non_sch_d_capital_gains
+- long_term_capital_gains_on_collectibles
+- unrecaptured_section_1250_gain
+- salt_refund_income
+
+**Deductions and Adjustments:**
+- interest_deduction
+- unreimbursed_business_employee_expenses
+- pre_tax_contributions
+- charitable_cash_donations
+- self_employed_pension_contribution_ald
+- domestic_production_ald
+- self_employed_health_insurance_ald
+- charitable_non_cash_donations
+- alimony_expense
+- health_savings_account_ald
+- student_loan_interest
+- investment_income_elected_form_4952
+- early_withdrawal_penalty
+- educator_expense
+- deductible_mortgage_interest
+
+**Tax Credits:**
+- cdcc_relevant_expenses
+- foreign_tax_credit
+- american_opportunity_credit
+- general_business_credit
+- energy_efficient_home_improvement_credit
+- amt_foreign_tax_credit
+- excess_withheld_payroll_tax
+- savers_credit
+- prior_year_minimum_tax_credit
+- other_credits
+
+**Qualified Business Income Variables:**
+- w2_wages_from_qualified_business
+- unadjusted_basis_qualified_property
+- business_is_sstb
+- qualified_reit_and_ptp_income
+- qualified_bdc_income
+- farm_operations_income
+- estate_income_would_be_qualified
+- farm_operations_income_would_be_qualified
+- farm_rent_income_would_be_qualified
+- partnership_s_corp_income_would_be_qualified
+- rental_income_would_be_qualified
+- self_employment_income_would_be_qualified
+
+**Other Tax Variables:**
+- traditional_ira_contributions
+- qualified_tuition_expenses
+- casualty_loss
+- unreported_payroll_tax
+- recapture_of_investment_credit
+
+#### Variables Imputed from Survey of Income and Program Participation (1 variable)
+
+- tip_income
+
+#### Variables Imputed from Survey of Consumer Finances (3 variables)
+
+- networth
+- auto_loan_balance
+- auto_loan_interest
+
+#### Variables Imputed from American Community Survey (2 variables)
+
+- rent
+- real_estate_taxes
\ No newline at end of file
diff --git a/docs/conclusion.md b/docs/conclusion.md
index 7bd2c5a4..519c9f8e 100644
--- a/docs/conclusion.md
+++ b/docs/conclusion.md
@@ -18,7 +18,7 @@ Our work makes several key contributions:
 
 The validation results demonstrate that combining survey and administrative data through principled statistical methods can achieve:
 - Improved income distribution representation
-- Better alignment with program participation totals  
+- Better alignment with program participation totals
 - Maintained demographic and geographic detail
 - Suitable accuracy for policy simulation
 
diff --git a/docs/discussion.md b/docs/discussion.md
index 1bbda6c7..476a813f 100644
--- a/docs/discussion.md
+++ b/docs/discussion.md
@@ -8,7 +8,7 @@ We examine the strengths, limitations, and potential applications of the Enhance
 
 The Enhanced CPS uniquely combines:
 - Demographic detail from the CPS including state identifiers
-- Tax precision from IRS administrative data  
+- Tax precision from IRS administrative data
 - Calibration to contemporary official statistics
 - Open-source availability for research use
 
@@ -26,7 +26,7 @@ The large-scale calibration to 2,813 targets ensures consistency with administra
 
 ### Practical Advantages
 
-For policy analysis, the dataset offers state-level geographic detail enabling subnational analysis, household structure for distributional studies, tax detail for revenue estimation, program participation for benefit analysis, and recent data calibrated to current totals.
+For policy analysis, the dataset offers several key features: state-level geographic detail for subnational analysis, household structure for distributional studies, tax detail for revenue estimation, program participation for benefit analysis, and calibration to current administrative totals.
 
 ## Limitations
 
diff --git a/docs/introduction.md b/docs/introduction.md
index ddcb5e56..78d9591d 100644
--- a/docs/introduction.md
+++ b/docs/introduction.md
@@ -1,10 +1,10 @@
 # Introduction
 
-Microsimulation models require high-quality microdata that accurately represents both demographic characteristics and economic outcomes. The ideal dataset would combine the demographic richness and household structure of surveys with the income precision of administrative tax records. However, publicly available datasets typically excel in one dimension while lacking in the other.
+Microsimulation models require high-quality microdata that accurately represent demographic characteristics and economic outcomes. The ideal dataset would combine the demographic richness and household structure of surveys with the income precision of administrative tax records. However, publicly available datasets typically excel in one dimension while lacking in the other.
 
 The Current Population Survey (CPS) Annual Social and Economic Supplement provides detailed household demographics, family relationships, and program participation data for a representative sample of US households. However, it suffers from well-documented income underreporting, particularly at the top of the distribution. The IRS Public Use File (PUF) contains accurate tax return information but lacks household structure, demographic detail, and state identifiers needed for comprehensive policy analysis.
 
-This paper presents a methodology for creating an Enhanced CPS dataset that combines the strengths of both sources. Through an enhancement process—imputation followed by reweighting—we create a dataset suitable for analyzing both tax and transfer policies at federal and state levels.
+This paper presents a methodology for creating an Enhanced CPS dataset that combines the strengths of both sources. Through an enhancement process: imputation followed by reweighting, we create a dataset suitable for analyzing both tax and transfer policies at federal and state levels.
 
 ## Related Work
 
@@ -24,4 +24,4 @@ Our empirical contribution involves creating and validating a publicly available
 
 From a practical perspective, we provide open-source tools and comprehensive documentation that enable researchers to apply these methods, modify the approach, or build upon our work. This transparency contrasts with existing proprietary models and supports reproducible research. Government agencies could use our framework to enhance their own microsimulation capabilities, while academic researchers gain access to data suitable for analyzing distributional impacts of tax and transfer policies. The modular design allows incremental improvements as new data sources become available.
 
-We organize the remainder of this paper as follows. Section 2 describes our data sources including the primary datasets and calibration targets. Section 3 details the enhancement methodology including both the imputation and reweighting stages. Section 4 presents validation results comparing performance across datasets. Section 5 discusses limitations, applications, and future directions. Section 6 concludes with implications for policy analysis.
\ No newline at end of file
+We organize the remainder of this paper as follows. Section 2 describes our data sources including the primary datasets and calibration targets. Section 3 details the enhancement methodology including both the imputation and reweighting stages. Section 4 presents validation results comparing performance across datasets. Section 5 discusses limitations, applications, and future directions. Section 6 concludes with implications for policy analysis.
diff --git a/docs/pwbm_ss_comparison_2025_2100.ipynb b/docs/long_term_projections.ipynb
similarity index 96%
rename from docs/pwbm_ss_comparison_2025_2100.ipynb
rename to docs/long_term_projections.ipynb
index 3296e4cb..10b07b72 100644
--- a/docs/pwbm_ss_comparison_2025_2100.ipynb
+++ b/docs/long_term_projections.ipynb
@@ -3,10 +3,17 @@
   {
    "cell_type": "markdown",
    "metadata": {},
-   "source": [
-    "# Comparison to Penn Wharton Budget Model: Eliminating Tax on Social Security 2025-2100\n",
-    "## Integrating Economic Uprating with Demographic Reweighting"
-   ]
+   "source": "# Long Term Projections\n## Integrating Economic Uprating with Demographic Reweighting"
+  },
+  {
+   "cell_type": "markdown",
+   "source": "## Executive Summary\n\nThis document outlines an innovative approach for projecting federal income tax revenue through 2100 that uniquely combines sophisticated economic microsimulation with demographic reweighting. By harmonizing PolicyEngine's state-of-the-art tax modeling with Social Security Administration demographic projections, we can isolate and quantify the fiscal impact of population aging while preserving the full complexity of the tax code.",
+   "metadata": {}
+  },
+  {
+   "cell_type": "markdown",
+   "source": "## The Challenge\n\nProjecting tax revenue over a 75-year horizon requires simultaneously modeling two distinct but interrelated dynamics:\n\n**Economic Evolution**: How incomes, prices, and tax parameters change over time\n- Wage growth and income distribution shifts\n- Inflation affecting brackets and deductions\n- Legislative changes and indexing rules\n- Behavioral responses to tax policy\n\n**Demographic Transformation**: How the population structure evolves\n- Baby boom generation aging through retirement\n- Declining birth rates reducing working-age population\n- Increasing longevity extending retirement duration\n- Shifting household composition patterns\n\nTraditional approaches typically sacrifice either economic sophistication (using simplified tax calculations) or demographic realism (holding age distributions constant). Our methodology preserves both.",
+   "metadata": {}
   },
   {
    "cell_type": "markdown",
@@ -176,17 +183,6 @@
     "- `--save-h5`: Save year-specific .h5 files to `./projected_datasets/` directory"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "---\n",
-    "\n",
-    "## Executive Summary\n",
-    "\n",
-    "This document outlines an innovative approach for projecting federal income tax revenue through 2100 that uniquely combines sophisticated economic microsimulation with demographic reweighting. By harmonizing PolicyEngine's state-of-the-art tax modeling with Social Security Administration demographic projections, we can isolate and quantify the fiscal impact of population aging while preserving the full complexity of the tax code."
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -210,13 +206,6 @@
     "Traditional approaches typically sacrifice either economic sophistication (using simplified tax calculations) or demographic realism (holding age distributions constant). Our methodology preserves both."
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Loading and Exploring the Data"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -1023,4 +1012,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}
\ No newline at end of file
diff --git a/docs/methodology.md b/docs/methodology.md
index 77ad6317..603acda6 100644
--- a/docs/methodology.md
+++ b/docs/methodology.md
@@ -147,7 +147,20 @@ From the American Community Survey (ACS), we impute property taxes for homeowner
 
 ### Example: Tip Income Imputation
 
-To illustrate how QRF preserves conditional distributions, consider tip income imputation. The training data from SIPP contains workers with employment income and tip income. For a worker with predictors of $30,000 employment income, age 25, and no children, QRF finds that similar workers in SIPP have a conditional distribution ranging from $0 at the 10th percentile (no tips) to $2,000 at the median, $8,000 at the 90th percentile, and $15,000 at the 99th percentile. If the random quantile drawn is 0.85, the imputed tip income would be approximately $6,500. This approach ensures that some similar workers receive no tips while others receive substantial tips, preserving realistic variation.
+To illustrate how QRF preserves conditional distributions, consider tip income imputation. The training data from SIPP contains workers with employment income and tip income.
+
+For a worker with the following characteristics:
+- Employment income: \$30,000
+- Age: 25
+- Number of children: 0
+
+QRF finds that similar workers in SIPP have a conditional distribution of tip income:
+- 10th percentile: \$0 (no tips)
+- 50th percentile: \$2,000
+- 90th percentile: \$8,000
+- 99th percentile: \$15,000
+
+If the random quantile drawn is 0.85, the imputed tip income would be approximately \$6,500. This approach ensures that some similar workers receive no tips while others receive substantial tips, preserving realistic variation.
 
 ## Stage 2: Reweighting
 
@@ -185,7 +198,7 @@ The calibration process incorporates tax and benefit calculations through Policy
 
 ### Convergence
 
-The optimization converges within iterations. We monitor convergence through the loss value trajectory, weight stability across iterations, and target achievement rates.
+The optimization converges within 500 epochs. We monitor convergence through the loss value trajectory, weight stability across iterations, and target achievement rates.
 
 ## Validation
 
diff --git a/docs/myst.yml b/docs/myst.yml
index 37ddbbe5..a39b3cfb 100644
--- a/docs/myst.yml
+++ b/docs/myst.yml
@@ -24,7 +24,7 @@ project:
     - file: background.md
     - file: data.md
     - file: methodology.md
-    - file: pwbm_ss_comparison_2025_2100.ipynb
+    - file: long_term_projections.ipynb
     - file: discussion.md
     - file: conclusion.md
     - file: appendix.md
diff --git a/policyengine_us_data/datasets/cps/long_term/calibration.py b/policyengine_us_data/datasets/cps/long_term/calibration.py
index 6a8d293e..694312fc 100644
--- a/policyengine_us_data/datasets/cps/long_term/calibration.py
+++ b/policyengine_us_data/datasets/cps/long_term/calibration.py
@@ -83,6 +83,8 @@ def calibrate_greg(
     ss_target=None,
     payroll_values=None,
     payroll_target=None,
+    h6_income_values=None,
+    h6_revenue_target=None,
     n_ages=86,
 ):
     """
@@ -97,6 +99,8 @@ def calibrate_greg(
         ss_target: Optional Social Security target total
         payroll_values: Optional taxable payroll values per household
         payroll_target: Optional taxable payroll target total
+        h6_income_values: Optional H6 reform income values per household
+        h6_revenue_target: Optional H6 reform total revenue impact target
         n_ages: Number of age groups
 
     Returns:
@@ -108,9 +112,13 @@ def calibrate_greg(
         controls[f"age_{age_idx}"] = y_target[age_idx]
 
     # Build auxiliary variables dataframe if any continuous constraints are provided
-    if (ss_values is not None and ss_target is not None) or (
-        payroll_values is not None and payroll_target is not None
-    ):
+    needs_aux_df = (
+        (ss_values is not None and ss_target is not None) or
+        (payroll_values is not None and payroll_target is not None) or
+        (h6_income_values is not None and h6_revenue_target is not None)
+    )
+
+    if needs_aux_df:
         age_cols = {f"age_{i}": X[:, i] for i in range(n_ages)}
         aux_df = pd.DataFrame(age_cols)
 
@@ -122,6 +130,11 @@ def calibrate_greg(
             aux_df["payroll_total"] = payroll_values
             controls["payroll_total"] = payroll_target
 
+        # H6 reform revenue impact as a simple linear constraint
+        if h6_income_values is not None and h6_revenue_target is not None:
+            aux_df["h6_revenue"] = h6_income_values
+            controls["h6_revenue"] = h6_revenue_target
+
         aux_vars = aux_df
     else:
         aux_vars = X
@@ -145,6 +158,8 @@ def calibrate_weights(
     ss_target=None,
     payroll_values=None,
     payroll_target=None,
+    h6_income_values=None,
+    h6_revenue_target=None,
     n_ages=86,
     max_iters=100,
     tol=1e-6,
@@ -163,6 +178,8 @@ def calibrate_weights(
         ss_target: Optional SS target (for GREG with SS)
         payroll_values: Optional payroll values (for GREG with payroll)
         payroll_target: Optional payroll target (for GREG with payroll)
+        h6_income_values: Optional H6 reform income values per household
+        h6_revenue_target: Optional H6 reform total revenue impact target
         n_ages: Number of age groups
         max_iters: Max iterations for IPF
         tol: Convergence tolerance for IPF
@@ -185,6 +202,8 @@ def calibrate_weights(
                 ss_target,
                 payroll_values,
                 payroll_target,
+                h6_income_values,
+                h6_revenue_target,
                 n_ages,
             )
         except Exception as e:
diff --git a/policyengine_us_data/datasets/cps/long_term/run_household_projection.py b/policyengine_us_data/datasets/cps/long_term/run_household_projection.py
index f47d9b74..b6dcd798 100644
--- a/policyengine_us_data/datasets/cps/long_term/run_household_projection.py
+++ b/policyengine_us_data/datasets/cps/long_term/run_household_projection.py
@@ -3,16 +3,17 @@
 
 
 Usage:
-    python run_household_projection.py [END_YEAR] [--greg] [--use-ss] [--use-payroll] [--save-h5]
+    python run_household_projection.py [END_YEAR] [--greg] [--use-ss] [--use-payroll] [--use-h6-reform] [--save-h5]
 
     END_YEAR: Optional ending year (default: 2035)
     --greg: Use GREG calibration instead of IPF (optional)
     --use-ss: Include Social Security benefit totals as calibration target (requires --greg)
     --use-payroll: Include taxable payroll totals as calibration target (requires --greg)
+    --use-h6-reform: Include H6 reform income impact ratio as calibration target (requires --greg)
     --save-h5: Save year-specific .h5 files with calibrated weights to ./projected_datasets/
 
 Examples:
-    python run_household_projection.py 2100 --greg --use-ss --use-payroll --save-h5
+    python run_household_projection.py 2100 --greg --use-ss --use-payroll --use-h6-reform --save-h5
 """
 
 import sys
@@ -36,6 +37,57 @@
 )
 
 
+def create_h6_reform():
+    """
+    Create H6 Social Security reform that phases out benefit taxation.
+
+    The reform has two phases:
+    1. Phase-in (2045-2053): Gradually increase thresholds
+    2. Elimination (2054-2100): Set thresholds to infinity
+    """
+    reform_payload = {
+        "gov.irs.social_security.taxability.threshold.base.main.SINGLE": {},
+        "gov.irs.social_security.taxability.threshold.base.main.JOINT": {},
+        "gov.irs.social_security.taxability.threshold.base.main.SEPARATE": {},
+        "gov.irs.social_security.taxability.threshold.base.main.HEAD_OF_HOUSEHOLD": {},
+        "gov.irs.social_security.taxability.threshold.base.main.SURVIVING_SPOUSE": {},
+    }
+
+    # Phase-in period: 2045 to 2053
+    for year in range(2045, 2054):
+        # Calculate the index (0 for 2045, 1 for 2046, etc.)
+        i = year - 2045
+
+        # H6 Formulas
+        single_val = 32_500 + (7_500 * i)
+        joint_val = 65_000 + (15_000 * i)
+
+        # Create the time key for this specific year
+        time_key = f"{year}-01-01.{year}-12-31"
+
+        # Assign values
+        reform_payload["gov.irs.social_security.taxability.threshold.base.main.SINGLE"][time_key] = single_val
+        reform_payload["gov.irs.social_security.taxability.threshold.base.main.SEPARATE"][time_key] = single_val
+        reform_payload["gov.irs.social_security.taxability.threshold.base.main.HEAD_OF_HOUSEHOLD"][time_key] = single_val
+        reform_payload["gov.irs.social_security.taxability.threshold.base.main.SURVIVING_SPOUSE"][time_key] = single_val
+        reform_payload["gov.irs.social_security.taxability.threshold.base.main.JOINT"][time_key] = joint_val
+
+    # Elimination period: 2054 to 2100
+    # To "Eliminate" taxation, we set the threshold to Infinity (or an arbitrarily high number)
+    final_period_key = "2054-01-01.2100-12-31"
+    inf_value = 9e99  # Effectively infinity
+
+    reform_payload["gov.irs.social_security.taxability.threshold.base.main.SINGLE"][final_period_key] = inf_value
+    reform_payload["gov.irs.social_security.taxability.threshold.base.main.SEPARATE"][final_period_key] = inf_value
+    reform_payload["gov.irs.social_security.taxability.threshold.base.main.HEAD_OF_HOUSEHOLD"][final_period_key] = inf_value
+    reform_payload["gov.irs.social_security.taxability.threshold.base.main.SURVIVING_SPOUSE"][final_period_key] = inf_value
+    reform_payload["gov.irs.social_security.taxability.threshold.base.main.JOINT"][final_period_key] = inf_value
+
+    # Create the Reform Object
+    from policyengine_core.reforms import Reform
+    return Reform.from_dict(reform_payload, country_id="us")
+
+
 # =========================================================================
 # DATASET CONFIGURATION
 # =========================================================================
@@ -79,6 +131,14 @@
         )
         USE_GREG = True
 
+USE_H6_REFORM = "--use-h6-reform" in sys.argv
+if USE_H6_REFORM:
+    sys.argv.remove("--use-h6-reform")
+    if not USE_GREG:
+        print("Warning: --use-h6-reform requires --greg, enabling GREG automatically")
+        USE_GREG = True
+    from ssa_data import load_h6_income_rate_change
+
 SAVE_H5 = "--save-h5" in sys.argv
 if SAVE_H5:
     sys.argv.remove("--save-h5")
@@ -106,6 +166,8 @@
     print(f"  Including Social Security benefits constraint: Yes")
 if USE_PAYROLL:
     print(f"  Including taxable payroll constraint: Yes")
+if USE_H6_REFORM:
+    print(f"  Including H6 reform income impact constraint: Yes")
 if SAVE_H5:
     print(f"  Saving year-specific .h5 files: Yes (to {OUTPUT_DIR}/)")
     os.makedirs(OUTPUT_DIR, exist_ok=True)
@@ -238,6 +300,46 @@
                 f"  [DEBUG {year}] Payroll baseline: ${payroll_baseline/1e9:.1f}B, target: ${payroll_target/1e9:.1f}B"
             )
 
+    h6_income_values = None
+    h6_revenue_target = None
+    if USE_H6_REFORM:
+        # Load target ratio from CSV
+        h6_target_ratio = load_h6_income_rate_change(year)
+
+        # Only calculate H6 reform impacts if the target ratio is non-zero
+        # (Reform has no effect before 2045, so skip computation for efficiency)
+        if h6_target_ratio != 0:
+            # Create and apply H6 reform
+            h6_reform = create_h6_reform()
+            reform_sim = Microsimulation(dataset=BASE_DATASET_PATH, reform=h6_reform)
+
+            # Calculate reform income tax
+            income_tax_reform_hh = reform_sim.calculate(
+                "income_tax", period=year, map_to="household"
+            )
+            income_tax_reform = income_tax_reform_hh.values
+
+            # Revenue impact per household
+            h6_income_values = income_tax_reform - income_tax_values
+
+            # Calculate H6 revenue target: ratio × payroll target
+            # This converts the ratio constraint to an absolute revenue constraint
+            payroll_target_year = load_taxable_payroll_projections(year)
+            h6_revenue_target = h6_target_ratio * payroll_target_year
+
+            # Debug output for key years
+            if year in display_years:
+                h6_impact_baseline = np.sum(h6_income_values * baseline_weights)
+                print(
+                    f"  [DEBUG {year}] H6 baseline revenue: ${h6_impact_baseline/1e9:.3f}B, target: ${h6_revenue_target/1e9:.3f}B"
+                )
+                print(
+                    f"  [DEBUG {year}] H6 target ratio: {h6_target_ratio:.4f} × payroll ${payroll_target_year/1e9:.1f}B"
+                )
+
+            del reform_sim
+            gc.collect()
+
     y_target = target_matrix[:, year_idx]
 
     w_new, iterations = calibrate_weights(
@@ -250,13 +352,15 @@
         ss_target=ss_target,
         payroll_values=payroll_values,
         payroll_target=payroll_target,
+        h6_income_values=h6_income_values,
+        h6_revenue_target=h6_revenue_target,
         n_ages=n_ages,
         max_iters=100,
         tol=1e-6,
         verbose=False,
     )
 
-    if year in display_years and (USE_SS or USE_PAYROLL):
+    if year in display_years and (USE_SS or USE_PAYROLL or USE_H6_REFORM):
         if USE_SS:
             ss_achieved = np.sum(ss_values * w_new)
             print(
@@ -267,6 +371,12 @@
             print(
                 f"  [DEBUG {year}] Payroll achieved: ${payroll_achieved/1e9:.1f}B (error: {(payroll_achieved - payroll_target)/payroll_target*100:.1f}%)"
             )
+        if USE_H6_REFORM and h6_revenue_target is not None:
+            h6_revenue_achieved = np.sum(h6_income_values * w_new)
+            error_pct = (h6_revenue_achieved - h6_revenue_target) / abs(h6_revenue_target) * 100 if h6_revenue_target != 0 else 0
+            print(
+                f"  [DEBUG {year}] H6 achieved revenue: ${h6_revenue_achieved/1e9:.3f}B (error: {error_pct:.1f}%)"
+            )
 
     weights_matrix[:, year_idx] = w_new
     baseline_weights_matrix[:, year_idx] = baseline_weights
diff --git a/policyengine_us_data/datasets/cps/long_term/ssa_data.py b/policyengine_us_data/datasets/cps/long_term/ssa_data.py
index 248e9dc9..46582262 100644
--- a/policyengine_us_data/datasets/cps/long_term/ssa_data.py
+++ b/policyengine_us_data/datasets/cps/long_term/ssa_data.py
@@ -71,3 +71,21 @@ def load_taxable_payroll_projections(year):
     row = df[df["year"] == year]
     nominal_billions = row["taxable_payroll_in_billion_nominal_usd"].values[0]
     return nominal_billions * 1e9
+
+
+def load_h6_income_rate_change(year):
+    """
+    Load H6 reform income rate change target for a given year.
+
+    Args:
+        year: Year to load rate change for
+
+    Returns:
+        H6 income rate change as decimal (e.g., -0.0018 for -0.18%)
+    """
+    csv_path = STORAGE_FOLDER / "social_security_aux.csv"
+    df = pd.read_csv(csv_path)
+
+    row = df[df["year"] == year]
+    # CSV stores as percentage (e.g., -0.18), convert to decimal
+    return row["h6_income_rate_change"].values[0] / 100
diff --git a/policyengine_us_data/storage/social_security_aux.csv b/policyengine_us_data/storage/social_security_aux.csv
index abe4be38..07f875fb 100644
--- a/policyengine_us_data/storage/social_security_aux.csv
+++ b/policyengine_us_data/storage/social_security_aux.csv
@@ -1,77 +1,77 @@
-year,oasdi_cost_in_billion_2025_usd,cpi_w_intermediate,oasdi_cost_in_billion_nominal_usd,taxable_payroll_in_billion_nominal_usd
-2025,1609,100,1609,10621.00
-2026,1660,102.49,1701.334,11129.00
-2027,1715,104.95,1799.8925,11627.00
-2028,1763,107.47,1894.6961,12159.00
-2029,1810,110.05,1991.905,12696.00
-2030,1856,112.69,2091.5264,13239.00
-2031,1903,115.4,2196.062,13798.00
-2032,1947,118.17,2300.7699,14380.00
-2033,1991,121,2409.11,14987.00
-2034,2032,123.91,2517.8512,15594.00
-2035,2073,126.88,2630.2224,16205.00
-2036,2114,129.93,2746.7202,16825.00
-2037,2155,133.04,2867.012,17465.00
-2038,2194,136.24,2989.1056,18132.00
-2039,2233,139.51,3115.2583,18819.00
-2040,2270,142.86,3242.922,19532.00
-2041,2306,146.28,3373.2168,20269.00
-2042,2342,149.79,3508.0818,21035.00
-2043,2378,153.39,3647.6142,21828.00
-2044,2415,157.07,3793.2405,22653.00
-2045,2452,160.84,3943.7968,23507.00
-2046,2488,164.7,4097.736,24391.00
-2047,2527,168.65,4261.7855,25313.00
-2048,2567,172.7,4433.209,26270.00
-2049,2609,176.85,4614.0165,27263.00
-2050,2652,181.09,4802.5068,28300.00
-2051,2696,185.44,4999.4624,29376.00
-2052,2743,189.89,5208.6827,30494.00
-2053,2792,194.44,5428.7648,31661.00
-2054,2842,199.11,5658.7062,32869.00
-2055,2895,203.89,5902.6155,34124.00
-2056,2950,208.78,6159.01,35432.00
-2057,3007,213.79,6428.6653,36790.00
-2058,3066,218.93,6712.3938,38201.00
-2059,3125,224.18,7005.625,39670.00
-2060,3184,229.56,7309.1904,41196.00
-2061,3243,235.07,7623.3201,42782.00
-2062,3303,240.71,7950.6513,44429.00
-2063,3362,246.49,8286.9938,46136.00
-2064,3422,252.4,8637.128,47902.00
-2065,3483,258.46,9002.1618,49733.00
-2066,3544,264.66,9379.5504,51631.00
-2067,3607,271.02,9775.6914,53598.00
-2068,3670,277.52,10184.984,55637.00
-2069,3735,284.18,10614.123,57746.00
-2070,3801,291,11060.91,59930.00
-2071,3867,297.99,11523.2733,62196.00
-2072,3934,305.14,12004.2076,64543.00
-2073,4002,312.46,12504.6492,66975.00
-2074,4071,319.96,13025.5716,69501.00
-2075,4139,327.64,13561.0196,72131.00
-2076,4206,335.5,14111.13,74862.00
-2077,4273,343.55,14679.8915,77698.00
-2078,4339,351.8,15264.602,80650.00
-2079,4403,360.24,15861.3672,83727.00
-2080,4467,368.89,16478.3163,86933.00
-2081,4530,377.74,17111.622,90268.00
-2082,4593,386.81,17766.1833,93749.00
-2083,4655,396.09,18437.9895,97381.00
-2084,4716,405.6,19128.096,101163.00
-2085,4775,415.33,19832.0075,105104.00
-2086,4833,425.3,20554.749,109217.00
-2087,4891,435.51,21300.7941,113504.00
-2088,4948,445.96,22066.1008,117973.00
-2089,5006,456.66,22860.3996,122629.00
-2090,5064,467.62,23680.2768,127477.00
-2091,5125,478.84,24540.55,132518.00
-2092,5188,490.34,25438.8392,137764.00
-2093,5254,502.1,26380.334,143215.00
-2094,5323,514.16,27368.7368,148876.00
-2095,5396,526.49,28409.4004,154754.00
-2096,5472,539.13,29501.1936,160855.00
-2097,5551,552.07,30645.4057,167185.00
-2098,5633,565.32,31844.4756,173750.00
-2099,5719,578.89,33106.7191,180557.00
-2100,5809,592.78,34434.5902,187614.00
+year,oasdi_cost_in_billion_2025_usd,cpi_w_intermediate,oasdi_cost_in_billion_nominal_usd,taxable_payroll_in_billion_nominal_usd,h6_income_rate_change
+2025,1609,100,1609,10621,0
+2026,1660,102.49,1701.334,11129,0
+2027,1715,104.95,1799.8925,11627,0
+2028,1763,107.47,1894.6961,12159,0
+2029,1810,110.05,1991.905,12696,0
+2030,1856,112.69,2091.5264,13239,0
+2031,1903,115.4,2196.062,13798,0
+2032,1947,118.17,2300.7699,14380,0
+2033,1991,121,2409.11,14987,0
+2034,2032,123.91,2517.8512,15594,0
+2035,2073,126.88,2630.2224,16205,0
+2036,2114,129.93,2746.7202,16825,0
+2037,2155,133.04,2867.012,17465,0
+2038,2194,136.24,2989.1056,18132,0
+2039,2233,139.51,3115.2583,18819,0
+2040,2270,142.86,3242.922,19532,0
+2041,2306,146.28,3373.2168,20269,0
+2042,2342,149.79,3508.0818,21035,0
+2043,2378,153.39,3647.6142,21828,0
+2044,2415,157.07,3793.2405,22653,0
+2045,2452,160.84,3943.7968,23507,-0.07
+2046,2488,164.7,4097.736,24391,-0.12
+2047,2527,168.65,4261.7855,25313,-0.18
+2048,2567,172.7,4433.209,26270,-0.23
+2049,2609,176.85,4614.0165,27263,-0.27
+2050,2652,181.09,4802.5068,28300,-0.32
+2051,2696,185.44,4999.4624,29376,-0.36
+2052,2743,189.89,5208.6827,30494,-0.4
+2053,2792,194.44,5428.7648,31661,-0.43
+2054,2842,199.11,5658.7062,32869,-1
+2055,2895,203.89,5902.6155,34124,-1.01
+2056,2950,208.78,6159.01,35432,-1.01
+2057,3007,213.79,6428.6653,36790,-1.02
+2058,3066,218.93,6712.3938,38201,-1.03
+2059,3125,224.18,7005.625,39670,-1.04
+2060,3184,229.56,7309.1904,41196,-1.04
+2061,3243,235.07,7623.3201,42782,-1.05
+2062,3303,240.71,7950.6513,44429,-1.06
+2063,3362,246.49,8286.9938,46136,-1.06
+2064,3422,252.4,8637.128,47902,-1.07
+2065,3483,258.46,9002.1618,49733,-1.07
+2066,3544,264.66,9379.5504,51631,-1.08
+2067,3607,271.02,9775.6914,53598,-1.09
+2068,3670,277.52,10184.984,55637,-1.09
+2069,3735,284.18,10614.123,57746,-1.1
+2070,3801,291,11060.91,59930,-1.1
+2071,3867,297.99,11523.2733,62196,-1.11
+2072,3934,305.14,12004.2076,64543,-1.12
+2073,4002,312.46,12504.6492,66975,-1.12
+2074,4071,319.96,13025.5716,69501,-1.13
+2075,4139,327.64,13561.0196,72131,-1.13
+2076,4206,335.5,14111.13,74862,-1.14
+2077,4273,343.55,14679.8915,77698,-1.14
+2078,4339,351.8,15264.602,80650,-1.14
+2079,4403,360.24,15861.3672,83727,-1.15
+2080,4467,368.89,16478.3163,86933,-1.15
+2081,4530,377.74,17111.622,90268,-1.15
+2082,4593,386.81,17766.1833,93749,-1.15
+2083,4655,396.09,18437.9895,97381,-1.15
+2084,4716,405.6,19128.096,101163,-1.15
+2085,4775,415.33,19832.0075,105104,-1.15
+2086,4833,425.3,20554.749,109217,-1.14
+2087,4891,435.51,21300.7941,113504,-1.14
+2088,4948,445.96,22066.1008,117973,-1.14
+2089,5006,456.66,22860.3996,122629,-1.13
+2090,5064,467.62,23680.2768,127477,-1.13
+2091,5125,478.84,24540.55,132518,-1.13
+2092,5188,490.34,25438.8392,137764,-1.12
+2093,5254,502.1,26380.334,143215,-1.12
+2094,5323,514.16,27368.7368,148876,-1.12
+2095,5396,526.49,28409.4004,154754,-1.12
+2096,5472,539.13,29501.1936,160855,-1.12
+2097,5551,552.07,30645.4057,167185,-1.11
+2098,5633,565.32,31844.4756,173750,-1.11
+2099,5719,578.89,33106.7191,180557,-1.12
+2100,5809,592.78,34434.5902,187614,-1.12

From ed19be6aa0aaa36c12eee5d311aef42234076f9f Mon Sep 17 00:00:00 2001
From: "baogorek@gmail.com" <baogorek@gmail.com>
Date: Sun, 23 Nov 2025 15:34:50 -0500
Subject: [PATCH 08/12] Add configurable start year and improve H6 reform
 implementation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add START_YEAR parameter to run_household_projection.py CLI
- Enhance H6 reform with threshold crossover handling for OASDI/HI tiers
- Implement min/max swapping logic to prevent engine errors when OASDI thresholds exceed HI thresholds
- Update usage documentation with clearer examples
- Add additional SSA data source URL to storage README

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .../cps/long_term/run_household_projection.py | 154 +++++++++++++-----
 policyengine_us_data/storage/README.md        |   4 +-
 2 files changed, 119 insertions(+), 39 deletions(-)

diff --git a/policyengine_us_data/datasets/cps/long_term/run_household_projection.py b/policyengine_us_data/datasets/cps/long_term/run_household_projection.py
index b6dcd798..c3178843 100644
--- a/policyengine_us_data/datasets/cps/long_term/run_household_projection.py
+++ b/policyengine_us_data/datasets/cps/long_term/run_household_projection.py
@@ -3,8 +3,9 @@
 
 
 Usage:
-    python run_household_projection.py [END_YEAR] [--greg] [--use-ss] [--use-payroll] [--use-h6-reform] [--save-h5]
+    python run_household_projection.py [START_YEAR] [END_YEAR] [--greg] [--use-ss] [--use-payroll] [--use-h6-reform] [--save-h5]
 
+    START_YEAR: Optional starting year (default: 2025)
     END_YEAR: Optional ending year (default: 2035)
     --greg: Use GREG calibration instead of IPF (optional)
     --use-ss: Include Social Security benefit totals as calibration target (requires --greg)
@@ -13,7 +14,8 @@
     --save-h5: Save year-specific .h5 files with calibrated weights to ./projected_datasets/
 
 Examples:
-    python run_household_projection.py 2100 --greg --use-ss --use-payroll --use-h6-reform --save-h5
+    python run_household_projection.py 2045 2045 --greg --use-ss  # single year
+    python run_household_projection.py 2025 2100 --greg --use-ss --use-payroll --use-h6-reform --save-h5
 """
 
 import sys
@@ -39,49 +41,125 @@
 
 def create_h6_reform():
     """
-    Create H6 Social Security reform that phases out benefit taxation.
-
-    The reform has two phases:
-    1. Phase-in (2045-2053): Gradually increase thresholds
-    2. Elimination (2054-2100): Set thresholds to infinity
+    Implements Proposal H6:
+    1. Phase out OASDI taxation (Tier 1) from 2045-2053 by raising thresholds.
+    2. Eliminate OASDI taxation fully in 2054+ (set Tier 1 rate to 0%).
+    3. HOLD HARMLESS: Maintain HI taxation (Tier 2) revenue at current law levels throughout.
+    
+    CRITICAL: Handles the "Threshold Crossover" problem.
+    As OASDI thresholds rise above HI thresholds ($34k/$44k), we must
+    swap the parameter definitions to prevent the engine from breaking.
     """
+    
     reform_payload = {
+        # Thresholds
         "gov.irs.social_security.taxability.threshold.base.main.SINGLE": {},
         "gov.irs.social_security.taxability.threshold.base.main.JOINT": {},
-        "gov.irs.social_security.taxability.threshold.base.main.SEPARATE": {},
         "gov.irs.social_security.taxability.threshold.base.main.HEAD_OF_HOUSEHOLD": {},
         "gov.irs.social_security.taxability.threshold.base.main.SURVIVING_SPOUSE": {},
+        "gov.irs.social_security.taxability.threshold.base.main.SEPARATE": {},
+
+        "gov.irs.social_security.taxability.threshold.adjusted_base.main.SINGLE": {},
+        "gov.irs.social_security.taxability.threshold.adjusted_base.main.JOINT": {},
+        "gov.irs.social_security.taxability.threshold.adjusted_base.main.HEAD_OF_HOUSEHOLD": {},
+        "gov.irs.social_security.taxability.threshold.adjusted_base.main.SURVIVING_SPOUSE": {},
+        "gov.irs.social_security.taxability.threshold.adjusted_base.main.SEPARATE": {},
+
+        # Rates - Base (Tier 1)
+        "gov.irs.social_security.taxability.rate.base.benefit_cap": {},
+        "gov.irs.social_security.taxability.rate.base.excess": {},
+        
+        # Rates - Additional (Tier 2 - HI)
+        "gov.irs.social_security.taxability.rate.additional.benefit_cap": {},
+        "gov.irs.social_security.taxability.rate.additional.excess": {},
     }
 
-    # Phase-in period: 2045 to 2053
+    # --- CONSTANTS: CURRENT LAW HI THRESHOLDS (FROZEN) ---
+    # We must preserve these specific triggers to protect the HI Trust Fund
+    HI_SINGLE = 34_000
+    HI_JOINT = 44_000
+    
+    # --- PHASE 1: THE TRANSITION (2045-2053) ---
     for year in range(2045, 2054):
-        # Calculate the index (0 for 2045, 1 for 2046, etc.)
+        period = f"{year}-01-01"
         i = year - 2045
-
-        # H6 Formulas
-        single_val = 32_500 + (7_500 * i)
-        joint_val = 65_000 + (15_000 * i)
-
-        # Create the time key for this specific year
-        time_key = f"{year}-01-01.{year}-12-31"
-
-        # Assign values
-        reform_payload["gov.irs.social_security.taxability.threshold.base.main.SINGLE"][time_key] = single_val
-        reform_payload["gov.irs.social_security.taxability.threshold.base.main.SEPARATE"][time_key] = single_val
-        reform_payload["gov.irs.social_security.taxability.threshold.base.main.HEAD_OF_HOUSEHOLD"][time_key] = single_val
-        reform_payload["gov.irs.social_security.taxability.threshold.base.main.SURVIVING_SPOUSE"][time_key] = single_val
-        reform_payload["gov.irs.social_security.taxability.threshold.base.main.JOINT"][time_key] = joint_val
-
-    # Elimination period: 2054 to 2100
-    # To "Eliminate" taxation, we set the threshold to Infinity (or an arbitrarily high number)
-    final_period_key = "2054-01-01.2100-12-31"
-    inf_value = 9e99  # Effectively infinity
-
-    reform_payload["gov.irs.social_security.taxability.threshold.base.main.SINGLE"][final_period_key] = inf_value
-    reform_payload["gov.irs.social_security.taxability.threshold.base.main.SEPARATE"][final_period_key] = inf_value
-    reform_payload["gov.irs.social_security.taxability.threshold.base.main.HEAD_OF_HOUSEHOLD"][final_period_key] = inf_value
-    reform_payload["gov.irs.social_security.taxability.threshold.base.main.SURVIVING_SPOUSE"][final_period_key] = inf_value
-    reform_payload["gov.irs.social_security.taxability.threshold.base.main.JOINT"][final_period_key] = inf_value
+        
+        # 1. Calculate the Target OASDI Thresholds (Rising)
+        #    (a) 2045 = $32,500 ... (i) 2053 = $92,500
+        oasdi_target_single = 32_500 + (7_500 * i)
+        oasdi_target_joint = 65_000 + (15_000 * i)
+        
+        # 2. Handle Threshold Crossover
+        #    OASDI thresholds rise above HI thresholds during phase-out.
+        #    We must swap parameters: put lower threshold in 'base' slot.
+
+        # --- SET RATES FOR TRANSITION (2045-2053) ---
+        # Joint filers cross immediately in 2045 ($65k OASDI > $44k HI).
+        # Single filers cross in 2046 ($40k OASDI > $34k HI).
+        #
+        # PolicyEngine forces one global rate structure per year.
+        # We choose swapped rates (0.35/0.85) for ALL years to minimize error:
+        #
+        # Trade-off in 2045:
+        #   - Single filers: $225 undertax (15% on $1.5k range) ✓ acceptable
+        #   - Joint filers: Would be $3,150 overtax with default rates ✗ unacceptable
+        #
+        # The swapped rate error is 14x smaller and aligns with tax-cutting intent.
+
+        # Tier 1 (Base): HI ONLY (35%)
+        reform_payload["gov.irs.social_security.taxability.rate.base.benefit_cap"][period] = 0.35
+        reform_payload["gov.irs.social_security.taxability.rate.base.excess"][period] = 0.35
+
+        # Tier 2 (Additional): HI + OASDI Combined (85%)
+        reform_payload["gov.irs.social_security.taxability.rate.additional.benefit_cap"][period] = 0.85
+        reform_payload["gov.irs.social_security.taxability.rate.additional.excess"][period] = 0.85
+
+        # --- SET THRESHOLDS (MIN/MAX SWAP) ---
+        # Always put the smaller number in 'base' and larger in 'adjusted_base'
+        
+        # Single
+        reform_payload["gov.irs.social_security.taxability.threshold.base.main.SINGLE"][period] = min(oasdi_target_single, HI_SINGLE)
+        reform_payload["gov.irs.social_security.taxability.threshold.adjusted_base.main.SINGLE"][period] = max(oasdi_target_single, HI_SINGLE)
+        
+        # Joint
+        reform_payload["gov.irs.social_security.taxability.threshold.base.main.JOINT"][period] = min(oasdi_target_joint, HI_JOINT)
+        reform_payload["gov.irs.social_security.taxability.threshold.adjusted_base.main.JOINT"][period] = max(oasdi_target_joint, HI_JOINT)
+        
+        # Map other statuses (Head/Surviving Spouse -> Single logic, Separate -> Single logic usually)
+        # Note: Separate is usually 0, but for H6 strictness we map to Single logic here
+        for status in ["HEAD_OF_HOUSEHOLD", "SURVIVING_SPOUSE", "SEPARATE"]:
+            reform_payload[f"gov.irs.social_security.taxability.threshold.base.main.{status}"][period] = min(oasdi_target_single, HI_SINGLE)
+            reform_payload[f"gov.irs.social_security.taxability.threshold.adjusted_base.main.{status}"][period] = max(oasdi_target_single, HI_SINGLE)
+
+    # --- PHASE 2: ELIMINATION (2054+) ---
+    # OASDI is gone. We only collect HI.
+    # Logic: "Base" becomes the HI tier ($34k). Rate is 0.35.
+    # "Adjusted" becomes irrelevant (set high or rate to same).
+    
+    elim_period = "2054-01-01.2100-12-31"
+    
+    # 1. Set Thresholds to "HI Only" mode
+    # Base = $34k / $44k
+    reform_payload["gov.irs.social_security.taxability.threshold.base.main.SINGLE"][elim_period] = HI_SINGLE
+    reform_payload["gov.irs.social_security.taxability.threshold.base.main.JOINT"][elim_period] = HI_JOINT
+    
+    # Adjusted = Infinity (Disable the second tier effectively)
+    reform_payload["gov.irs.social_security.taxability.threshold.adjusted_base.main.SINGLE"][elim_period] = 9_999_999
+    reform_payload["gov.irs.social_security.taxability.threshold.adjusted_base.main.JOINT"][elim_period] = 9_999_999
+    
+    # Map others
+    for status in ["HEAD_OF_HOUSEHOLD", "SURVIVING_SPOUSE", "SEPARATE"]:
+         reform_payload[f"gov.irs.social_security.taxability.threshold.base.main.{status}"][elim_period] = HI_SINGLE
+         reform_payload[f"gov.irs.social_security.taxability.threshold.adjusted_base.main.{status}"][elim_period] = 9_999_999
+
+    # 2. Set Rates for HI Only Revenue
+    # Tier 1 (Now the ONLY tier) = 35% (HI Share)
+    reform_payload["gov.irs.social_security.taxability.rate.base.benefit_cap"][elim_period] = 0.35
+    reform_payload["gov.irs.social_security.taxability.rate.base.excess"][elim_period] = 0.35
+    
+    # Tier 2 (Disabled via threshold, but zero out for safety)
+    reform_payload["gov.irs.social_security.taxability.rate.additional.benefit_cap"][elim_period] = 0.35
+    reform_payload["gov.irs.social_security.taxability.rate.additional.excess"][elim_period] = 0.35
 
     # Create the Reform Object
     from policyengine_core.reforms import Reform
@@ -104,7 +182,6 @@ def create_h6_reform():
 }
 
 SELECTED_DATASET = "enhanced_cps_2024"
-START_YEAR = 2025
 
 # Load selected dataset configuration
 BASE_DATASET_PATH = DATASET_OPTIONS[SELECTED_DATASET]["path"]
@@ -143,7 +220,8 @@ def create_h6_reform():
 if SAVE_H5:
     sys.argv.remove("--save-h5")
 
-END_YEAR = int(sys.argv[1]) if len(sys.argv) > 1 else 2035
+START_YEAR = int(sys.argv[1]) if len(sys.argv) > 1 else 2025
+END_YEAR = int(sys.argv[2]) if len(sys.argv) > 2 else 2035
 
 if USE_GREG:
     from samplics.weighting import SampleWeight
@@ -184,7 +262,7 @@ def create_h6_reform():
 print("STEP 1: DEMOGRAPHIC PROJECTIONS")
 print("=" * 70)
 
-target_matrix = load_ssa_age_projections(end_year=END_YEAR)
+target_matrix = load_ssa_age_projections(start_year=START_YEAR, end_year=END_YEAR)
 n_years = target_matrix.shape[1]
 n_ages = target_matrix.shape[0]
 
diff --git a/policyengine_us_data/storage/README.md b/policyengine_us_data/storage/README.md
index dfe5576d..2b0da6b9 100644
--- a/policyengine_us_data/storage/README.md
+++ b/policyengine_us_data/storage/README.md
@@ -25,5 +25,7 @@
 - **social_security_aux.csv**
   • Source: SSA Single Year supplementary tables
   • Date: 2025 Trustees Report
-  • Location: https://www.ssa.gov/oact/tr/2025/lrIndex.html
+  • Locations:
+     - https://www.ssa.gov/oact/tr/2025/lrIndex.html
+     - `https://www.ssa.gov/oact/solvency/provisions/tables/table_run133.html`
   • Notes: Contains OASDI cost projections and taxable payroll data (2025-2100) 

From cd00cdb58a785dac67e5db8f0dcc3cec97ab96f7 Mon Sep 17 00:00:00 2001
From: "baogorek@gmail.com" <baogorek@gmail.com>
Date: Wed, 3 Dec 2025 14:40:59 -0500
Subject: [PATCH 09/12] linting

---
 .../datasets/cps/long_term/calibration.py     |   6 +-
 .../cps/long_term/run_household_projection.py | 134 ++++++++++++------
 2 files changed, 96 insertions(+), 44 deletions(-)

diff --git a/policyengine_us_data/datasets/cps/long_term/calibration.py b/policyengine_us_data/datasets/cps/long_term/calibration.py
index 694312fc..5019baab 100644
--- a/policyengine_us_data/datasets/cps/long_term/calibration.py
+++ b/policyengine_us_data/datasets/cps/long_term/calibration.py
@@ -113,9 +113,9 @@ def calibrate_greg(
 
     # Build auxiliary variables dataframe if any continuous constraints are provided
     needs_aux_df = (
-        (ss_values is not None and ss_target is not None) or
-        (payroll_values is not None and payroll_target is not None) or
-        (h6_income_values is not None and h6_revenue_target is not None)
+        (ss_values is not None and ss_target is not None)
+        or (payroll_values is not None and payroll_target is not None)
+        or (h6_income_values is not None and h6_revenue_target is not None)
     )
 
     if needs_aux_df:
diff --git a/policyengine_us_data/datasets/cps/long_term/run_household_projection.py b/policyengine_us_data/datasets/cps/long_term/run_household_projection.py
index c3178843..9dbb7f2d 100644
--- a/policyengine_us_data/datasets/cps/long_term/run_household_projection.py
+++ b/policyengine_us_data/datasets/cps/long_term/run_household_projection.py
@@ -45,12 +45,12 @@ def create_h6_reform():
     1. Phase out OASDI taxation (Tier 1) from 2045-2053 by raising thresholds.
     2. Eliminate OASDI taxation fully in 2054+ (set Tier 1 rate to 0%).
     3. HOLD HARMLESS: Maintain HI taxation (Tier 2) revenue at current law levels throughout.
-    
+
     CRITICAL: Handles the "Threshold Crossover" problem.
     As OASDI thresholds rise above HI thresholds ($34k/$44k), we must
     swap the parameter definitions to prevent the engine from breaking.
     """
-    
+
     reform_payload = {
         # Thresholds
         "gov.irs.social_security.taxability.threshold.base.main.SINGLE": {},
@@ -58,17 +58,14 @@ def create_h6_reform():
         "gov.irs.social_security.taxability.threshold.base.main.HEAD_OF_HOUSEHOLD": {},
         "gov.irs.social_security.taxability.threshold.base.main.SURVIVING_SPOUSE": {},
         "gov.irs.social_security.taxability.threshold.base.main.SEPARATE": {},
-
         "gov.irs.social_security.taxability.threshold.adjusted_base.main.SINGLE": {},
         "gov.irs.social_security.taxability.threshold.adjusted_base.main.JOINT": {},
         "gov.irs.social_security.taxability.threshold.adjusted_base.main.HEAD_OF_HOUSEHOLD": {},
         "gov.irs.social_security.taxability.threshold.adjusted_base.main.SURVIVING_SPOUSE": {},
         "gov.irs.social_security.taxability.threshold.adjusted_base.main.SEPARATE": {},
-
         # Rates - Base (Tier 1)
         "gov.irs.social_security.taxability.rate.base.benefit_cap": {},
         "gov.irs.social_security.taxability.rate.base.excess": {},
-        
         # Rates - Additional (Tier 2 - HI)
         "gov.irs.social_security.taxability.rate.additional.benefit_cap": {},
         "gov.irs.social_security.taxability.rate.additional.excess": {},
@@ -78,17 +75,17 @@ def create_h6_reform():
     # We must preserve these specific triggers to protect the HI Trust Fund
     HI_SINGLE = 34_000
     HI_JOINT = 44_000
-    
+
     # --- PHASE 1: THE TRANSITION (2045-2053) ---
     for year in range(2045, 2054):
         period = f"{year}-01-01"
         i = year - 2045
-        
+
         # 1. Calculate the Target OASDI Thresholds (Rising)
         #    (a) 2045 = $32,500 ... (i) 2053 = $92,500
         oasdi_target_single = 32_500 + (7_500 * i)
         oasdi_target_joint = 65_000 + (15_000 * i)
-        
+
         # 2. Handle Threshold Crossover
         #    OASDI thresholds rise above HI thresholds during phase-out.
         #    We must swap parameters: put lower threshold in 'base' slot.
@@ -107,62 +104,103 @@ def create_h6_reform():
         # The swapped rate error is 14x smaller and aligns with tax-cutting intent.
 
         # Tier 1 (Base): HI ONLY (35%)
-        reform_payload["gov.irs.social_security.taxability.rate.base.benefit_cap"][period] = 0.35
-        reform_payload["gov.irs.social_security.taxability.rate.base.excess"][period] = 0.35
+        reform_payload[
+            "gov.irs.social_security.taxability.rate.base.benefit_cap"
+        ][period] = 0.35
+        reform_payload["gov.irs.social_security.taxability.rate.base.excess"][
+            period
+        ] = 0.35
 
         # Tier 2 (Additional): HI + OASDI Combined (85%)
-        reform_payload["gov.irs.social_security.taxability.rate.additional.benefit_cap"][period] = 0.85
-        reform_payload["gov.irs.social_security.taxability.rate.additional.excess"][period] = 0.85
+        reform_payload[
+            "gov.irs.social_security.taxability.rate.additional.benefit_cap"
+        ][period] = 0.85
+        reform_payload[
+            "gov.irs.social_security.taxability.rate.additional.excess"
+        ][period] = 0.85
 
         # --- SET THRESHOLDS (MIN/MAX SWAP) ---
         # Always put the smaller number in 'base' and larger in 'adjusted_base'
-        
+
         # Single
-        reform_payload["gov.irs.social_security.taxability.threshold.base.main.SINGLE"][period] = min(oasdi_target_single, HI_SINGLE)
-        reform_payload["gov.irs.social_security.taxability.threshold.adjusted_base.main.SINGLE"][period] = max(oasdi_target_single, HI_SINGLE)
-        
+        reform_payload[
+            "gov.irs.social_security.taxability.threshold.base.main.SINGLE"
+        ][period] = min(oasdi_target_single, HI_SINGLE)
+        reform_payload[
+            "gov.irs.social_security.taxability.threshold.adjusted_base.main.SINGLE"
+        ][period] = max(oasdi_target_single, HI_SINGLE)
+
         # Joint
-        reform_payload["gov.irs.social_security.taxability.threshold.base.main.JOINT"][period] = min(oasdi_target_joint, HI_JOINT)
-        reform_payload["gov.irs.social_security.taxability.threshold.adjusted_base.main.JOINT"][period] = max(oasdi_target_joint, HI_JOINT)
-        
+        reform_payload[
+            "gov.irs.social_security.taxability.threshold.base.main.JOINT"
+        ][period] = min(oasdi_target_joint, HI_JOINT)
+        reform_payload[
+            "gov.irs.social_security.taxability.threshold.adjusted_base.main.JOINT"
+        ][period] = max(oasdi_target_joint, HI_JOINT)
+
         # Map other statuses (Head/Surviving Spouse -> Single logic, Separate -> Single logic usually)
         # Note: Separate is usually 0, but for H6 strictness we map to Single logic here
         for status in ["HEAD_OF_HOUSEHOLD", "SURVIVING_SPOUSE", "SEPARATE"]:
-            reform_payload[f"gov.irs.social_security.taxability.threshold.base.main.{status}"][period] = min(oasdi_target_single, HI_SINGLE)
-            reform_payload[f"gov.irs.social_security.taxability.threshold.adjusted_base.main.{status}"][period] = max(oasdi_target_single, HI_SINGLE)
+            reform_payload[
+                f"gov.irs.social_security.taxability.threshold.base.main.{status}"
+            ][period] = min(oasdi_target_single, HI_SINGLE)
+            reform_payload[
+                f"gov.irs.social_security.taxability.threshold.adjusted_base.main.{status}"
+            ][period] = max(oasdi_target_single, HI_SINGLE)
 
     # --- PHASE 2: ELIMINATION (2054+) ---
     # OASDI is gone. We only collect HI.
     # Logic: "Base" becomes the HI tier ($34k). Rate is 0.35.
     # "Adjusted" becomes irrelevant (set high or rate to same).
-    
+
     elim_period = "2054-01-01.2100-12-31"
-    
+
     # 1. Set Thresholds to "HI Only" mode
     # Base = $34k / $44k
-    reform_payload["gov.irs.social_security.taxability.threshold.base.main.SINGLE"][elim_period] = HI_SINGLE
-    reform_payload["gov.irs.social_security.taxability.threshold.base.main.JOINT"][elim_period] = HI_JOINT
-    
+    reform_payload[
+        "gov.irs.social_security.taxability.threshold.base.main.SINGLE"
+    ][elim_period] = HI_SINGLE
+    reform_payload[
+        "gov.irs.social_security.taxability.threshold.base.main.JOINT"
+    ][elim_period] = HI_JOINT
+
     # Adjusted = Infinity (Disable the second tier effectively)
-    reform_payload["gov.irs.social_security.taxability.threshold.adjusted_base.main.SINGLE"][elim_period] = 9_999_999
-    reform_payload["gov.irs.social_security.taxability.threshold.adjusted_base.main.JOINT"][elim_period] = 9_999_999
-    
+    reform_payload[
+        "gov.irs.social_security.taxability.threshold.adjusted_base.main.SINGLE"
+    ][elim_period] = 9_999_999
+    reform_payload[
+        "gov.irs.social_security.taxability.threshold.adjusted_base.main.JOINT"
+    ][elim_period] = 9_999_999
+
     # Map others
     for status in ["HEAD_OF_HOUSEHOLD", "SURVIVING_SPOUSE", "SEPARATE"]:
-         reform_payload[f"gov.irs.social_security.taxability.threshold.base.main.{status}"][elim_period] = HI_SINGLE
-         reform_payload[f"gov.irs.social_security.taxability.threshold.adjusted_base.main.{status}"][elim_period] = 9_999_999
+        reform_payload[
+            f"gov.irs.social_security.taxability.threshold.base.main.{status}"
+        ][elim_period] = HI_SINGLE
+        reform_payload[
+            f"gov.irs.social_security.taxability.threshold.adjusted_base.main.{status}"
+        ][elim_period] = 9_999_999
 
     # 2. Set Rates for HI Only Revenue
     # Tier 1 (Now the ONLY tier) = 35% (HI Share)
-    reform_payload["gov.irs.social_security.taxability.rate.base.benefit_cap"][elim_period] = 0.35
-    reform_payload["gov.irs.social_security.taxability.rate.base.excess"][elim_period] = 0.35
-    
+    reform_payload["gov.irs.social_security.taxability.rate.base.benefit_cap"][
+        elim_period
+    ] = 0.35
+    reform_payload["gov.irs.social_security.taxability.rate.base.excess"][
+        elim_period
+    ] = 0.35
+
     # Tier 2 (Disabled via threshold, but zero out for safety)
-    reform_payload["gov.irs.social_security.taxability.rate.additional.benefit_cap"][elim_period] = 0.35
-    reform_payload["gov.irs.social_security.taxability.rate.additional.excess"][elim_period] = 0.35
+    reform_payload[
+        "gov.irs.social_security.taxability.rate.additional.benefit_cap"
+    ][elim_period] = 0.35
+    reform_payload[
+        "gov.irs.social_security.taxability.rate.additional.excess"
+    ][elim_period] = 0.35
 
     # Create the Reform Object
     from policyengine_core.reforms import Reform
+
     return Reform.from_dict(reform_payload, country_id="us")
 
 
@@ -212,7 +250,9 @@ def create_h6_reform():
 if USE_H6_REFORM:
     sys.argv.remove("--use-h6-reform")
     if not USE_GREG:
-        print("Warning: --use-h6-reform requires --greg, enabling GREG automatically")
+        print(
+            "Warning: --use-h6-reform requires --greg, enabling GREG automatically"
+        )
         USE_GREG = True
     from ssa_data import load_h6_income_rate_change
 
@@ -262,7 +302,9 @@ def create_h6_reform():
 print("STEP 1: DEMOGRAPHIC PROJECTIONS")
 print("=" * 70)
 
-target_matrix = load_ssa_age_projections(start_year=START_YEAR, end_year=END_YEAR)
+target_matrix = load_ssa_age_projections(
+    start_year=START_YEAR, end_year=END_YEAR
+)
 n_years = target_matrix.shape[1]
 n_ages = target_matrix.shape[0]
 
@@ -389,7 +431,9 @@ def create_h6_reform():
         if h6_target_ratio != 0:
             # Create and apply H6 reform
             h6_reform = create_h6_reform()
-            reform_sim = Microsimulation(dataset=BASE_DATASET_PATH, reform=h6_reform)
+            reform_sim = Microsimulation(
+                dataset=BASE_DATASET_PATH, reform=h6_reform
+            )
 
             # Calculate reform income tax
             income_tax_reform_hh = reform_sim.calculate(
@@ -407,7 +451,9 @@ def create_h6_reform():
 
             # Debug output for key years
             if year in display_years:
-                h6_impact_baseline = np.sum(h6_income_values * baseline_weights)
+                h6_impact_baseline = np.sum(
+                    h6_income_values * baseline_weights
+                )
                 print(
                     f"  [DEBUG {year}] H6 baseline revenue: ${h6_impact_baseline/1e9:.3f}B, target: ${h6_revenue_target/1e9:.3f}B"
                 )
@@ -451,7 +497,13 @@ def create_h6_reform():
             )
         if USE_H6_REFORM and h6_revenue_target is not None:
             h6_revenue_achieved = np.sum(h6_income_values * w_new)
-            error_pct = (h6_revenue_achieved - h6_revenue_target) / abs(h6_revenue_target) * 100 if h6_revenue_target != 0 else 0
+            error_pct = (
+                (h6_revenue_achieved - h6_revenue_target)
+                / abs(h6_revenue_target)
+                * 100
+                if h6_revenue_target != 0
+                else 0
+            )
             print(
                 f"  [DEBUG {year}] H6 achieved revenue: ${h6_revenue_achieved/1e9:.3f}B (error: {error_pct:.1f}%)"
             )

From 35717146820ca737c4d4f783df1ddbffee12091e Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Wed, 3 Dec 2025 17:00:35 -0500
Subject: [PATCH 10/12] Improve PR: Node.js 22 LTS, H6 tests, better changelog
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Update Node.js from 20 to 22 (current Active LTS)
- Add 18 unit tests for H6 reform threshold crossover logic
- Improve changelog to document H6 Social Security reform additions

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .github/workflows/reusable_test.yaml |   2 +-
 changelog_entry.yaml                 |   7 +-
 tests/test_h6_reform.py              | 244 +++++++++++++++++++++++++++
 3 files changed, 249 insertions(+), 4 deletions(-)
 create mode 100644 tests/test_h6_reform.py

diff --git a/.github/workflows/reusable_test.yaml b/.github/workflows/reusable_test.yaml
index e09e6d94..861d6e5c 100644
--- a/.github/workflows/reusable_test.yaml
+++ b/.github/workflows/reusable_test.yaml
@@ -48,7 +48,7 @@ jobs:
       - name: Set up Node.js
         uses: actions/setup-node@v4
         with:
-          node-version: '20'
+          node-version: '22'
 
       - uses: "google-github-actions/auth@v2"
         if: inputs.upload_data
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index 4aad48fa..9a489ead 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -4,7 +4,8 @@
     - GitHub Pages documentation deployment (was deploying wrong directory causing blank pages)
     - Removed timeout and error suppression from documentation build
     added:
-    - Node.js setup to CI workflow for MyST builds
-    - start_year parameter to load_ssa_age_projections function
-    - MAX_SINGLE_AGE constant to replace hardcoded values
+    - Node.js 22 LTS setup to CI workflow for MyST builds
+    - H6 Social Security reform calibration for long-term projections (phases out OASDI taxation 2045-2054)
+    - H6 threshold crossover handling when OASDI thresholds exceed HI thresholds
+    - start_year parameter to run_household_projection.py CLI
     - docs/README.md documenting MyST build output pitfall
diff --git a/tests/test_h6_reform.py b/tests/test_h6_reform.py
new file mode 100644
index 00000000..478abc14
--- /dev/null
+++ b/tests/test_h6_reform.py
@@ -0,0 +1,244 @@
+"""
+Tests for H6 Social Security reform threshold crossover logic.
+
+The H6 reform phases out OASDI taxation from 2045-2053 while preserving
+HI taxation. This requires careful handling when OASDI thresholds rise
+above HI thresholds, necessitating parameter swapping.
+
+These tests validate the mathematical logic without requiring full
+policyengine imports (which need heavy dependencies like torch).
+"""
+
+import pytest
+
+
+# Constants from the H6 reform implementation
+HI_SINGLE = 34_000
+HI_JOINT = 44_000
+
+
+def calculate_oasdi_thresholds(year: int) -> tuple[int, int]:
+    """Calculate OASDI thresholds for a given year during phase-out."""
+    if year < 2045 or year > 2053:
+        raise ValueError("Phase-out only applies to 2045-2053")
+
+    i = year - 2045
+    oasdi_single = 32_500 + (7_500 * i)
+    oasdi_joint = 65_000 + (15_000 * i)
+    return oasdi_single, oasdi_joint
+
+
+def get_swapped_thresholds(
+    oasdi_threshold: int, hi_threshold: int
+) -> tuple[int, int]:
+    """
+    Apply min/max swap to handle threshold crossover.
+
+    Returns (base_threshold, adjusted_threshold) where base <= adjusted.
+    """
+    return min(oasdi_threshold, hi_threshold), max(oasdi_threshold, hi_threshold)
+
+
+def needs_crossover_swap(oasdi_threshold: int, hi_threshold: int) -> bool:
+    """Check if OASDI threshold has crossed above HI threshold."""
+    return oasdi_threshold > hi_threshold
+
+
+class TestH6ThresholdCalculation:
+    """Test OASDI threshold progression during phase-out."""
+
+    def test_2045_single_threshold(self):
+        """2045 single OASDI threshold should be $32,500."""
+        oasdi_single, _ = calculate_oasdi_thresholds(2045)
+        assert oasdi_single == 32_500
+
+    def test_2045_joint_threshold(self):
+        """2045 joint OASDI threshold should be $65,000."""
+        _, oasdi_joint = calculate_oasdi_thresholds(2045)
+        assert oasdi_joint == 65_000
+
+    def test_2053_single_threshold(self):
+        """2053 single OASDI threshold should be $92,500."""
+        oasdi_single, _ = calculate_oasdi_thresholds(2053)
+        assert oasdi_single == 92_500
+
+    def test_2053_joint_threshold(self):
+        """2053 joint OASDI threshold should be $185,000."""
+        _, oasdi_joint = calculate_oasdi_thresholds(2053)
+        assert oasdi_joint == 185_000
+
+    def test_threshold_progression_single(self):
+        """Single thresholds should increase by $7,500 per year."""
+        expected = {
+            2045: 32_500,
+            2046: 40_000,
+            2047: 47_500,
+            2048: 55_000,
+            2049: 62_500,
+            2050: 70_000,
+            2051: 77_500,
+            2052: 85_000,
+            2053: 92_500,
+        }
+        for year, expected_val in expected.items():
+            oasdi_single, _ = calculate_oasdi_thresholds(year)
+            assert oasdi_single == expected_val, f"Year {year}"
+
+    def test_threshold_progression_joint(self):
+        """Joint thresholds should increase by $15,000 per year."""
+        expected = {
+            2045: 65_000,
+            2046: 80_000,
+            2047: 95_000,
+            2048: 110_000,
+            2049: 125_000,
+            2050: 140_000,
+            2051: 155_000,
+            2052: 170_000,
+            2053: 185_000,
+        }
+        for year, expected_val in expected.items():
+            _, oasdi_joint = calculate_oasdi_thresholds(year)
+            assert oasdi_joint == expected_val, f"Year {year}"
+
+
+class TestH6ThresholdCrossover:
+    """Test the threshold crossover detection and handling.
+
+    Key insight: During phase-out, OASDI thresholds rise above HI thresholds.
+    - HI thresholds are frozen at $34k single / $44k joint
+    - Joint filers cross immediately (2045: $65k > $44k)
+    - Single filers cross in 2046 ($40k > $34k)
+    """
+
+    def test_2045_single_no_crossover(self):
+        """In 2045, single OASDI ($32.5k) is below HI ($34k) - no swap needed."""
+        oasdi_single, _ = calculate_oasdi_thresholds(2045)
+        assert not needs_crossover_swap(oasdi_single, HI_SINGLE)
+        assert oasdi_single < HI_SINGLE
+
+    def test_2045_joint_has_crossover(self):
+        """In 2045, joint OASDI ($65k) exceeds HI ($44k) - swap needed."""
+        _, oasdi_joint = calculate_oasdi_thresholds(2045)
+        assert needs_crossover_swap(oasdi_joint, HI_JOINT)
+        assert oasdi_joint > HI_JOINT
+
+    def test_2046_single_has_crossover(self):
+        """In 2046, single OASDI ($40k) exceeds HI ($34k) - swap needed."""
+        oasdi_single, _ = calculate_oasdi_thresholds(2046)
+        assert needs_crossover_swap(oasdi_single, HI_SINGLE)
+        assert oasdi_single > HI_SINGLE
+
+    def test_all_years_joint_crossover(self):
+        """Joint filers have crossover in all phase-out years."""
+        for year in range(2045, 2054):
+            _, oasdi_joint = calculate_oasdi_thresholds(year)
+            assert needs_crossover_swap(oasdi_joint, HI_JOINT), f"Year {year}"
+
+    def test_single_crossover_starts_2046(self):
+        """Single filers cross over starting in 2046."""
+        # 2045: no crossover
+        oasdi_2045, _ = calculate_oasdi_thresholds(2045)
+        assert not needs_crossover_swap(oasdi_2045, HI_SINGLE)
+
+        # 2046+: crossover
+        for year in range(2046, 2054):
+            oasdi_single, _ = calculate_oasdi_thresholds(year)
+            assert needs_crossover_swap(oasdi_single, HI_SINGLE), f"Year {year}"
+
+
+class TestH6ThresholdSwapping:
+    """Test min/max swap ensures base <= adjusted_base."""
+
+    def test_swap_when_oasdi_higher(self):
+        """When OASDI > HI, swap puts HI in base slot."""
+        oasdi = 65_000
+        hi = 44_000
+        base, adjusted = get_swapped_thresholds(oasdi, hi)
+        assert base == hi == 44_000
+        assert adjusted == oasdi == 65_000
+        assert base <= adjusted
+
+    def test_no_swap_when_oasdi_lower(self):
+        """When OASDI < HI, no swap needed."""
+        oasdi = 32_500
+        hi = 34_000
+        base, adjusted = get_swapped_thresholds(oasdi, hi)
+        assert base == oasdi == 32_500
+        assert adjusted == hi == 34_000
+        assert base <= adjusted
+
+    def test_swap_preserves_ordering_all_years(self):
+        """Swapped thresholds always maintain base <= adjusted."""
+        for year in range(2045, 2054):
+            oasdi_single, oasdi_joint = calculate_oasdi_thresholds(year)
+
+            base_s, adj_s = get_swapped_thresholds(oasdi_single, HI_SINGLE)
+            base_j, adj_j = get_swapped_thresholds(oasdi_joint, HI_JOINT)
+
+            assert base_s <= adj_s, f"Single ordering violated in {year}"
+            assert base_j <= adj_j, f"Joint ordering violated in {year}"
+
+
+class TestH6RateSwapping:
+    """Test rate swapping logic during transition.
+
+    Key insight: PolicyEngine requires one rate structure per year.
+    When thresholds cross, we swap to (0.35, 0.85) to minimize error.
+    """
+
+    def test_2045_error_analysis(self):
+        """In 2045, swapped rates minimize error vs default rates."""
+        # 2045 situation:
+        # Single: OASDI=$32.5k, HI=$34k -> $1.5k range affected
+        # Joint: OASDI=$65k, HI=$44k -> $21k range affected
+
+        # With swapped rates (0.35/0.85 instead of 0.50/0.85):
+        # Single: undertax by 15% on $1.5k = $225
+        # With default rates (0.50/0.85):
+        # Joint: overtax by 15% on $21k = $3,150
+
+        single_range = 34_000 - 32_500  # $1,500
+        joint_range = 65_000 - 44_000  # $21,000
+
+        rate_diff = 0.50 - 0.35  # 15%
+
+        single_error_swapped = single_range * rate_diff  # $225 undertax
+        joint_error_default = joint_range * rate_diff  # $3,150 overtax
+
+        assert single_error_swapped == pytest.approx(225)
+        assert joint_error_default == pytest.approx(3_150)
+        assert joint_error_default / single_error_swapped == pytest.approx(14.0), (
+            "Swapped rates should have 14x less error"
+        )
+
+    def test_swapped_rates_align_with_tax_cut_intent(self):
+        """Swapped rates undertax (not overtax), aligning with reform intent."""
+        # H6 is a tax cut - undertaxing is more aligned with legislative intent
+        # than overtaxing would be
+        single_undertax = (34_000 - 32_500) * 0.15  # $225
+        assert single_undertax > 0  # Positive = undertax (taxpayer-favorable)
+
+
+class TestH6EliminationPhase:
+    """Test the post-2054 elimination phase parameters."""
+
+    def test_elimination_thresholds(self):
+        """After 2054, only HI thresholds remain active."""
+        # Base thresholds = HI ($34k/$44k)
+        # Adjusted thresholds = very high (effectively disabled)
+        INFINITY_THRESHOLD = 9_999_999
+
+        assert HI_SINGLE == 34_000
+        assert HI_JOINT == 44_000
+        assert INFINITY_THRESHOLD > HI_SINGLE * 100
+        assert INFINITY_THRESHOLD > HI_JOINT * 100
+
+    def test_elimination_rates(self):
+        """After 2054, both tiers use 35% (HI-only rate)."""
+        HI_RATE = 0.35
+        OASDI_RATE = 0.50  # eliminated
+
+        # In elimination phase, tier 1 = 35%, tier 2 = 35% (no additional)
+        assert HI_RATE == 0.35
+        assert HI_RATE + OASDI_RATE == 0.85  # was combined rate

From b717398f93cfe35d108998c4c057680fe5db43e6 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Wed, 3 Dec 2025 17:01:43 -0500
Subject: [PATCH 11/12] Format test_h6_reform.py with black

---
 tests/test_h6_reform.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/tests/test_h6_reform.py b/tests/test_h6_reform.py
index 478abc14..7253ed97 100644
--- a/tests/test_h6_reform.py
+++ b/tests/test_h6_reform.py
@@ -36,7 +36,9 @@ def get_swapped_thresholds(
 
     Returns (base_threshold, adjusted_threshold) where base <= adjusted.
     """
-    return min(oasdi_threshold, hi_threshold), max(oasdi_threshold, hi_threshold)
+    return min(oasdi_threshold, hi_threshold), max(
+        oasdi_threshold, hi_threshold
+    )
 
 
 def needs_crossover_swap(oasdi_threshold: int, hi_threshold: int) -> bool:
@@ -144,7 +146,9 @@ def test_single_crossover_starts_2046(self):
         # 2046+: crossover
         for year in range(2046, 2054):
             oasdi_single, _ = calculate_oasdi_thresholds(year)
-            assert needs_crossover_swap(oasdi_single, HI_SINGLE), f"Year {year}"
+            assert needs_crossover_swap(
+                oasdi_single, HI_SINGLE
+            ), f"Year {year}"
 
 
 class TestH6ThresholdSwapping:
@@ -208,9 +212,9 @@ def test_2045_error_analysis(self):
 
         assert single_error_swapped == pytest.approx(225)
         assert joint_error_default == pytest.approx(3_150)
-        assert joint_error_default / single_error_swapped == pytest.approx(14.0), (
-            "Swapped rates should have 14x less error"
-        )
+        assert joint_error_default / single_error_swapped == pytest.approx(
+            14.0
+        ), "Swapped rates should have 14x less error"
 
     def test_swapped_rates_align_with_tax_cut_intent(self):
         """Swapped rates undertax (not overtax), aligning with reform intent."""

From 96c2d9f1c616bf0a96f4f0d690157845ae252bfb Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Wed, 3 Dec 2025 17:05:12 -0500
Subject: [PATCH 12/12] Use Node.js 24 LTS (Active LTS) instead of 22

---
 .github/workflows/reusable_test.yaml | 2 +-
 changelog_entry.yaml                 | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/reusable_test.yaml b/.github/workflows/reusable_test.yaml
index 861d6e5c..99cfff16 100644
--- a/.github/workflows/reusable_test.yaml
+++ b/.github/workflows/reusable_test.yaml
@@ -48,7 +48,7 @@ jobs:
       - name: Set up Node.js
         uses: actions/setup-node@v4
         with:
-          node-version: '22'
+          node-version: '24'
 
       - uses: "google-github-actions/auth@v2"
         if: inputs.upload_data
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index 9a489ead..bd0c65df 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -4,7 +4,7 @@
     - GitHub Pages documentation deployment (was deploying wrong directory causing blank pages)
     - Removed timeout and error suppression from documentation build
     added:
-    - Node.js 22 LTS setup to CI workflow for MyST builds
+    - Node.js 24 LTS setup to CI workflow for MyST builds
     - H6 Social Security reform calibration for long-term projections (phases out OASDI taxation 2045-2054)
     - H6 threshold crossover handling when OASDI thresholds exceed HI thresholds
     - start_year parameter to run_household_projection.py CLI