diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index e69de29b..c3d09844 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -0,0 +1,8 @@
+- bump: minor
+ changes:
+ added:
+ - SPM threshold calculation using policyengine/spm-calculator package
+ - New utility module (policyengine_us_data/utils/spm.py) for SPM calculations
+ changed:
+ - CPS datasets now calculate SPM thresholds using spm-calculator with Census-provided geographic adjustments
+ - ACS datasets now calculate SPM thresholds using spm-calculator with national-level thresholds
diff --git a/docs/long_term_projections.ipynb b/docs/long_term_projections.ipynb
deleted file mode 100644
index 10b07b72..00000000
--- a/docs/long_term_projections.ipynb
+++ /dev/null
@@ -1,1015 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": "# Long Term Projections\n## Integrating Economic Uprating with Demographic Reweighting"
- },
- {
- "cell_type": "markdown",
- "source": "## Executive Summary\n\nThis document outlines an innovative approach for projecting federal income tax revenue through 2100 that uniquely combines sophisticated economic microsimulation with demographic reweighting. By harmonizing PolicyEngine's state-of-the-art tax modeling with Social Security Administration demographic projections, we can isolate and quantify the fiscal impact of population aging while preserving the full complexity of the tax code.",
- "metadata": {}
- },
- {
- "cell_type": "markdown",
- "source": "## The Challenge\n\nProjecting tax revenue over a 75-year horizon requires simultaneously modeling two distinct but interrelated dynamics:\n\n**Economic Evolution**: How incomes, prices, and tax parameters change over time\n- Wage growth and income distribution shifts\n- Inflation affecting brackets and deductions\n- Legislative changes and indexing rules\n- Behavioral responses to tax policy\n\n**Demographic Transformation**: How the population structure evolves\n- Baby boom generation aging through retirement\n- Declining birth rates reducing working-age population\n- Increasing longevity extending retirement duration\n- Shifting household composition patterns\n\nTraditional approaches typically sacrifice either economic sophistication (using simplified tax calculations) or demographic realism (holding age distributions constant). Our methodology preserves both.",
- "metadata": {}
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Run projections using `run_household_projection.py`:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2025-11-19T19:56:10.232617Z",
- "iopub.status.busy": "2025-11-19T19:56:10.232523Z",
- "iopub.status.idle": "2025-11-19T19:57:25.892674Z",
- "shell.execute_reply": "2025-11-19T19:57:25.892382Z"
- }
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "TEST_LITE == False\r\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "======================================================================\r\n",
- "HOUSEHOLD-LEVEL INCOME TAX PROJECTION: 2025-2027\r\n",
- "======================================================================\r\n",
- "\r\n",
- "Configuration:\r\n",
- " Base year: 2024 (CPS microdata)\r\n",
- " Projection: 2025-2027\r\n",
- " Calculation level: HOUSEHOLD ONLY (simplified)\r\n",
- " Calibration method: GREG\r\n",
- " Including Social Security benefits constraint: Yes\r\n",
- " Including taxable payroll constraint: Yes\r\n",
- " Saving year-specific .h5 files: Yes (to ./projected_datasets/)\r\n",
- " Years to process: 3\r\n",
- " Estimated time: ~9 minutes\r\n",
- "\r\n",
- "======================================================================\r\n",
- "STEP 1: DEMOGRAPHIC PROJECTIONS\r\n",
- "======================================================================\r\n",
- "\r\n",
- "Loaded SSA projections: 86 ages x 3 years\r\n",
- "\r\n",
- "Population projections:\r\n",
- " 2025: 346.6M\r\n",
- " 2027: 350.9M\r\n",
- "\r\n",
- "======================================================================\r\n",
- "STEP 2: BUILDING HOUSEHOLD AGE COMPOSITION\r\n",
- "======================================================================\r\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\r\n",
- "Loaded 21,532 households\r\n",
- "Household age matrix shape: (21532, 86)\r\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\r\n",
- "======================================================================\r\n",
- "STEP 3: HOUSEHOLD-LEVEL PROJECTION\r\n",
- "======================================================================\r\n",
- "\r\n",
- "Methodology (SIMPLIFIED):\r\n",
- " 1. PolicyEngine uprates to each projection year\r\n",
- " 2. Calculate all values at household level (map_to='household')\r\n",
- " 3. IPF/GREG adjusts weights to match SSA demographics\r\n",
- " 4. Apply calibrated weights directly (no aggregation needed)\r\n",
- "\r\n",
- "Initial memory usage: 1.13 GB\r\n",
- "\r\n",
- "Year Population Income Tax Baseline Tax Memory\r\n",
- "-----------------------------------------------------------------\r\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " [DEBUG 2025] SS baseline: $1424.6B, target: $1609.0B\r\n",
- " [DEBUG 2025] Payroll baseline: $8950.9B, target: $10621.0B\r\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " [DEBUG 2025] SS achieved: $1609.0B (error: -0.0%)\r\n",
- " [DEBUG 2025] Payroll achieved: $10621.0B (error: -0.0%)\r\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " Saved 2025.h5\r\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "2025 346.6M $ 2543.1B $ 1882.9B 4.32GB\r\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " [DEBUG 2027] SS baseline: $1495.1B, target: $1799.9B\r\n",
- " [DEBUG 2027] Payroll baseline: $9718.4B, target: $11627.0B\r\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " [DEBUG 2027] SS achieved: $1799.9B (error: -0.0%)\r\n",
- " [DEBUG 2027] Payroll achieved: $11627.0B (error: 0.0%)\r\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " Saved 2027.h5\r\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "2027 350.9M $ 2873.8B $ 2125.0B 4.62GB\r\n"
- ]
- }
- ],
- "source": [
- "# Save calibrated datasets as .h5 files for each year\n",
- "!python ../policyengine_us_data/datasets/cps/long_term/run_household_projection.py 2027 --greg --use-ss --use-payroll --save-h5"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Arguments:**\n",
- "- `END_YEAR`: Target year for projection (default: 2035)\n",
- "- `--greg`: Use GREG calibration instead of IPF (optional)\n",
- "- `--use-ss`: Include Social Security benefit totals as calibration target (requires --greg)\n",
- "- `--use-payroll`: Include taxable payroll as calibration target (requires --greg)\n",
- "- `--save-h5`: Save year-specific .h5 files to `./projected_datasets/` directory"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## The Challenge\n",
- "\n",
- "Projecting tax revenue over a 75-year horizon requires simultaneously modeling two distinct but interrelated dynamics:\n",
- "\n",
- "**Economic Evolution**: How incomes, prices, and tax parameters change over time\n",
- "- Wage growth and income distribution shifts\n",
- "- Inflation affecting brackets and deductions\n",
- "- Legislative changes and indexing rules\n",
- "- Behavioral responses to tax policy\n",
- "\n",
- "**Demographic Transformation**: How the population structure evolves\n",
- "- Baby boom generation aging through retirement\n",
- "- Declining birth rates reducing working-age population\n",
- "- Increasing longevity extending retirement duration\n",
- "- Shifting household composition patterns\n",
- "\n",
- "Traditional approaches typically sacrifice either economic sophistication (using simplified tax calculations) or demographic realism (holding age distributions constant). Our methodology preserves both."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Data Sources\n",
- "\n",
- "The long-term projections use two key SSA datasets:\n",
- "\n",
- "1. **SSA Population Projections** (`SSPopJul_TR2024.csv`)\n",
- " - Source: [SSA 2024 Trustees Report - Single Year Age Demographic Projections](https://www.ssa.gov/oact/HistEst/Population/2024/Population2024.html)\n",
- " - Contains age-specific population projections through 2100\n",
- " - Used for demographic reweighting to match future population structure\n",
- "\n",
- "2. **Social Security Cost Projections** (`social_security_aux.csv`)\n",
- " - Source: [SSA 2025 Trustees Report, Table VI.G9](https://www.ssa.gov/oact/TR/2025/index.html)\n",
- " - Contains OASDI benefit cost projections in CPI-indexed 2025 dollars\n",
- " - Used as calibration target in GREG method to ensure fiscal consistency"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2025-11-19T19:57:25.894248Z",
- "iopub.status.busy": "2025-11-19T19:57:25.894165Z",
- "iopub.status.idle": "2025-11-19T19:57:28.259561Z",
- "shell.execute_reply": "2025-11-19T19:57:28.259297Z"
- }
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/baogorek/envs/pe/lib/python3.13/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
- " from .autonotebook import tqdm as notebook_tqdm\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "TEST_LITE == False\n"
- ]
- }
- ],
- "source": [
- "import pandas as pd\n",
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "from pathlib import Path\n",
- "\n",
- "from policyengine_us_data.storage import STORAGE_FOLDER"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2025-11-19T19:57:28.260599Z",
- "iopub.status.busy": "2025-11-19T19:57:28.260502Z",
- "iopub.status.idle": "2025-11-19T19:57:28.275248Z",
- "shell.execute_reply": "2025-11-19T19:57:28.275040Z"
- }
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Year | \n",
- " Age | \n",
- " Total | \n",
- " M Tot | \n",
- " M Sin | \n",
- " M Mar | \n",
- " M Wid | \n",
- " M Div | \n",
- " F Tot | \n",
- " F Sin | \n",
- " F Mar | \n",
- " F Wid | \n",
- " F Div | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 1941 | \n",
- " 0 | \n",
- " 2492508 | \n",
- " 1276328 | \n",
- " 1276328 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 1216180 | \n",
- " 1216180 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 1941 | \n",
- " 1 | \n",
- " 2384290 | \n",
- " 1218524 | \n",
- " 1218524 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 1165766 | \n",
- " 1165766 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 1941 | \n",
- " 2 | \n",
- " 2445054 | \n",
- " 1246078 | \n",
- " 1246078 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 1198976 | \n",
- " 1198976 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 1941 | \n",
- " 3 | \n",
- " 2395999 | \n",
- " 1219543 | \n",
- " 1219543 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 1176456 | \n",
- " 1176456 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 1941 | \n",
- " 4 | \n",
- " 2275425 | \n",
- " 1157612 | \n",
- " 1157612 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 1117813 | \n",
- " 1117813 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Year Age Total M Tot M Sin M Mar M Wid M Div F Tot \\\n",
- "0 1941 0 2492508 1276328 1276328 0 0 0 1216180 \n",
- "1 1941 1 2384290 1218524 1218524 0 0 0 1165766 \n",
- "2 1941 2 2445054 1246078 1246078 0 0 0 1198976 \n",
- "3 1941 3 2395999 1219543 1219543 0 0 0 1176456 \n",
- "4 1941 4 2275425 1157612 1157612 0 0 0 1117813 \n",
- "\n",
- " F Sin F Mar F Wid F Div \n",
- "0 1216180 0 0 0 \n",
- "1 1165766 0 0 0 \n",
- "2 1198976 0 0 0 \n",
- "3 1176456 0 0 0 \n",
- "4 1117813 0 0 0 "
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Load SSA population data\n",
- "ssa_pop = pd.read_csv(STORAGE_FOLDER / 'SSPopJul_TR2024.csv')\n",
- "ssa_pop.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2025-11-19T19:57:28.276266Z",
- "iopub.status.busy": "2025-11-19T19:57:28.276171Z",
- "iopub.status.idle": "2025-11-19T19:57:28.280120Z",
- "shell.execute_reply": "2025-11-19T19:57:28.279840Z"
- }
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " year | \n",
- " oasdi_cost_in_billion_2025_usd | \n",
- " cpi_w_intermediate | \n",
- " oasdi_cost_in_billion_nominal_usd | \n",
- " taxable_payroll_in_billion_nominal_usd | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 2025 | \n",
- " 1609 | \n",
- " 100.00 | \n",
- " 1609.0000 | \n",
- " 10621.0 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 2026 | \n",
- " 1660 | \n",
- " 102.49 | \n",
- " 1701.3340 | \n",
- " 11129.0 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 2027 | \n",
- " 1715 | \n",
- " 104.95 | \n",
- " 1799.8925 | \n",
- " 11627.0 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 2028 | \n",
- " 1763 | \n",
- " 107.47 | \n",
- " 1894.6961 | \n",
- " 12159.0 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 2029 | \n",
- " 1810 | \n",
- " 110.05 | \n",
- " 1991.9050 | \n",
- " 12696.0 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " year oasdi_cost_in_billion_2025_usd cpi_w_intermediate \\\n",
- "0 2025 1609 100.00 \n",
- "1 2026 1660 102.49 \n",
- "2 2027 1715 104.95 \n",
- "3 2028 1763 107.47 \n",
- "4 2029 1810 110.05 \n",
- "\n",
- " oasdi_cost_in_billion_nominal_usd taxable_payroll_in_billion_nominal_usd \n",
- "0 1609.0000 10621.0 \n",
- "1 1701.3340 11129.0 \n",
- "2 1799.8925 11627.0 \n",
- "3 1894.6961 12159.0 \n",
- "4 1991.9050 12696.0 "
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Load Social Security auxiliary data\n",
- "ss_aux = pd.read_csv(STORAGE_FOLDER / 'social_security_aux.csv')\n",
- "ss_aux.head()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Core Innovation\n",
- "\n",
- "Our approach operates in two complementary stages:\n",
- "\n",
- "### Stage 1: Economic Uprating\n",
- "\n",
- "PolicyEngine's microsimulation engine projects each household's economic circumstances forward using:\n",
- "\n",
- "**Sophisticated Income Modeling**\n",
- "\n",
- "The system models 17 distinct income categories, each uprated according to its economic fundamentals:\n",
- "\n",
- "*Primary Categories with Specific Projections:*\n",
- "- Employment income (wages) - follows CBO wage growth projections\n",
- "- Self-employment income - follows CBO business income projections\n",
- "- Capital gains - follows CBO asset appreciation projections\n",
- "- Interest income - follows CBO interest rate projections\n",
- "- Dividend income - follows CBO corporate profit projections\n",
- "- Pension income - follows CBO retirement income projections\n",
- "- Social Security - follows SSA COLA projections (available through 2100)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Stage 2: Demographic Reweighting\n",
- "\n",
- "We offer two calibration methods for adjusting household weights to match SSA projections:\n",
- "\n",
- "**Method 1: Iterative Proportional Fitting (IPF)**\n",
- "- Traditional raking approach using Kullback-Leibler divergence\n",
- "- Iteratively adjusts weights to match marginal distributions\n",
- "- Robust to specification and always produces non-negative weights\n",
- "- Default method for backward compatibility\n",
- "\n",
- "**Method 2: Generalized Regression (GREG) Calibration**\n",
- "- Modern calibration using chi-squared distance minimization\n",
- "- Enables simultaneous calibration to categorical AND continuous variables\n",
- "- Direct solution via matrix operations (no iteration needed)\n",
- "- Required for incorporating Social Security benefit constraints"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Demonstrating the Calibration Methods"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2025-11-19T19:57:28.280998Z",
- "iopub.status.busy": "2025-11-19T19:57:28.280918Z",
- "iopub.status.idle": "2025-11-19T19:57:28.282636Z",
- "shell.execute_reply": "2025-11-19T19:57:28.282472Z"
- }
- },
- "outputs": [],
- "source": [
- "from policyengine_us_data.datasets.cps.long_term.ssa_data import (\n",
- " load_ssa_age_projections,\n",
- " load_ssa_benefit_projections\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2025-11-19T19:57:28.283285Z",
- "iopub.status.busy": "2025-11-19T19:57:28.283217Z",
- "iopub.status.idle": "2025-11-19T19:57:28.296829Z",
- "shell.execute_reply": "2025-11-19T19:57:28.296600Z"
- }
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "Age distribution targets for 2025:\n",
- "Shape: (86, 1)\n",
- "Total population: 346577.3M\n"
- ]
- }
- ],
- "source": [
- "# Get SSA population targets for a specific year\n",
- "year = 2025\n",
- "age_targets = load_ssa_age_projections(end_year=year)\n",
- "print(f\"\\nAge distribution targets for {year}:\")\n",
- "print(f\"Shape: {age_targets.shape}\")\n",
- "print(f\"Total population: {age_targets[:, 0].sum() / 1000:.1f}M\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2025-11-19T19:57:28.297698Z",
- "iopub.status.busy": "2025-11-19T19:57:28.297611Z",
- "iopub.status.idle": "2025-11-19T19:57:28.300109Z",
- "shell.execute_reply": "2025-11-19T19:57:28.299938Z"
- }
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "Social Security benefit target for 2025: $1609.0B\n"
- ]
- }
- ],
- "source": [
- "# Get Social Security benefit target\n",
- "ss_target = load_ssa_benefit_projections(year)\n",
- "print(f\"\\nSocial Security benefit target for {year}: ${ss_target / 1e9:.1f}B\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### PWBM Analysis: Eliminating Income Taxes on Social Security Benefits\n",
- "\n",
- "**Source:** [Eliminating Income Taxes on Social Security Benefits](https://budgetmodel.wharton.upenn.edu/issues/2025/2/10/eliminating-income-taxes-on-social-security-benefits) (Penn Wharton Budget Model, February 10, 2025)\n",
- "\n",
- "---\n",
- "\n",
- "#### Policy Analyzed\n",
- "The Penn Wharton Budget Model (PWBM) analyzed a policy proposal to permanently eliminate all income taxes on Social Security benefits, effective January 1, 2025.\n",
- "\n",
- "#### Key Findings\n",
- "\n",
- "* **Budgetary Impact:** The policy is projected to reduce federal revenues by **$1.45 trillion** over the 10-year budget window (2025-2034). Over the long term, it is projected to increase federal debt by 7 percent by 2054, relative to the current baseline.\n",
- "\n",
- "* **Macroeconomic Impact:** The analysis finds the policy would have negative long-term effects on the economy.\n",
- " * It reduces incentives for households to save for retirement and to work.\n",
- " * This leads to a smaller capital stock (projected to be 4.2% lower by 2054).\n",
- " * The smaller capital stock results in lower average wages (1.8% lower by 2054) and lower GDP (2.1% lower by 2054).\n",
- "\n",
- "* **Conventional Distributional Impact (Your Table):** The table you shared shows the annual \"conventional\" effects on household after-tax income.\n",
- " * The largest average *dollar* tax cuts go to households in the top 20 percent of the income distribution (quintiles 80-100%).\n",
- " * The largest *relative* gains (as a percentage of income) go to households in the fourth quintile (60-80%), who see a 1.6% increase in after-tax income by 2054.\n",
- " * The dollar amounts shown are in **nominal dollars** for each specified year, not adjusted to a single base year.\n",
- "\n",
- "* **Dynamic (Lifetime) Impact:** When analyzing the policy's effects over a household's entire lifetime, PWBM finds:\n",
- " * The policy primarily benefits high-income households who are nearing or in retirement.\n",
- " * It negatively impacts all households under the age of 30 and all future generations, who would experience a net welfare loss due to the long-term effects of lower wages and higher federal debt."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### PolicyEngine's Analysis of Eliminating Income Taxes on Social Security Benefits"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {
- "execution": {
- "iopub.execute_input": "2025-11-19T19:57:28.300920Z",
- "iopub.status.busy": "2025-11-19T19:57:28.300844Z",
- "iopub.status.idle": "2025-11-19T19:59:04.747344Z",
- "shell.execute_reply": "2025-11-19T19:59:04.747122Z"
- }
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "================================================================================\n",
- "WHARTON COMPARISON PIPELINE - YEAR 2054\n",
- "================================================================================\n",
- "\n",
- "Running PolicyEngine analysis...\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Loading dataset: hf://policyengine/test/2054.h5\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Ranking according to quantiles with: household_net_income\n",
- "✓ Analysis complete\n",
- " Revenue impact: $-579.1B\n",
- "\n",
- "Generating comparison table...\n",
- "\n",
- "================================================================================\n",
- "COMPARISON TABLE: 2054\n",
- "================================================================================\n",
- "\n",
- "Average Tax Change (per household):\n",
- " Income Group PolicyEngine Wharton Difference\n",
- " First quintile $-95 $-5 $-90\n",
- "Second quintile $-1,054 $-275 $-779\n",
- "Middle quintile $-2,241 $-1,730 $-511\n",
- "Fourth quintile $-4,633 $-3,560 $-1,073\n",
- " 80-90% $-6,737 $-4,075 $-2,662\n",
- " 90-95% $-12,121 $-4,385 $-7,736\n",
- " 95-99% $-8,066 $-4,565 $-3,501\n",
- " 99-99.9% $-7,257 $-4,820 $-2,437\n",
- " Top 0.1% $-8,615 $-5,080 $-3,535\n",
- "\n",
- "Percent Change in Income:\n",
- " Income Group PE % Wharton %\n",
- " First quintile -0.10000000149011612% 0.0%\n",
- "Second quintile 0.800000011920929% 0.3%\n",
- "Middle quintile 1.100000023841858% 1.3%\n",
- "Fourth quintile 1.399999976158142% 1.6%\n",
- " 80-90% 1.399999976158142% 1.2%\n",
- " 90-95% 1.7000000476837158% 0.9%\n",
- " 95-99% 0.699999988079071% 0.6%\n",
- " 99-99.9% 0.30000001192092896% 0.2%\n",
- " Top 0.1% 0.10000000149011612% 0.0%\n",
- "\n"
- ]
- }
- ],
- "source": [
- "import sys\n",
- "import os\n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "import gc\n",
- "\n",
- "from policyengine_us import Microsimulation\n",
- "from policyengine_core.reforms import Reform\n",
- "\n",
- "WHARTON_BENCHMARKS = {\n",
- " 2026: {\n",
- " 'First quintile': {'tax_change': 0, 'pct_change': 0.0},\n",
- " 'Second quintile': {'tax_change': -15, 'pct_change': 0.0},\n",
- " 'Middle quintile': {'tax_change': -340, 'pct_change': 0.5},\n",
- " 'Fourth quintile': {'tax_change': -1135, 'pct_change': 1.1},\n",
- " '80-90%': {'tax_change': -1625, 'pct_change': 1.0},\n",
- " '90-95%': {'tax_change': -1590, 'pct_change': 0.7},\n",
- " '95-99%': {'tax_change': -2020, 'pct_change': 0.5},\n",
- " '99-99.9%': {'tax_change': -2205, 'pct_change': 0.2},\n",
- " 'Top 0.1%': {'tax_change': -2450, 'pct_change': 0.0},\n",
- " },\n",
- " 2034: {\n",
- " 'First quintile': {'tax_change': 0, 'pct_change': 0.0},\n",
- " 'Second quintile': {'tax_change': -45, 'pct_change': 0.1},\n",
- " 'Middle quintile': {'tax_change': -615, 'pct_change': 0.8},\n",
- " 'Fourth quintile': {'tax_change': -1630, 'pct_change': 1.2},\n",
- " '80-90%': {'tax_change': -2160, 'pct_change': 1.1},\n",
- " '90-95%': {'tax_change': -2160, 'pct_change': 0.7},\n",
- " '95-99%': {'tax_change': -2605, 'pct_change': 0.6},\n",
- " '99-99.9%': {'tax_change': -2715, 'pct_change': 0.2},\n",
- " 'Top 0.1%': {'tax_change': -2970, 'pct_change': 0.0},\n",
- " },\n",
- " 2054: {\n",
- " 'First quintile': {'tax_change': -5, 'pct_change': 0.0},\n",
- " 'Second quintile': {'tax_change': -275, 'pct_change': 0.3},\n",
- " 'Middle quintile': {'tax_change': -1730, 'pct_change': 1.3},\n",
- " 'Fourth quintile': {'tax_change': -3560, 'pct_change': 1.6},\n",
- " '80-90%': {'tax_change': -4075, 'pct_change': 1.2},\n",
- " '90-95%': {'tax_change': -4385, 'pct_change': 0.9},\n",
- " '95-99%': {'tax_change': -4565, 'pct_change': 0.6},\n",
- " '99-99.9%': {'tax_change': -4820, 'pct_change': 0.2},\n",
- " 'Top 0.1%': {'tax_change': -5080, 'pct_change': 0.0},\n",
- " },\n",
- "}\n",
- "\n",
- "def run_analysis(dataset_path, year, income_rank_var = \"household_net_income\"):\n",
- " \"\"\"Run Option 1 analysis for given dataset and year\"\"\"\n",
- "\n",
- " option1_reform = Reform.from_dict(\n",
- " {\n",
- " # Base rate parameters (0-50% bracket)\n",
- " \"gov.irs.social_security.taxability.rate.base.benefit_cap\": {\n",
- " \"2026-01-01.2100-12-31\": 0\n",
- " },\n",
- " \"gov.irs.social_security.taxability.rate.base.excess\": {\n",
- " \"2026-01-01.2100-12-31\": 0\n",
- " },\n",
- " # Additional rate parameters (50-85% bracket)\n",
- " \"gov.irs.social_security.taxability.rate.additional.benefit_cap\": {\n",
- " \"2026-01-01.2100-12-31\": 0\n",
- " },\n",
- " \"gov.irs.social_security.taxability.rate.additional.bracket\": {\n",
- " \"2026-01-01.2100-12-31\": 0\n",
- " },\n",
- " \"gov.irs.social_security.taxability.rate.additional.excess\": {\n",
- " \"2026-01-01.2100-12-31\": 0\n",
- " }\n",
- " }, country_id=\"us\"\n",
- " )\n",
- " reform = Microsimulation(dataset=dataset_path, reform=option1_reform)\n",
- "\n",
- " # Get household data\n",
- " household_net_income_reform = reform.calculate(\"household_net_income\", period=year, map_to=\"household\")\n",
- " household_agi_reform = reform.calculate(\"adjusted_gross_income\", period=year, map_to=\"household\")\n",
- " income_tax_reform = reform.calculate(\"income_tax\", period=year, map_to=\"household\")\n",
- "\n",
- " del reform\n",
- " gc.collect()\n",
- "\n",
- " print(f\"Loading dataset: {dataset_path}\")\n",
- " baseline = Microsimulation(dataset=dataset_path)\n",
- " household_weight = baseline.calculate(\"household_weight\", period=year)\n",
- " household_net_income_baseline = baseline.calculate(\"household_net_income\", period=year, map_to=\"household\")\n",
- " household_agi_baseline = baseline.calculate(\"adjusted_gross_income\", period=year, map_to=\"household\")\n",
- " income_tax_baseline = baseline.calculate(\"income_tax\", period=year, map_to=\"household\")\n",
- "\n",
- " # Calculate changes\n",
- " tax_change = income_tax_reform - income_tax_baseline\n",
- " income_change_pct = (\n",
- " (household_net_income_reform - household_net_income_baseline) / household_net_income_baseline\n",
- " ) * 100\n",
- "\n",
- " # Create DataFrame\n",
- " df = pd.DataFrame({\n",
- " 'household_net_income': household_net_income_baseline,\n",
- " 'weight': household_weight,\n",
- " 'tax_change': tax_change,\n",
- " 'income_change_pct': income_change_pct,\n",
- " 'income_rank_var': baseline.calculate(income_rank_var, year, map_to=\"household\")\n",
- " })\n",
- " \n",
- " # Calculate percentiles\n",
- "\n",
- " print(f\"Ranking according to quantiles with: {income_rank_var}\")\n",
- " df['income_percentile'] = df['income_rank_var'].rank(pct=True) * 100\n",
- "\n",
- " # Assign income groups\n",
- " def assign_income_group(percentile):\n",
- " if percentile <= 20:\n",
- " return 'First quintile'\n",
- " elif percentile <= 40:\n",
- " return 'Second quintile'\n",
- " elif percentile <= 60:\n",
- " return 'Middle quintile'\n",
- " elif percentile <= 80:\n",
- " return 'Fourth quintile'\n",
- " elif percentile <= 90:\n",
- " return '80-90%'\n",
- " elif percentile <= 95:\n",
- " return '90-95%'\n",
- " elif percentile <= 99:\n",
- " return '95-99%'\n",
- " elif percentile <= 99.9:\n",
- " return '99-99.9%'\n",
- " else:\n",
- " return 'Top 0.1%'\n",
- "\n",
- " df['income_group'] = df['income_percentile'].apply(assign_income_group)\n",
- "\n",
- " # Calculate aggregate revenue\n",
- " revenue_impact = (income_tax_reform.sum() - income_tax_baseline.sum()) / 1e9\n",
- "\n",
- " # Calculate by group\n",
- " results = []\n",
- " for group in ['First quintile', 'Second quintile', 'Middle quintile', 'Fourth quintile',\n",
- " '80-90%', '90-95%', '95-99%', '99-99.9%', 'Top 0.1%']:\n",
- " group_data = df[df['income_group'] == group]\n",
- " if len(group_data) == 0:\n",
- " continue\n",
- "\n",
- " total_weight = group_data['weight'].sum()\n",
- " avg_tax_change = (group_data['tax_change'] * group_data['weight']).sum() / total_weight\n",
- " avg_income_change_pct = (group_data['income_change_pct'] * group_data['weight']).sum() / total_weight\n",
- "\n",
- " results.append({\n",
- " 'group': group,\n",
- " 'pe_tax_change': round(avg_tax_change),\n",
- " 'pe_pct_change': round(avg_income_change_pct, 1),\n",
- " })\n",
- "\n",
- " return pd.DataFrame(results), revenue_impact\n",
- "\n",
- "def generate_comparison_table(pe_results, year):\n",
- " \"\"\"Generate comparison table with Wharton benchmark\"\"\"\n",
- "\n",
- " if year not in WHARTON_BENCHMARKS:\n",
- " print(f\"Warning: No Wharton benchmark available for year {year}\")\n",
- " return pe_results\n",
- "\n",
- " wharton_data = WHARTON_BENCHMARKS[year]\n",
- "\n",
- " comparison = []\n",
- " for _, row in pe_results.iterrows():\n",
- " group = row['group']\n",
- " wharton = wharton_data.get(group, {'tax_change': None, 'pct_change': None})\n",
- "\n",
- " pe_tax = row['pe_tax_change']\n",
- " wh_tax = wharton['tax_change']\n",
- "\n",
- " comparison.append({\n",
- " 'Income Group': group,\n",
- " 'PolicyEngine': f\"${pe_tax:,}\",\n",
- " 'Wharton': f\"${wh_tax:,}\" if wh_tax is not None else 'N/A',\n",
- " 'Difference': f\"${(pe_tax - wh_tax):,}\" if wh_tax is not None else 'N/A',\n",
- " 'PE %': f\"{row['pe_pct_change']}%\",\n",
- " 'Wharton %': f\"{wharton['pct_change']}%\" if wharton['pct_change'] is not None else 'N/A',\n",
- " })\n",
- "\n",
- " return pd.DataFrame(comparison)\n",
- "\n",
- "dataset_path = 'hf://policyengine/test/2054.h5'\n",
- "year = 2054\n",
- "income_rank_variable = \"household_net_income\"\n",
- "\n",
- "print(\"=\"*80)\n",
- "print(f\"WHARTON COMPARISON PIPELINE - YEAR {year}\")\n",
- "print(\"=\"*80)\n",
- "print()\n",
- "\n",
- "# Run analysis\n",
- "print(\"Running PolicyEngine analysis...\")\n",
- "pe_results, revenue_impact = run_analysis(dataset_path, year, income_rank_variable)\n",
- "print(f\"✓ Analysis complete\")\n",
- "print(f\" Revenue impact: ${revenue_impact:.1f}B\")\n",
- "print()\n",
- "\n",
- "# Generate comparison table\n",
- "print(\"Generating comparison table...\")\n",
- "comparison_table = generate_comparison_table(pe_results, year)\n",
- "\n",
- "print()\n",
- "print(\"=\"*80)\n",
- "print(f\"COMPARISON TABLE: {year}\")\n",
- "print(\"=\"*80)\n",
- "print()\n",
- "print(\"Average Tax Change (per household):\")\n",
- "print(comparison_table[['Income Group', 'PolicyEngine', 'Wharton', 'Difference']].to_string(index=False))\n",
- "print()\n",
- "print(\"Percent Change in Income:\")\n",
- "print(comparison_table[['Income Group', 'PE %', 'Wharton %']].to_string(index=False))\n",
- "print()"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.13.6"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
\ No newline at end of file
diff --git a/docs/long_term_projections.md b/docs/long_term_projections.md
new file mode 100644
index 00000000..cc5d14c0
--- /dev/null
+++ b/docs/long_term_projections.md
@@ -0,0 +1,348 @@
+# Long Term Projections
+## Integrating Economic Uprating with Demographic Reweighting
+
+## Executive Summary
+
+This document outlines an innovative approach for projecting federal income tax revenue through 2100 that uniquely combines sophisticated economic microsimulation with demographic reweighting. By harmonizing PolicyEngine's state-of-the-art tax modeling with Social Security Administration demographic projections, we can isolate and quantify the fiscal impact of population aging while preserving the full complexity of the tax code.
+
+## The Challenge
+
+Projecting tax revenue over a 75-year horizon requires simultaneously modeling two distinct but interrelated dynamics:
+
+**Economic Evolution**: How incomes, prices, and tax parameters change over time
+- Wage growth and income distribution shifts
+- Inflation affecting brackets and deductions
+- Legislative changes and indexing rules
+- Behavioral responses to tax policy
+
+**Demographic Transformation**: How the population structure evolves
+- Baby boom generation aging through retirement
+- Declining birth rates reducing working-age population
+- Increasing longevity extending retirement duration
+- Shifting household composition patterns
+
+Traditional approaches typically sacrifice either economic sophistication (using simplified tax calculations) or demographic realism (holding age distributions constant). Our methodology preserves both.
+
+## Running Projections
+
+Run projections using `run_household_projection.py`:
+
+```bash
+# Save calibrated datasets as .h5 files for each year
+python ../policyengine_us_data/datasets/cps/long_term/run_household_projection.py 2027 --greg --use-ss --use-payroll --save-h5
+```
+
+**Arguments:**
+- `END_YEAR`: Target year for projection (default: 2035)
+- `--greg`: Use GREG calibration instead of IPF (optional)
+- `--use-ss`: Include Social Security benefit totals as calibration target (requires --greg)
+- `--use-payroll`: Include taxable payroll as calibration target (requires --greg)
+- `--save-h5`: Save year-specific .h5 files to `./projected_datasets/` directory
+
+## Data Sources
+
+The long-term projections use two key SSA datasets:
+
+1. **SSA Population Projections** (`SSPopJul_TR2024.csv`)
+ - Source: [SSA 2024 Trustees Report - Single Year Age Demographic Projections](https://www.ssa.gov/oact/HistEst/Population/2024/Population2024.html)
+ - Contains age-specific population projections through 2100
+ - Used for demographic reweighting to match future population structure
+
+2. **Social Security Cost Projections** (`social_security_aux.csv`)
+ - Source: [SSA 2025 Trustees Report, Table VI.G9](https://www.ssa.gov/oact/TR/2025/index.html)
+ - Contains OASDI benefit cost projections in CPI-indexed 2025 dollars
+ - Used as calibration target in GREG method to ensure fiscal consistency
+
+### Loading SSA Data
+
+```python
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from pathlib import Path
+
+from policyengine_us_data.storage import STORAGE_FOLDER
+
+# Load SSA population data
+ssa_pop = pd.read_csv(STORAGE_FOLDER / 'SSPopJul_TR2024.csv')
+ssa_pop.head()
+
+# Load Social Security auxiliary data
+ss_aux = pd.read_csv(STORAGE_FOLDER / 'social_security_aux.csv')
+ss_aux.head()
+```
+
+## Core Innovation
+
+Our approach operates in two complementary stages:
+
+### Stage 1: Economic Uprating
+
+PolicyEngine's microsimulation engine projects each household's economic circumstances forward using:
+
+**Sophisticated Income Modeling**
+
+The system models 17 distinct income categories, each uprated according to its economic fundamentals:
+
+*Primary Categories with Specific Projections:*
+- Employment income (wages) - follows CBO wage growth projections
+- Self-employment income - follows CBO business income projections
+- Capital gains - follows CBO asset appreciation projections
+- Interest income - follows CBO interest rate projections
+- Dividend income - follows CBO corporate profit projections
+- Pension income - follows CBO retirement income projections
+- Social Security - follows SSA COLA projections (available through 2100)
+
+### Stage 2: Demographic Reweighting
+
+We offer two calibration methods for adjusting household weights to match SSA projections:
+
+**Method 1: Iterative Proportional Fitting (IPF)**
+- Traditional raking approach using Kullback-Leibler divergence
+- Iteratively adjusts weights to match marginal distributions
+- Robust to specification and always produces non-negative weights
+- Default method for backward compatibility
+
+**Method 2: Generalized Regression (GREG) Calibration**
+- Modern calibration using chi-squared distance minimization
+- Enables simultaneous calibration to categorical AND continuous variables
+- Direct solution via matrix operations (no iteration needed)
+- Required for incorporating Social Security benefit constraints
+
+## Demonstrating the Calibration Methods
+
+```python
+from policyengine_us_data.datasets.cps.long_term.ssa_data import (
+ load_ssa_age_projections,
+ load_ssa_benefit_projections
+)
+
+# Get SSA population targets for a specific year
+year = 2025
+age_targets = load_ssa_age_projections(end_year=year)
+print(f"\nAge distribution targets for {year}:")
+print(f"Shape: {age_targets.shape}")
+print(f"Total population: {age_targets[:, 0].sum() / 1000:.1f}M")
+
+# Get Social Security benefit target
+ss_target = load_ssa_benefit_projections(year)
+print(f"\nSocial Security benefit target for {year}: ${ss_target / 1e9:.1f}B")
+```
+
+## PWBM Analysis: Eliminating Income Taxes on Social Security Benefits
+
+**Source:** [Eliminating Income Taxes on Social Security Benefits](https://budgetmodel.wharton.upenn.edu/issues/2025/2/10/eliminating-income-taxes-on-social-security-benefits) (Penn Wharton Budget Model, February 10, 2025)
+
+---
+
+### Policy Analyzed
+The Penn Wharton Budget Model (PWBM) analyzed a policy proposal to permanently eliminate all income taxes on Social Security benefits, effective January 1, 2025.
+
+### Key Findings
+
+* **Budgetary Impact:** The policy is projected to reduce federal revenues by **$1.45 trillion** over the 10-year budget window (2025-2034). Over the long term, it is projected to increase federal debt by 7 percent by 2054, relative to the current baseline.
+
+* **Macroeconomic Impact:** The analysis finds the policy would have negative long-term effects on the economy.
+ * It reduces incentives for households to save for retirement and to work.
+ * This leads to a smaller capital stock (projected to be 4.2% lower by 2054).
+ * The smaller capital stock results in lower average wages (1.8% lower by 2054) and lower GDP (2.1% lower by 2054).
+
+* **Conventional Distributional Impact (Your Table):** The table you shared shows the annual "conventional" effects on household after-tax income.
+ * The largest average *dollar* tax cuts go to households in the top 20 percent of the income distribution (quintiles 80-100%).
+ * The largest *relative* gains (as a percentage of income) go to households in the fourth quintile (60-80%), who see a 1.6% increase in after-tax income by 2054.
+ * The dollar amounts shown are in **nominal dollars** for each specified year, not adjusted to a single base year.
+
+* **Dynamic (Lifetime) Impact:** When analyzing the policy's effects over a household's entire lifetime, PWBM finds:
+ * The policy primarily benefits high-income households who are nearing or in retirement.
+ * It negatively impacts all households under the age of 30 and all future generations, who would experience a net welfare loss due to the long-term effects of lower wages and higher federal debt.
+
+## PolicyEngine's Analysis of Eliminating Income Taxes on Social Security Benefits
+
+```python
+import sys
+import os
+import pandas as pd
+import numpy as np
+import gc
+
+from policyengine_us import Microsimulation
+from policyengine_core.reforms import Reform
+
+WHARTON_BENCHMARKS = {
+ 2026: {
+ 'First quintile': {'tax_change': 0, 'pct_change': 0.0},
+ 'Second quintile': {'tax_change': -15, 'pct_change': 0.0},
+ 'Middle quintile': {'tax_change': -340, 'pct_change': 0.5},
+ 'Fourth quintile': {'tax_change': -1135, 'pct_change': 1.1},
+ '80-90%': {'tax_change': -1625, 'pct_change': 1.0},
+ '90-95%': {'tax_change': -1590, 'pct_change': 0.7},
+ '95-99%': {'tax_change': -2020, 'pct_change': 0.5},
+ '99-99.9%': {'tax_change': -2205, 'pct_change': 0.2},
+ 'Top 0.1%': {'tax_change': -2450, 'pct_change': 0.0},
+ },
+ 2034: {
+ 'First quintile': {'tax_change': 0, 'pct_change': 0.0},
+ 'Second quintile': {'tax_change': -45, 'pct_change': 0.1},
+ 'Middle quintile': {'tax_change': -615, 'pct_change': 0.8},
+ 'Fourth quintile': {'tax_change': -1630, 'pct_change': 1.2},
+ '80-90%': {'tax_change': -2160, 'pct_change': 1.1},
+ '90-95%': {'tax_change': -2160, 'pct_change': 0.7},
+ '95-99%': {'tax_change': -2605, 'pct_change': 0.6},
+ '99-99.9%': {'tax_change': -2715, 'pct_change': 0.2},
+ 'Top 0.1%': {'tax_change': -2970, 'pct_change': 0.0},
+ },
+ 2054: {
+ 'First quintile': {'tax_change': -5, 'pct_change': 0.0},
+ 'Second quintile': {'tax_change': -275, 'pct_change': 0.3},
+ 'Middle quintile': {'tax_change': -1730, 'pct_change': 1.3},
+ 'Fourth quintile': {'tax_change': -3560, 'pct_change': 1.6},
+ '80-90%': {'tax_change': -4075, 'pct_change': 1.2},
+ '90-95%': {'tax_change': -4385, 'pct_change': 0.9},
+ '95-99%': {'tax_change': -4565, 'pct_change': 0.6},
+ '99-99.9%': {'tax_change': -4820, 'pct_change': 0.2},
+ 'Top 0.1%': {'tax_change': -5080, 'pct_change': 0.0},
+ },
+}
+
+def run_analysis(dataset_path, year, income_rank_var = "household_net_income"):
+ """Run Option 1 analysis for given dataset and year"""
+
+ option1_reform = Reform.from_dict(
+ {
+ # Base rate parameters (0-50% bracket)
+ "gov.irs.social_security.taxability.rate.base.benefit_cap": {
+ "2026-01-01.2100-12-31": 0
+ },
+ "gov.irs.social_security.taxability.rate.base.excess": {
+ "2026-01-01.2100-12-31": 0
+ },
+ # Additional rate parameters (50-85% bracket)
+ "gov.irs.social_security.taxability.rate.additional.benefit_cap": {
+ "2026-01-01.2100-12-31": 0
+ },
+ "gov.irs.social_security.taxability.rate.additional.bracket": {
+ "2026-01-01.2100-12-31": 0
+ },
+ "gov.irs.social_security.taxability.rate.additional.excess": {
+ "2026-01-01.2100-12-31": 0
+ }
+ }, country_id="us"
+ )
+ reform = Microsimulation(dataset=dataset_path, reform=option1_reform)
+
+ # Get household data
+ household_net_income_reform = reform.calculate("household_net_income", period=year, map_to="household")
+ household_agi_reform = reform.calculate("adjusted_gross_income", period=year, map_to="household")
+ income_tax_reform = reform.calculate("income_tax", period=year, map_to="household")
+
+ del reform
+ gc.collect()
+
+ print(f"Loading dataset: {dataset_path}")
+ baseline = Microsimulation(dataset=dataset_path)
+ household_weight = baseline.calculate("household_weight", period=year)
+ household_net_income_baseline = baseline.calculate("household_net_income", period=year, map_to="household")
+ household_agi_baseline = baseline.calculate("adjusted_gross_income", period=year, map_to="household")
+ income_tax_baseline = baseline.calculate("income_tax", period=year, map_to="household")
+
+ # Calculate changes
+ tax_change = income_tax_reform - income_tax_baseline
+ income_change_pct = (
+ (household_net_income_reform - household_net_income_baseline) / household_net_income_baseline
+ ) * 100
+
+ # Create DataFrame
+ df = pd.DataFrame({
+ 'household_net_income': household_net_income_baseline,
+ 'weight': household_weight,
+ 'tax_change': tax_change,
+ 'income_change_pct': income_change_pct,
+ 'income_rank_var': baseline.calculate(income_rank_var, year, map_to="household")
+ })
+
+ # Calculate percentiles
+
+ print(f"Ranking according to quantiles with: {income_rank_var}")
+ df['income_percentile'] = df['income_rank_var'].rank(pct=True) * 100
+
+ # Assign income groups
+ def assign_income_group(percentile):
+ if percentile <= 20:
+ return 'First quintile'
+ elif percentile <= 40:
+ return 'Second quintile'
+ elif percentile <= 60:
+ return 'Middle quintile'
+ elif percentile <= 80:
+ return 'Fourth quintile'
+ elif percentile <= 90:
+ return '80-90%'
+ elif percentile <= 95:
+ return '90-95%'
+ elif percentile <= 99:
+ return '95-99%'
+ elif percentile <= 99.9:
+ return '99-99.9%'
+ else:
+ return 'Top 0.1%'
+
+ df['income_group'] = df['income_percentile'].apply(assign_income_group)
+
+ # Calculate aggregate revenue
+ revenue_impact = (income_tax_reform.sum() - income_tax_baseline.sum()) / 1e9
+
+ # Calculate by group
+ results = []
+ for group in ['First quintile', 'Second quintile', 'Middle quintile', 'Fourth quintile',
+ '80-90%', '90-95%', '95-99%', '99-99.9%', 'Top 0.1%']:
+ group_data = df[df['income_group'] == group]
+ if len(group_data) == 0:
+ continue
+
+ total_weight = group_data['weight'].sum()
+ avg_tax_change = (group_data['tax_change'] * group_data['weight']).sum() / total_weight
+ avg_income_change_pct = (group_data['income_change_pct'] * group_data['weight']).sum() / total_weight
+
+ results.append({
+ 'group': group,
+ 'pe_tax_change': round(avg_tax_change),
+ 'pe_pct_change': round(avg_income_change_pct, 1),
+ })
+
+ return pd.DataFrame(results), revenue_impact
+
+def generate_comparison_table(pe_results, year):
+ """Generate comparison table with Wharton benchmark"""
+
+ if year not in WHARTON_BENCHMARKS:
+ print(f"Warning: No Wharton benchmark available for year {year}")
+ return pe_results
+
+ wharton_data = WHARTON_BENCHMARKS[year]
+
+ comparison = []
+ for _, row in pe_results.iterrows():
+ group = row['group']
+ wharton = wharton_data.get(group, {'tax_change': None, 'pct_change': None})
+
+ pe_tax = row['pe_tax_change']
+ wh_tax = wharton['tax_change']
+
+ comparison.append({
+ 'Income Group': group,
+ 'PolicyEngine': f"${pe_tax:,}",
+ 'Wharton': f"${wh_tax:,}" if wh_tax is not None else 'N/A',
+ 'Difference': f"${(pe_tax - wh_tax):,}" if wh_tax is not None else 'N/A',
+ 'PE %': f"{row['pe_pct_change']}%",
+ 'Wharton %': f"{wharton['pct_change']}%" if wharton['pct_change'] is not None else 'N/A',
+ })
+
+ return pd.DataFrame(comparison)
+
+# Example usage:
+# dataset_path = 'hf://policyengine/test/2054.h5'
+# year = 2054
+# income_rank_variable = "household_net_income"
+# pe_results, revenue_impact = run_analysis(dataset_path, year, income_rank_variable)
+# comparison_table = generate_comparison_table(pe_results, year)
+```
diff --git a/docs/myst.yml b/docs/myst.yml
index 6b6ceae6..c38666af 100644
--- a/docs/myst.yml
+++ b/docs/myst.yml
@@ -24,8 +24,8 @@ project:
- file: background.md
- file: data.md
- file: methodology.md
+ - file: long_term_projections.md
- file: local_area_calibration_setup.ipynb
- - file: long_term_projections.ipynb
- file: discussion.md
- file: conclusion.md
- file: appendix.md
diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index 06619d6e..9b09a38f 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -78,7 +78,7 @@ def generate(self):
undocumented_students_target=0.21 * 1.9e6,
)
logging.info("Adding family variables")
- add_spm_variables(cps, spm_unit)
+ add_spm_variables(self, cps, spm_unit)
logging.info("Adding household variables")
add_household_variables(cps, household)
logging.info("Adding rent")
@@ -602,7 +602,11 @@ def add_personal_income_variables(
cps[f"{var}_would_be_qualified"] = rng.random(len(person)) < prob
-def add_spm_variables(cps: h5py.File, spm_unit: DataFrame) -> None:
+def add_spm_variables(self, cps: h5py.File, spm_unit: DataFrame) -> None:
+ from policyengine_us_data.utils.spm import (
+ calculate_spm_thresholds_with_geoadj,
+ )
+
SPM_RENAMES = dict(
spm_unit_total_income_reported="SPM_TOTVAL",
snap_reported="SPM_SNAPSUB",
@@ -616,7 +620,6 @@ def add_spm_variables(cps: h5py.File, spm_unit: DataFrame) -> None:
# State tax includes refundable credits.
spm_unit_state_tax_reported="SPM_STTAX",
spm_unit_capped_work_childcare_expenses="SPM_CAPWKCCXPNS",
- spm_unit_spm_threshold="SPM_POVTHRESHOLD",
spm_unit_net_income_reported="SPM_RESOURCES",
spm_unit_pre_subsidy_childcare_expenses="SPM_CHILDCAREXPNS",
)
@@ -625,6 +628,16 @@ def add_spm_variables(cps: h5py.File, spm_unit: DataFrame) -> None:
if asec_variable in spm_unit.columns:
cps[openfisca_variable] = spm_unit[asec_variable]
+ # Calculate SPM thresholds using spm-calculator with Census-provided
+ # geographic adjustment factors (SPM_GEOADJ)
+ cps["spm_unit_spm_threshold"] = calculate_spm_thresholds_with_geoadj(
+ num_adults=spm_unit["SPM_NUMADULTS"].values,
+ num_children=spm_unit["SPM_NUMKIDS"].values,
+ tenure_codes=spm_unit["SPM_TENMORTSTATUS"].values,
+ geoadj=spm_unit["SPM_GEOADJ"].values,
+ year=self.time_period,
+ )
+
cps["reduced_price_school_meals_reported"] = (
cps["free_school_meals_reported"] * 0
)
diff --git a/policyengine_us_data/utils/spm.py b/policyengine_us_data/utils/spm.py
new file mode 100644
index 00000000..070db533
--- /dev/null
+++ b/policyengine_us_data/utils/spm.py
@@ -0,0 +1,53 @@
+"""SPM threshold calculation utilities using the spm-calculator package."""
+
+import numpy as np
+from spm_calculator import SPMCalculator, spm_equivalence_scale
+
+
+TENURE_CODE_MAP = {
+ 1: "owner_with_mortgage",
+ 2: "owner_without_mortgage",
+ 3: "renter",
+}
+
+
+def calculate_spm_thresholds_with_geoadj(
+ num_adults: np.ndarray,
+ num_children: np.ndarray,
+ tenure_codes: np.ndarray,
+ geoadj: np.ndarray,
+ year: int,
+) -> np.ndarray:
+ """
+ Calculate SPM thresholds using Census-provided geographic adjustments.
+
+ This function uses the SPM_GEOADJ values already computed by the Census
+ Bureau, combined with spm-calculator's base thresholds and equivalence
+ scale formula. This avoids the need for a Census API key.
+
+ Args:
+ num_adults: Array of number of adults (18+) in each SPM unit.
+ num_children: Array of number of children (<18) in each SPM unit.
+ tenure_codes: Array of Census tenure/mortgage status codes.
+ 1 = owner with mortgage, 2 = owner without mortgage, 3 = renter.
+ geoadj: Array of Census SPM_GEOADJ geographic adjustment factors.
+ year: The year for which to calculate thresholds.
+
+ Returns:
+ Array of SPM threshold values.
+ """
+ calc = SPMCalculator(year=year)
+ base_thresholds = calc.get_base_thresholds()
+
+ n = len(num_adults)
+ thresholds = np.zeros(n)
+
+ for i in range(n):
+ tenure_str = TENURE_CODE_MAP.get(int(tenure_codes[i]), "renter")
+ base = base_thresholds[tenure_str]
+ equiv_scale = spm_equivalence_scale(
+ int(num_adults[i]), int(num_children[i])
+ )
+ thresholds[i] = base * equiv_scale * geoadj[i]
+
+ return thresholds
diff --git a/pyproject.toml b/pyproject.toml
index 495feafe..14acb68e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,6 +42,7 @@ dependencies = [
"sqlalchemy>=2.0.41",
"sqlmodel>=0.0.24",
"xlrd>=2.0.2",
+ "spm-calculator>=0.1.0",
]
[project.optional-dependencies]