diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29b..c3d09844 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,8 @@ +- bump: minor + changes: + added: + - SPM threshold calculation using policyengine/spm-calculator package + - New utility module (policyengine_us_data/utils/spm.py) for SPM calculations + changed: + - CPS datasets now calculate SPM thresholds using spm-calculator with Census-provided geographic adjustments + - ACS datasets now calculate SPM thresholds using spm-calculator with national-level thresholds diff --git a/docs/long_term_projections.ipynb b/docs/long_term_projections.ipynb deleted file mode 100644 index 10b07b72..00000000 --- a/docs/long_term_projections.ipynb +++ /dev/null @@ -1,1015 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": "# Long Term Projections\n## Integrating Economic Uprating with Demographic Reweighting" - }, - { - "cell_type": "markdown", - "source": "## Executive Summary\n\nThis document outlines an innovative approach for projecting federal income tax revenue through 2100 that uniquely combines sophisticated economic microsimulation with demographic reweighting. By harmonizing PolicyEngine's state-of-the-art tax modeling with Social Security Administration demographic projections, we can isolate and quantify the fiscal impact of population aging while preserving the full complexity of the tax code.", - "metadata": {} - }, - { - "cell_type": "markdown", - "source": "## The Challenge\n\nProjecting tax revenue over a 75-year horizon requires simultaneously modeling two distinct but interrelated dynamics:\n\n**Economic Evolution**: How incomes, prices, and tax parameters change over time\n- Wage growth and income distribution shifts\n- Inflation affecting brackets and deductions\n- Legislative changes and indexing rules\n- Behavioral responses to tax policy\n\n**Demographic Transformation**: How the population structure evolves\n- Baby boom generation aging through retirement\n- Declining birth rates reducing working-age population\n- Increasing longevity extending retirement duration\n- Shifting household composition patterns\n\nTraditional approaches typically sacrifice either economic sophistication (using simplified tax calculations) or demographic realism (holding age distributions constant). Our methodology preserves both.", - "metadata": {} - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Run projections using `run_household_projection.py`:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "execution": { - "iopub.execute_input": "2025-11-19T19:56:10.232617Z", - "iopub.status.busy": "2025-11-19T19:56:10.232523Z", - "iopub.status.idle": "2025-11-19T19:57:25.892674Z", - "shell.execute_reply": "2025-11-19T19:57:25.892382Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "TEST_LITE == False\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "======================================================================\r\n", - "HOUSEHOLD-LEVEL INCOME TAX PROJECTION: 2025-2027\r\n", - "======================================================================\r\n", - "\r\n", - "Configuration:\r\n", - " Base year: 2024 (CPS microdata)\r\n", - " Projection: 2025-2027\r\n", - " Calculation level: HOUSEHOLD ONLY (simplified)\r\n", - " Calibration method: GREG\r\n", - " Including Social Security benefits constraint: Yes\r\n", - " Including taxable payroll constraint: Yes\r\n", - " Saving year-specific .h5 files: Yes (to ./projected_datasets/)\r\n", - " Years to process: 3\r\n", - " Estimated time: ~9 minutes\r\n", - "\r\n", - "======================================================================\r\n", - "STEP 1: DEMOGRAPHIC PROJECTIONS\r\n", - "======================================================================\r\n", - "\r\n", - "Loaded SSA projections: 86 ages x 3 years\r\n", - "\r\n", - "Population projections:\r\n", - " 2025: 346.6M\r\n", - " 2027: 350.9M\r\n", - "\r\n", - "======================================================================\r\n", - "STEP 2: BUILDING HOUSEHOLD AGE COMPOSITION\r\n", - "======================================================================\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r\n", - "Loaded 21,532 households\r\n", - "Household age matrix shape: (21532, 86)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\r\n", - "======================================================================\r\n", - "STEP 3: HOUSEHOLD-LEVEL PROJECTION\r\n", - "======================================================================\r\n", - "\r\n", - "Methodology (SIMPLIFIED):\r\n", - " 1. PolicyEngine uprates to each projection year\r\n", - " 2. Calculate all values at household level (map_to='household')\r\n", - " 3. IPF/GREG adjusts weights to match SSA demographics\r\n", - " 4. Apply calibrated weights directly (no aggregation needed)\r\n", - "\r\n", - "Initial memory usage: 1.13 GB\r\n", - "\r\n", - "Year Population Income Tax Baseline Tax Memory\r\n", - "-----------------------------------------------------------------\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " [DEBUG 2025] SS baseline: $1424.6B, target: $1609.0B\r\n", - " [DEBUG 2025] Payroll baseline: $8950.9B, target: $10621.0B\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " [DEBUG 2025] SS achieved: $1609.0B (error: -0.0%)\r\n", - " [DEBUG 2025] Payroll achieved: $10621.0B (error: -0.0%)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Saved 2025.h5\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2025 346.6M $ 2543.1B $ 1882.9B 4.32GB\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " [DEBUG 2027] SS baseline: $1495.1B, target: $1799.9B\r\n", - " [DEBUG 2027] Payroll baseline: $9718.4B, target: $11627.0B\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " [DEBUG 2027] SS achieved: $1799.9B (error: -0.0%)\r\n", - " [DEBUG 2027] Payroll achieved: $11627.0B (error: 0.0%)\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Saved 2027.h5\r\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2027 350.9M $ 2873.8B $ 2125.0B 4.62GB\r\n" - ] - } - ], - "source": [ - "# Save calibrated datasets as .h5 files for each year\n", - "!python ../policyengine_us_data/datasets/cps/long_term/run_household_projection.py 2027 --greg --use-ss --use-payroll --save-h5" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Arguments:**\n", - "- `END_YEAR`: Target year for projection (default: 2035)\n", - "- `--greg`: Use GREG calibration instead of IPF (optional)\n", - "- `--use-ss`: Include Social Security benefit totals as calibration target (requires --greg)\n", - "- `--use-payroll`: Include taxable payroll as calibration target (requires --greg)\n", - "- `--save-h5`: Save year-specific .h5 files to `./projected_datasets/` directory" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## The Challenge\n", - "\n", - "Projecting tax revenue over a 75-year horizon requires simultaneously modeling two distinct but interrelated dynamics:\n", - "\n", - "**Economic Evolution**: How incomes, prices, and tax parameters change over time\n", - "- Wage growth and income distribution shifts\n", - "- Inflation affecting brackets and deductions\n", - "- Legislative changes and indexing rules\n", - "- Behavioral responses to tax policy\n", - "\n", - "**Demographic Transformation**: How the population structure evolves\n", - "- Baby boom generation aging through retirement\n", - "- Declining birth rates reducing working-age population\n", - "- Increasing longevity extending retirement duration\n", - "- Shifting household composition patterns\n", - "\n", - "Traditional approaches typically sacrifice either economic sophistication (using simplified tax calculations) or demographic realism (holding age distributions constant). Our methodology preserves both." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data Sources\n", - "\n", - "The long-term projections use two key SSA datasets:\n", - "\n", - "1. **SSA Population Projections** (`SSPopJul_TR2024.csv`)\n", - " - Source: [SSA 2024 Trustees Report - Single Year Age Demographic Projections](https://www.ssa.gov/oact/HistEst/Population/2024/Population2024.html)\n", - " - Contains age-specific population projections through 2100\n", - " - Used for demographic reweighting to match future population structure\n", - "\n", - "2. **Social Security Cost Projections** (`social_security_aux.csv`)\n", - " - Source: [SSA 2025 Trustees Report, Table VI.G9](https://www.ssa.gov/oact/TR/2025/index.html)\n", - " - Contains OASDI benefit cost projections in CPI-indexed 2025 dollars\n", - " - Used as calibration target in GREG method to ensure fiscal consistency" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "execution": { - "iopub.execute_input": "2025-11-19T19:57:25.894248Z", - "iopub.status.busy": "2025-11-19T19:57:25.894165Z", - "iopub.status.idle": "2025-11-19T19:57:28.259561Z", - "shell.execute_reply": "2025-11-19T19:57:28.259297Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/baogorek/envs/pe/lib/python3.13/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "TEST_LITE == False\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "from pathlib import Path\n", - "\n", - "from policyengine_us_data.storage import STORAGE_FOLDER" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "execution": { - "iopub.execute_input": "2025-11-19T19:57:28.260599Z", - "iopub.status.busy": "2025-11-19T19:57:28.260502Z", - "iopub.status.idle": "2025-11-19T19:57:28.275248Z", - "shell.execute_reply": "2025-11-19T19:57:28.275040Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
YearAgeTotalM TotM SinM MarM WidM DivF TotF SinF MarF WidF Div
01941024925081276328127632800012161801216180000
11941123842901218524121852400011657661165766000
21941224450541246078124607800011989761198976000
31941323959991219543121954300011764561176456000
41941422754251157612115761200011178131117813000
\n", - "
" - ], - "text/plain": [ - " Year Age Total M Tot M Sin M Mar M Wid M Div F Tot \\\n", - "0 1941 0 2492508 1276328 1276328 0 0 0 1216180 \n", - "1 1941 1 2384290 1218524 1218524 0 0 0 1165766 \n", - "2 1941 2 2445054 1246078 1246078 0 0 0 1198976 \n", - "3 1941 3 2395999 1219543 1219543 0 0 0 1176456 \n", - "4 1941 4 2275425 1157612 1157612 0 0 0 1117813 \n", - "\n", - " F Sin F Mar F Wid F Div \n", - "0 1216180 0 0 0 \n", - "1 1165766 0 0 0 \n", - "2 1198976 0 0 0 \n", - "3 1176456 0 0 0 \n", - "4 1117813 0 0 0 " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load SSA population data\n", - "ssa_pop = pd.read_csv(STORAGE_FOLDER / 'SSPopJul_TR2024.csv')\n", - "ssa_pop.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "execution": { - "iopub.execute_input": "2025-11-19T19:57:28.276266Z", - "iopub.status.busy": "2025-11-19T19:57:28.276171Z", - "iopub.status.idle": "2025-11-19T19:57:28.280120Z", - "shell.execute_reply": "2025-11-19T19:57:28.279840Z" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
yearoasdi_cost_in_billion_2025_usdcpi_w_intermediateoasdi_cost_in_billion_nominal_usdtaxable_payroll_in_billion_nominal_usd
020251609100.001609.000010621.0
120261660102.491701.334011129.0
220271715104.951799.892511627.0
320281763107.471894.696112159.0
420291810110.051991.905012696.0
\n", - "
" - ], - "text/plain": [ - " year oasdi_cost_in_billion_2025_usd cpi_w_intermediate \\\n", - "0 2025 1609 100.00 \n", - "1 2026 1660 102.49 \n", - "2 2027 1715 104.95 \n", - "3 2028 1763 107.47 \n", - "4 2029 1810 110.05 \n", - "\n", - " oasdi_cost_in_billion_nominal_usd taxable_payroll_in_billion_nominal_usd \n", - "0 1609.0000 10621.0 \n", - "1 1701.3340 11129.0 \n", - "2 1799.8925 11627.0 \n", - "3 1894.6961 12159.0 \n", - "4 1991.9050 12696.0 " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Load Social Security auxiliary data\n", - "ss_aux = pd.read_csv(STORAGE_FOLDER / 'social_security_aux.csv')\n", - "ss_aux.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Core Innovation\n", - "\n", - "Our approach operates in two complementary stages:\n", - "\n", - "### Stage 1: Economic Uprating\n", - "\n", - "PolicyEngine's microsimulation engine projects each household's economic circumstances forward using:\n", - "\n", - "**Sophisticated Income Modeling**\n", - "\n", - "The system models 17 distinct income categories, each uprated according to its economic fundamentals:\n", - "\n", - "*Primary Categories with Specific Projections:*\n", - "- Employment income (wages) - follows CBO wage growth projections\n", - "- Self-employment income - follows CBO business income projections\n", - "- Capital gains - follows CBO asset appreciation projections\n", - "- Interest income - follows CBO interest rate projections\n", - "- Dividend income - follows CBO corporate profit projections\n", - "- Pension income - follows CBO retirement income projections\n", - "- Social Security - follows SSA COLA projections (available through 2100)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Stage 2: Demographic Reweighting\n", - "\n", - "We offer two calibration methods for adjusting household weights to match SSA projections:\n", - "\n", - "**Method 1: Iterative Proportional Fitting (IPF)**\n", - "- Traditional raking approach using Kullback-Leibler divergence\n", - "- Iteratively adjusts weights to match marginal distributions\n", - "- Robust to specification and always produces non-negative weights\n", - "- Default method for backward compatibility\n", - "\n", - "**Method 2: Generalized Regression (GREG) Calibration**\n", - "- Modern calibration using chi-squared distance minimization\n", - "- Enables simultaneous calibration to categorical AND continuous variables\n", - "- Direct solution via matrix operations (no iteration needed)\n", - "- Required for incorporating Social Security benefit constraints" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Demonstrating the Calibration Methods" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "execution": { - "iopub.execute_input": "2025-11-19T19:57:28.280998Z", - "iopub.status.busy": "2025-11-19T19:57:28.280918Z", - "iopub.status.idle": "2025-11-19T19:57:28.282636Z", - "shell.execute_reply": "2025-11-19T19:57:28.282472Z" - } - }, - "outputs": [], - "source": [ - "from policyengine_us_data.datasets.cps.long_term.ssa_data import (\n", - " load_ssa_age_projections,\n", - " load_ssa_benefit_projections\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "execution": { - "iopub.execute_input": "2025-11-19T19:57:28.283285Z", - "iopub.status.busy": "2025-11-19T19:57:28.283217Z", - "iopub.status.idle": "2025-11-19T19:57:28.296829Z", - "shell.execute_reply": "2025-11-19T19:57:28.296600Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Age distribution targets for 2025:\n", - "Shape: (86, 1)\n", - "Total population: 346577.3M\n" - ] - } - ], - "source": [ - "# Get SSA population targets for a specific year\n", - "year = 2025\n", - "age_targets = load_ssa_age_projections(end_year=year)\n", - "print(f\"\\nAge distribution targets for {year}:\")\n", - "print(f\"Shape: {age_targets.shape}\")\n", - "print(f\"Total population: {age_targets[:, 0].sum() / 1000:.1f}M\")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "execution": { - "iopub.execute_input": "2025-11-19T19:57:28.297698Z", - "iopub.status.busy": "2025-11-19T19:57:28.297611Z", - "iopub.status.idle": "2025-11-19T19:57:28.300109Z", - "shell.execute_reply": "2025-11-19T19:57:28.299938Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Social Security benefit target for 2025: $1609.0B\n" - ] - } - ], - "source": [ - "# Get Social Security benefit target\n", - "ss_target = load_ssa_benefit_projections(year)\n", - "print(f\"\\nSocial Security benefit target for {year}: ${ss_target / 1e9:.1f}B\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### PWBM Analysis: Eliminating Income Taxes on Social Security Benefits\n", - "\n", - "**Source:** [Eliminating Income Taxes on Social Security Benefits](https://budgetmodel.wharton.upenn.edu/issues/2025/2/10/eliminating-income-taxes-on-social-security-benefits) (Penn Wharton Budget Model, February 10, 2025)\n", - "\n", - "---\n", - "\n", - "#### Policy Analyzed\n", - "The Penn Wharton Budget Model (PWBM) analyzed a policy proposal to permanently eliminate all income taxes on Social Security benefits, effective January 1, 2025.\n", - "\n", - "#### Key Findings\n", - "\n", - "* **Budgetary Impact:** The policy is projected to reduce federal revenues by **$1.45 trillion** over the 10-year budget window (2025-2034). Over the long term, it is projected to increase federal debt by 7 percent by 2054, relative to the current baseline.\n", - "\n", - "* **Macroeconomic Impact:** The analysis finds the policy would have negative long-term effects on the economy.\n", - " * It reduces incentives for households to save for retirement and to work.\n", - " * This leads to a smaller capital stock (projected to be 4.2% lower by 2054).\n", - " * The smaller capital stock results in lower average wages (1.8% lower by 2054) and lower GDP (2.1% lower by 2054).\n", - "\n", - "* **Conventional Distributional Impact (Your Table):** The table you shared shows the annual \"conventional\" effects on household after-tax income.\n", - " * The largest average *dollar* tax cuts go to households in the top 20 percent of the income distribution (quintiles 80-100%).\n", - " * The largest *relative* gains (as a percentage of income) go to households in the fourth quintile (60-80%), who see a 1.6% increase in after-tax income by 2054.\n", - " * The dollar amounts shown are in **nominal dollars** for each specified year, not adjusted to a single base year.\n", - "\n", - "* **Dynamic (Lifetime) Impact:** When analyzing the policy's effects over a household's entire lifetime, PWBM finds:\n", - " * The policy primarily benefits high-income households who are nearing or in retirement.\n", - " * It negatively impacts all households under the age of 30 and all future generations, who would experience a net welfare loss due to the long-term effects of lower wages and higher federal debt." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### PolicyEngine's Analysis of Eliminating Income Taxes on Social Security Benefits" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "execution": { - "iopub.execute_input": "2025-11-19T19:57:28.300920Z", - "iopub.status.busy": "2025-11-19T19:57:28.300844Z", - "iopub.status.idle": "2025-11-19T19:59:04.747344Z", - "shell.execute_reply": "2025-11-19T19:59:04.747122Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "================================================================================\n", - "WHARTON COMPARISON PIPELINE - YEAR 2054\n", - "================================================================================\n", - "\n", - "Running PolicyEngine analysis...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Loading dataset: hf://policyengine/test/2054.h5\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Ranking according to quantiles with: household_net_income\n", - "✓ Analysis complete\n", - " Revenue impact: $-579.1B\n", - "\n", - "Generating comparison table...\n", - "\n", - "================================================================================\n", - "COMPARISON TABLE: 2054\n", - "================================================================================\n", - "\n", - "Average Tax Change (per household):\n", - " Income Group PolicyEngine Wharton Difference\n", - " First quintile $-95 $-5 $-90\n", - "Second quintile $-1,054 $-275 $-779\n", - "Middle quintile $-2,241 $-1,730 $-511\n", - "Fourth quintile $-4,633 $-3,560 $-1,073\n", - " 80-90% $-6,737 $-4,075 $-2,662\n", - " 90-95% $-12,121 $-4,385 $-7,736\n", - " 95-99% $-8,066 $-4,565 $-3,501\n", - " 99-99.9% $-7,257 $-4,820 $-2,437\n", - " Top 0.1% $-8,615 $-5,080 $-3,535\n", - "\n", - "Percent Change in Income:\n", - " Income Group PE % Wharton %\n", - " First quintile -0.10000000149011612% 0.0%\n", - "Second quintile 0.800000011920929% 0.3%\n", - "Middle quintile 1.100000023841858% 1.3%\n", - "Fourth quintile 1.399999976158142% 1.6%\n", - " 80-90% 1.399999976158142% 1.2%\n", - " 90-95% 1.7000000476837158% 0.9%\n", - " 95-99% 0.699999988079071% 0.6%\n", - " 99-99.9% 0.30000001192092896% 0.2%\n", - " Top 0.1% 0.10000000149011612% 0.0%\n", - "\n" - ] - } - ], - "source": [ - "import sys\n", - "import os\n", - "import pandas as pd\n", - "import numpy as np\n", - "import gc\n", - "\n", - "from policyengine_us import Microsimulation\n", - "from policyengine_core.reforms import Reform\n", - "\n", - "WHARTON_BENCHMARKS = {\n", - " 2026: {\n", - " 'First quintile': {'tax_change': 0, 'pct_change': 0.0},\n", - " 'Second quintile': {'tax_change': -15, 'pct_change': 0.0},\n", - " 'Middle quintile': {'tax_change': -340, 'pct_change': 0.5},\n", - " 'Fourth quintile': {'tax_change': -1135, 'pct_change': 1.1},\n", - " '80-90%': {'tax_change': -1625, 'pct_change': 1.0},\n", - " '90-95%': {'tax_change': -1590, 'pct_change': 0.7},\n", - " '95-99%': {'tax_change': -2020, 'pct_change': 0.5},\n", - " '99-99.9%': {'tax_change': -2205, 'pct_change': 0.2},\n", - " 'Top 0.1%': {'tax_change': -2450, 'pct_change': 0.0},\n", - " },\n", - " 2034: {\n", - " 'First quintile': {'tax_change': 0, 'pct_change': 0.0},\n", - " 'Second quintile': {'tax_change': -45, 'pct_change': 0.1},\n", - " 'Middle quintile': {'tax_change': -615, 'pct_change': 0.8},\n", - " 'Fourth quintile': {'tax_change': -1630, 'pct_change': 1.2},\n", - " '80-90%': {'tax_change': -2160, 'pct_change': 1.1},\n", - " '90-95%': {'tax_change': -2160, 'pct_change': 0.7},\n", - " '95-99%': {'tax_change': -2605, 'pct_change': 0.6},\n", - " '99-99.9%': {'tax_change': -2715, 'pct_change': 0.2},\n", - " 'Top 0.1%': {'tax_change': -2970, 'pct_change': 0.0},\n", - " },\n", - " 2054: {\n", - " 'First quintile': {'tax_change': -5, 'pct_change': 0.0},\n", - " 'Second quintile': {'tax_change': -275, 'pct_change': 0.3},\n", - " 'Middle quintile': {'tax_change': -1730, 'pct_change': 1.3},\n", - " 'Fourth quintile': {'tax_change': -3560, 'pct_change': 1.6},\n", - " '80-90%': {'tax_change': -4075, 'pct_change': 1.2},\n", - " '90-95%': {'tax_change': -4385, 'pct_change': 0.9},\n", - " '95-99%': {'tax_change': -4565, 'pct_change': 0.6},\n", - " '99-99.9%': {'tax_change': -4820, 'pct_change': 0.2},\n", - " 'Top 0.1%': {'tax_change': -5080, 'pct_change': 0.0},\n", - " },\n", - "}\n", - "\n", - "def run_analysis(dataset_path, year, income_rank_var = \"household_net_income\"):\n", - " \"\"\"Run Option 1 analysis for given dataset and year\"\"\"\n", - "\n", - " option1_reform = Reform.from_dict(\n", - " {\n", - " # Base rate parameters (0-50% bracket)\n", - " \"gov.irs.social_security.taxability.rate.base.benefit_cap\": {\n", - " \"2026-01-01.2100-12-31\": 0\n", - " },\n", - " \"gov.irs.social_security.taxability.rate.base.excess\": {\n", - " \"2026-01-01.2100-12-31\": 0\n", - " },\n", - " # Additional rate parameters (50-85% bracket)\n", - " \"gov.irs.social_security.taxability.rate.additional.benefit_cap\": {\n", - " \"2026-01-01.2100-12-31\": 0\n", - " },\n", - " \"gov.irs.social_security.taxability.rate.additional.bracket\": {\n", - " \"2026-01-01.2100-12-31\": 0\n", - " },\n", - " \"gov.irs.social_security.taxability.rate.additional.excess\": {\n", - " \"2026-01-01.2100-12-31\": 0\n", - " }\n", - " }, country_id=\"us\"\n", - " )\n", - " reform = Microsimulation(dataset=dataset_path, reform=option1_reform)\n", - "\n", - " # Get household data\n", - " household_net_income_reform = reform.calculate(\"household_net_income\", period=year, map_to=\"household\")\n", - " household_agi_reform = reform.calculate(\"adjusted_gross_income\", period=year, map_to=\"household\")\n", - " income_tax_reform = reform.calculate(\"income_tax\", period=year, map_to=\"household\")\n", - "\n", - " del reform\n", - " gc.collect()\n", - "\n", - " print(f\"Loading dataset: {dataset_path}\")\n", - " baseline = Microsimulation(dataset=dataset_path)\n", - " household_weight = baseline.calculate(\"household_weight\", period=year)\n", - " household_net_income_baseline = baseline.calculate(\"household_net_income\", period=year, map_to=\"household\")\n", - " household_agi_baseline = baseline.calculate(\"adjusted_gross_income\", period=year, map_to=\"household\")\n", - " income_tax_baseline = baseline.calculate(\"income_tax\", period=year, map_to=\"household\")\n", - "\n", - " # Calculate changes\n", - " tax_change = income_tax_reform - income_tax_baseline\n", - " income_change_pct = (\n", - " (household_net_income_reform - household_net_income_baseline) / household_net_income_baseline\n", - " ) * 100\n", - "\n", - " # Create DataFrame\n", - " df = pd.DataFrame({\n", - " 'household_net_income': household_net_income_baseline,\n", - " 'weight': household_weight,\n", - " 'tax_change': tax_change,\n", - " 'income_change_pct': income_change_pct,\n", - " 'income_rank_var': baseline.calculate(income_rank_var, year, map_to=\"household\")\n", - " })\n", - " \n", - " # Calculate percentiles\n", - "\n", - " print(f\"Ranking according to quantiles with: {income_rank_var}\")\n", - " df['income_percentile'] = df['income_rank_var'].rank(pct=True) * 100\n", - "\n", - " # Assign income groups\n", - " def assign_income_group(percentile):\n", - " if percentile <= 20:\n", - " return 'First quintile'\n", - " elif percentile <= 40:\n", - " return 'Second quintile'\n", - " elif percentile <= 60:\n", - " return 'Middle quintile'\n", - " elif percentile <= 80:\n", - " return 'Fourth quintile'\n", - " elif percentile <= 90:\n", - " return '80-90%'\n", - " elif percentile <= 95:\n", - " return '90-95%'\n", - " elif percentile <= 99:\n", - " return '95-99%'\n", - " elif percentile <= 99.9:\n", - " return '99-99.9%'\n", - " else:\n", - " return 'Top 0.1%'\n", - "\n", - " df['income_group'] = df['income_percentile'].apply(assign_income_group)\n", - "\n", - " # Calculate aggregate revenue\n", - " revenue_impact = (income_tax_reform.sum() - income_tax_baseline.sum()) / 1e9\n", - "\n", - " # Calculate by group\n", - " results = []\n", - " for group in ['First quintile', 'Second quintile', 'Middle quintile', 'Fourth quintile',\n", - " '80-90%', '90-95%', '95-99%', '99-99.9%', 'Top 0.1%']:\n", - " group_data = df[df['income_group'] == group]\n", - " if len(group_data) == 0:\n", - " continue\n", - "\n", - " total_weight = group_data['weight'].sum()\n", - " avg_tax_change = (group_data['tax_change'] * group_data['weight']).sum() / total_weight\n", - " avg_income_change_pct = (group_data['income_change_pct'] * group_data['weight']).sum() / total_weight\n", - "\n", - " results.append({\n", - " 'group': group,\n", - " 'pe_tax_change': round(avg_tax_change),\n", - " 'pe_pct_change': round(avg_income_change_pct, 1),\n", - " })\n", - "\n", - " return pd.DataFrame(results), revenue_impact\n", - "\n", - "def generate_comparison_table(pe_results, year):\n", - " \"\"\"Generate comparison table with Wharton benchmark\"\"\"\n", - "\n", - " if year not in WHARTON_BENCHMARKS:\n", - " print(f\"Warning: No Wharton benchmark available for year {year}\")\n", - " return pe_results\n", - "\n", - " wharton_data = WHARTON_BENCHMARKS[year]\n", - "\n", - " comparison = []\n", - " for _, row in pe_results.iterrows():\n", - " group = row['group']\n", - " wharton = wharton_data.get(group, {'tax_change': None, 'pct_change': None})\n", - "\n", - " pe_tax = row['pe_tax_change']\n", - " wh_tax = wharton['tax_change']\n", - "\n", - " comparison.append({\n", - " 'Income Group': group,\n", - " 'PolicyEngine': f\"${pe_tax:,}\",\n", - " 'Wharton': f\"${wh_tax:,}\" if wh_tax is not None else 'N/A',\n", - " 'Difference': f\"${(pe_tax - wh_tax):,}\" if wh_tax is not None else 'N/A',\n", - " 'PE %': f\"{row['pe_pct_change']}%\",\n", - " 'Wharton %': f\"{wharton['pct_change']}%\" if wharton['pct_change'] is not None else 'N/A',\n", - " })\n", - "\n", - " return pd.DataFrame(comparison)\n", - "\n", - "dataset_path = 'hf://policyengine/test/2054.h5'\n", - "year = 2054\n", - "income_rank_variable = \"household_net_income\"\n", - "\n", - "print(\"=\"*80)\n", - "print(f\"WHARTON COMPARISON PIPELINE - YEAR {year}\")\n", - "print(\"=\"*80)\n", - "print()\n", - "\n", - "# Run analysis\n", - "print(\"Running PolicyEngine analysis...\")\n", - "pe_results, revenue_impact = run_analysis(dataset_path, year, income_rank_variable)\n", - "print(f\"✓ Analysis complete\")\n", - "print(f\" Revenue impact: ${revenue_impact:.1f}B\")\n", - "print()\n", - "\n", - "# Generate comparison table\n", - "print(\"Generating comparison table...\")\n", - "comparison_table = generate_comparison_table(pe_results, year)\n", - "\n", - "print()\n", - "print(\"=\"*80)\n", - "print(f\"COMPARISON TABLE: {year}\")\n", - "print(\"=\"*80)\n", - "print()\n", - "print(\"Average Tax Change (per household):\")\n", - "print(comparison_table[['Income Group', 'PolicyEngine', 'Wharton', 'Difference']].to_string(index=False))\n", - "print()\n", - "print(\"Percent Change in Income:\")\n", - "print(comparison_table[['Income Group', 'PE %', 'Wharton %']].to_string(index=False))\n", - "print()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.6" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} \ No newline at end of file diff --git a/docs/long_term_projections.md b/docs/long_term_projections.md new file mode 100644 index 00000000..cc5d14c0 --- /dev/null +++ b/docs/long_term_projections.md @@ -0,0 +1,348 @@ +# Long Term Projections +## Integrating Economic Uprating with Demographic Reweighting + +## Executive Summary + +This document outlines an innovative approach for projecting federal income tax revenue through 2100 that uniquely combines sophisticated economic microsimulation with demographic reweighting. By harmonizing PolicyEngine's state-of-the-art tax modeling with Social Security Administration demographic projections, we can isolate and quantify the fiscal impact of population aging while preserving the full complexity of the tax code. + +## The Challenge + +Projecting tax revenue over a 75-year horizon requires simultaneously modeling two distinct but interrelated dynamics: + +**Economic Evolution**: How incomes, prices, and tax parameters change over time +- Wage growth and income distribution shifts +- Inflation affecting brackets and deductions +- Legislative changes and indexing rules +- Behavioral responses to tax policy + +**Demographic Transformation**: How the population structure evolves +- Baby boom generation aging through retirement +- Declining birth rates reducing working-age population +- Increasing longevity extending retirement duration +- Shifting household composition patterns + +Traditional approaches typically sacrifice either economic sophistication (using simplified tax calculations) or demographic realism (holding age distributions constant). Our methodology preserves both. + +## Running Projections + +Run projections using `run_household_projection.py`: + +```bash +# Save calibrated datasets as .h5 files for each year +python ../policyengine_us_data/datasets/cps/long_term/run_household_projection.py 2027 --greg --use-ss --use-payroll --save-h5 +``` + +**Arguments:** +- `END_YEAR`: Target year for projection (default: 2035) +- `--greg`: Use GREG calibration instead of IPF (optional) +- `--use-ss`: Include Social Security benefit totals as calibration target (requires --greg) +- `--use-payroll`: Include taxable payroll as calibration target (requires --greg) +- `--save-h5`: Save year-specific .h5 files to `./projected_datasets/` directory + +## Data Sources + +The long-term projections use two key SSA datasets: + +1. **SSA Population Projections** (`SSPopJul_TR2024.csv`) + - Source: [SSA 2024 Trustees Report - Single Year Age Demographic Projections](https://www.ssa.gov/oact/HistEst/Population/2024/Population2024.html) + - Contains age-specific population projections through 2100 + - Used for demographic reweighting to match future population structure + +2. **Social Security Cost Projections** (`social_security_aux.csv`) + - Source: [SSA 2025 Trustees Report, Table VI.G9](https://www.ssa.gov/oact/TR/2025/index.html) + - Contains OASDI benefit cost projections in CPI-indexed 2025 dollars + - Used as calibration target in GREG method to ensure fiscal consistency + +### Loading SSA Data + +```python +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +from pathlib import Path + +from policyengine_us_data.storage import STORAGE_FOLDER + +# Load SSA population data +ssa_pop = pd.read_csv(STORAGE_FOLDER / 'SSPopJul_TR2024.csv') +ssa_pop.head() + +# Load Social Security auxiliary data +ss_aux = pd.read_csv(STORAGE_FOLDER / 'social_security_aux.csv') +ss_aux.head() +``` + +## Core Innovation + +Our approach operates in two complementary stages: + +### Stage 1: Economic Uprating + +PolicyEngine's microsimulation engine projects each household's economic circumstances forward using: + +**Sophisticated Income Modeling** + +The system models 17 distinct income categories, each uprated according to its economic fundamentals: + +*Primary Categories with Specific Projections:* +- Employment income (wages) - follows CBO wage growth projections +- Self-employment income - follows CBO business income projections +- Capital gains - follows CBO asset appreciation projections +- Interest income - follows CBO interest rate projections +- Dividend income - follows CBO corporate profit projections +- Pension income - follows CBO retirement income projections +- Social Security - follows SSA COLA projections (available through 2100) + +### Stage 2: Demographic Reweighting + +We offer two calibration methods for adjusting household weights to match SSA projections: + +**Method 1: Iterative Proportional Fitting (IPF)** +- Traditional raking approach using Kullback-Leibler divergence +- Iteratively adjusts weights to match marginal distributions +- Robust to specification and always produces non-negative weights +- Default method for backward compatibility + +**Method 2: Generalized Regression (GREG) Calibration** +- Modern calibration using chi-squared distance minimization +- Enables simultaneous calibration to categorical AND continuous variables +- Direct solution via matrix operations (no iteration needed) +- Required for incorporating Social Security benefit constraints + +## Demonstrating the Calibration Methods + +```python +from policyengine_us_data.datasets.cps.long_term.ssa_data import ( + load_ssa_age_projections, + load_ssa_benefit_projections +) + +# Get SSA population targets for a specific year +year = 2025 +age_targets = load_ssa_age_projections(end_year=year) +print(f"\nAge distribution targets for {year}:") +print(f"Shape: {age_targets.shape}") +print(f"Total population: {age_targets[:, 0].sum() / 1000:.1f}M") + +# Get Social Security benefit target +ss_target = load_ssa_benefit_projections(year) +print(f"\nSocial Security benefit target for {year}: ${ss_target / 1e9:.1f}B") +``` + +## PWBM Analysis: Eliminating Income Taxes on Social Security Benefits + +**Source:** [Eliminating Income Taxes on Social Security Benefits](https://budgetmodel.wharton.upenn.edu/issues/2025/2/10/eliminating-income-taxes-on-social-security-benefits) (Penn Wharton Budget Model, February 10, 2025) + +--- + +### Policy Analyzed +The Penn Wharton Budget Model (PWBM) analyzed a policy proposal to permanently eliminate all income taxes on Social Security benefits, effective January 1, 2025. + +### Key Findings + +* **Budgetary Impact:** The policy is projected to reduce federal revenues by **$1.45 trillion** over the 10-year budget window (2025-2034). Over the long term, it is projected to increase federal debt by 7 percent by 2054, relative to the current baseline. + +* **Macroeconomic Impact:** The analysis finds the policy would have negative long-term effects on the economy. + * It reduces incentives for households to save for retirement and to work. + * This leads to a smaller capital stock (projected to be 4.2% lower by 2054). + * The smaller capital stock results in lower average wages (1.8% lower by 2054) and lower GDP (2.1% lower by 2054). + +* **Conventional Distributional Impact (Your Table):** The table you shared shows the annual "conventional" effects on household after-tax income. + * The largest average *dollar* tax cuts go to households in the top 20 percent of the income distribution (quintiles 80-100%). + * The largest *relative* gains (as a percentage of income) go to households in the fourth quintile (60-80%), who see a 1.6% increase in after-tax income by 2054. + * The dollar amounts shown are in **nominal dollars** for each specified year, not adjusted to a single base year. + +* **Dynamic (Lifetime) Impact:** When analyzing the policy's effects over a household's entire lifetime, PWBM finds: + * The policy primarily benefits high-income households who are nearing or in retirement. + * It negatively impacts all households under the age of 30 and all future generations, who would experience a net welfare loss due to the long-term effects of lower wages and higher federal debt. + +## PolicyEngine's Analysis of Eliminating Income Taxes on Social Security Benefits + +```python +import sys +import os +import pandas as pd +import numpy as np +import gc + +from policyengine_us import Microsimulation +from policyengine_core.reforms import Reform + +WHARTON_BENCHMARKS = { + 2026: { + 'First quintile': {'tax_change': 0, 'pct_change': 0.0}, + 'Second quintile': {'tax_change': -15, 'pct_change': 0.0}, + 'Middle quintile': {'tax_change': -340, 'pct_change': 0.5}, + 'Fourth quintile': {'tax_change': -1135, 'pct_change': 1.1}, + '80-90%': {'tax_change': -1625, 'pct_change': 1.0}, + '90-95%': {'tax_change': -1590, 'pct_change': 0.7}, + '95-99%': {'tax_change': -2020, 'pct_change': 0.5}, + '99-99.9%': {'tax_change': -2205, 'pct_change': 0.2}, + 'Top 0.1%': {'tax_change': -2450, 'pct_change': 0.0}, + }, + 2034: { + 'First quintile': {'tax_change': 0, 'pct_change': 0.0}, + 'Second quintile': {'tax_change': -45, 'pct_change': 0.1}, + 'Middle quintile': {'tax_change': -615, 'pct_change': 0.8}, + 'Fourth quintile': {'tax_change': -1630, 'pct_change': 1.2}, + '80-90%': {'tax_change': -2160, 'pct_change': 1.1}, + '90-95%': {'tax_change': -2160, 'pct_change': 0.7}, + '95-99%': {'tax_change': -2605, 'pct_change': 0.6}, + '99-99.9%': {'tax_change': -2715, 'pct_change': 0.2}, + 'Top 0.1%': {'tax_change': -2970, 'pct_change': 0.0}, + }, + 2054: { + 'First quintile': {'tax_change': -5, 'pct_change': 0.0}, + 'Second quintile': {'tax_change': -275, 'pct_change': 0.3}, + 'Middle quintile': {'tax_change': -1730, 'pct_change': 1.3}, + 'Fourth quintile': {'tax_change': -3560, 'pct_change': 1.6}, + '80-90%': {'tax_change': -4075, 'pct_change': 1.2}, + '90-95%': {'tax_change': -4385, 'pct_change': 0.9}, + '95-99%': {'tax_change': -4565, 'pct_change': 0.6}, + '99-99.9%': {'tax_change': -4820, 'pct_change': 0.2}, + 'Top 0.1%': {'tax_change': -5080, 'pct_change': 0.0}, + }, +} + +def run_analysis(dataset_path, year, income_rank_var = "household_net_income"): + """Run Option 1 analysis for given dataset and year""" + + option1_reform = Reform.from_dict( + { + # Base rate parameters (0-50% bracket) + "gov.irs.social_security.taxability.rate.base.benefit_cap": { + "2026-01-01.2100-12-31": 0 + }, + "gov.irs.social_security.taxability.rate.base.excess": { + "2026-01-01.2100-12-31": 0 + }, + # Additional rate parameters (50-85% bracket) + "gov.irs.social_security.taxability.rate.additional.benefit_cap": { + "2026-01-01.2100-12-31": 0 + }, + "gov.irs.social_security.taxability.rate.additional.bracket": { + "2026-01-01.2100-12-31": 0 + }, + "gov.irs.social_security.taxability.rate.additional.excess": { + "2026-01-01.2100-12-31": 0 + } + }, country_id="us" + ) + reform = Microsimulation(dataset=dataset_path, reform=option1_reform) + + # Get household data + household_net_income_reform = reform.calculate("household_net_income", period=year, map_to="household") + household_agi_reform = reform.calculate("adjusted_gross_income", period=year, map_to="household") + income_tax_reform = reform.calculate("income_tax", period=year, map_to="household") + + del reform + gc.collect() + + print(f"Loading dataset: {dataset_path}") + baseline = Microsimulation(dataset=dataset_path) + household_weight = baseline.calculate("household_weight", period=year) + household_net_income_baseline = baseline.calculate("household_net_income", period=year, map_to="household") + household_agi_baseline = baseline.calculate("adjusted_gross_income", period=year, map_to="household") + income_tax_baseline = baseline.calculate("income_tax", period=year, map_to="household") + + # Calculate changes + tax_change = income_tax_reform - income_tax_baseline + income_change_pct = ( + (household_net_income_reform - household_net_income_baseline) / household_net_income_baseline + ) * 100 + + # Create DataFrame + df = pd.DataFrame({ + 'household_net_income': household_net_income_baseline, + 'weight': household_weight, + 'tax_change': tax_change, + 'income_change_pct': income_change_pct, + 'income_rank_var': baseline.calculate(income_rank_var, year, map_to="household") + }) + + # Calculate percentiles + + print(f"Ranking according to quantiles with: {income_rank_var}") + df['income_percentile'] = df['income_rank_var'].rank(pct=True) * 100 + + # Assign income groups + def assign_income_group(percentile): + if percentile <= 20: + return 'First quintile' + elif percentile <= 40: + return 'Second quintile' + elif percentile <= 60: + return 'Middle quintile' + elif percentile <= 80: + return 'Fourth quintile' + elif percentile <= 90: + return '80-90%' + elif percentile <= 95: + return '90-95%' + elif percentile <= 99: + return '95-99%' + elif percentile <= 99.9: + return '99-99.9%' + else: + return 'Top 0.1%' + + df['income_group'] = df['income_percentile'].apply(assign_income_group) + + # Calculate aggregate revenue + revenue_impact = (income_tax_reform.sum() - income_tax_baseline.sum()) / 1e9 + + # Calculate by group + results = [] + for group in ['First quintile', 'Second quintile', 'Middle quintile', 'Fourth quintile', + '80-90%', '90-95%', '95-99%', '99-99.9%', 'Top 0.1%']: + group_data = df[df['income_group'] == group] + if len(group_data) == 0: + continue + + total_weight = group_data['weight'].sum() + avg_tax_change = (group_data['tax_change'] * group_data['weight']).sum() / total_weight + avg_income_change_pct = (group_data['income_change_pct'] * group_data['weight']).sum() / total_weight + + results.append({ + 'group': group, + 'pe_tax_change': round(avg_tax_change), + 'pe_pct_change': round(avg_income_change_pct, 1), + }) + + return pd.DataFrame(results), revenue_impact + +def generate_comparison_table(pe_results, year): + """Generate comparison table with Wharton benchmark""" + + if year not in WHARTON_BENCHMARKS: + print(f"Warning: No Wharton benchmark available for year {year}") + return pe_results + + wharton_data = WHARTON_BENCHMARKS[year] + + comparison = [] + for _, row in pe_results.iterrows(): + group = row['group'] + wharton = wharton_data.get(group, {'tax_change': None, 'pct_change': None}) + + pe_tax = row['pe_tax_change'] + wh_tax = wharton['tax_change'] + + comparison.append({ + 'Income Group': group, + 'PolicyEngine': f"${pe_tax:,}", + 'Wharton': f"${wh_tax:,}" if wh_tax is not None else 'N/A', + 'Difference': f"${(pe_tax - wh_tax):,}" if wh_tax is not None else 'N/A', + 'PE %': f"{row['pe_pct_change']}%", + 'Wharton %': f"{wharton['pct_change']}%" if wharton['pct_change'] is not None else 'N/A', + }) + + return pd.DataFrame(comparison) + +# Example usage: +# dataset_path = 'hf://policyengine/test/2054.h5' +# year = 2054 +# income_rank_variable = "household_net_income" +# pe_results, revenue_impact = run_analysis(dataset_path, year, income_rank_variable) +# comparison_table = generate_comparison_table(pe_results, year) +``` diff --git a/docs/myst.yml b/docs/myst.yml index 6b6ceae6..c38666af 100644 --- a/docs/myst.yml +++ b/docs/myst.yml @@ -24,8 +24,8 @@ project: - file: background.md - file: data.md - file: methodology.md + - file: long_term_projections.md - file: local_area_calibration_setup.ipynb - - file: long_term_projections.ipynb - file: discussion.md - file: conclusion.md - file: appendix.md diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py index 06619d6e..9b09a38f 100644 --- a/policyengine_us_data/datasets/cps/cps.py +++ b/policyengine_us_data/datasets/cps/cps.py @@ -78,7 +78,7 @@ def generate(self): undocumented_students_target=0.21 * 1.9e6, ) logging.info("Adding family variables") - add_spm_variables(cps, spm_unit) + add_spm_variables(self, cps, spm_unit) logging.info("Adding household variables") add_household_variables(cps, household) logging.info("Adding rent") @@ -602,7 +602,11 @@ def add_personal_income_variables( cps[f"{var}_would_be_qualified"] = rng.random(len(person)) < prob -def add_spm_variables(cps: h5py.File, spm_unit: DataFrame) -> None: +def add_spm_variables(self, cps: h5py.File, spm_unit: DataFrame) -> None: + from policyengine_us_data.utils.spm import ( + calculate_spm_thresholds_with_geoadj, + ) + SPM_RENAMES = dict( spm_unit_total_income_reported="SPM_TOTVAL", snap_reported="SPM_SNAPSUB", @@ -616,7 +620,6 @@ def add_spm_variables(cps: h5py.File, spm_unit: DataFrame) -> None: # State tax includes refundable credits. spm_unit_state_tax_reported="SPM_STTAX", spm_unit_capped_work_childcare_expenses="SPM_CAPWKCCXPNS", - spm_unit_spm_threshold="SPM_POVTHRESHOLD", spm_unit_net_income_reported="SPM_RESOURCES", spm_unit_pre_subsidy_childcare_expenses="SPM_CHILDCAREXPNS", ) @@ -625,6 +628,16 @@ def add_spm_variables(cps: h5py.File, spm_unit: DataFrame) -> None: if asec_variable in spm_unit.columns: cps[openfisca_variable] = spm_unit[asec_variable] + # Calculate SPM thresholds using spm-calculator with Census-provided + # geographic adjustment factors (SPM_GEOADJ) + cps["spm_unit_spm_threshold"] = calculate_spm_thresholds_with_geoadj( + num_adults=spm_unit["SPM_NUMADULTS"].values, + num_children=spm_unit["SPM_NUMKIDS"].values, + tenure_codes=spm_unit["SPM_TENMORTSTATUS"].values, + geoadj=spm_unit["SPM_GEOADJ"].values, + year=self.time_period, + ) + cps["reduced_price_school_meals_reported"] = ( cps["free_school_meals_reported"] * 0 ) diff --git a/policyengine_us_data/utils/spm.py b/policyengine_us_data/utils/spm.py new file mode 100644 index 00000000..070db533 --- /dev/null +++ b/policyengine_us_data/utils/spm.py @@ -0,0 +1,53 @@ +"""SPM threshold calculation utilities using the spm-calculator package.""" + +import numpy as np +from spm_calculator import SPMCalculator, spm_equivalence_scale + + +TENURE_CODE_MAP = { + 1: "owner_with_mortgage", + 2: "owner_without_mortgage", + 3: "renter", +} + + +def calculate_spm_thresholds_with_geoadj( + num_adults: np.ndarray, + num_children: np.ndarray, + tenure_codes: np.ndarray, + geoadj: np.ndarray, + year: int, +) -> np.ndarray: + """ + Calculate SPM thresholds using Census-provided geographic adjustments. + + This function uses the SPM_GEOADJ values already computed by the Census + Bureau, combined with spm-calculator's base thresholds and equivalence + scale formula. This avoids the need for a Census API key. + + Args: + num_adults: Array of number of adults (18+) in each SPM unit. + num_children: Array of number of children (<18) in each SPM unit. + tenure_codes: Array of Census tenure/mortgage status codes. + 1 = owner with mortgage, 2 = owner without mortgage, 3 = renter. + geoadj: Array of Census SPM_GEOADJ geographic adjustment factors. + year: The year for which to calculate thresholds. + + Returns: + Array of SPM threshold values. + """ + calc = SPMCalculator(year=year) + base_thresholds = calc.get_base_thresholds() + + n = len(num_adults) + thresholds = np.zeros(n) + + for i in range(n): + tenure_str = TENURE_CODE_MAP.get(int(tenure_codes[i]), "renter") + base = base_thresholds[tenure_str] + equiv_scale = spm_equivalence_scale( + int(num_adults[i]), int(num_children[i]) + ) + thresholds[i] = base * equiv_scale * geoadj[i] + + return thresholds diff --git a/pyproject.toml b/pyproject.toml index 495feafe..14acb68e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ dependencies = [ "sqlalchemy>=2.0.41", "sqlmodel>=0.0.24", "xlrd>=2.0.2", + "spm-calculator>=0.1.0", ] [project.optional-dependencies]