diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29b..fab83b88 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: minor + changes: + added: + - Added --use-tob flag for TOB (Taxation of Benefits) revenue calibration targeting OASDI and HI trust fund revenue diff --git a/policyengine_us_data/datasets/cps/long_term/README.md b/policyengine_us_data/datasets/cps/long_term/README.md index 9ca467f7..c20216dc 100644 --- a/policyengine_us_data/datasets/cps/long_term/README.md +++ b/policyengine_us_data/datasets/cps/long_term/README.md @@ -6,8 +6,8 @@ Run projections using `run_household_projection.py`: ```bash -# Recommended: GREG with all three constraint types -python run_household_projection.py 2100 --greg --use-ss --use-payroll --save-h5 +# Recommended: GREG with all constraint types +python run_household_projection.py 2100 --greg --use-ss --use-payroll --use-tob --save-h5 # IPF with only age distribution constraints (faster, less accurate) python run_household_projection.py 2050 @@ -21,6 +21,7 @@ python run_household_projection.py 2100 --greg --use-ss - `--greg`: Use GREG calibration instead of IPF - `--use-ss`: Include Social Security benefit totals as calibration target (requires `--greg`) - `--use-payroll`: Include taxable payroll totals as calibration target (requires `--greg`) +- `--use-tob`: Include TOB (Taxation of Benefits) revenue as calibration target (requires `--greg`) - `--save-h5`: Save year-specific .h5 files to `./projected_datasets/` directory **Estimated runtime:** ~2 minutes/year without `--save-h5`, ~3 minutes/year with `--save-h5` @@ -36,7 +37,7 @@ python run_household_projection.py 2100 --greg --use-ss **GREG (Generalized Regression Estimator)** - Solves for weights matching multiple constraints simultaneously -- Can enforce age distribution + Social Security benefits + taxable payroll +- Can enforce age distribution + Social Security benefits + taxable payroll + TOB revenue - One-shot solution using `samplics` package - **Recommended** for accurate long-term projections @@ -57,17 +58,29 @@ python run_household_projection.py 2100 --greg --use-ss - Calculated as: `taxable_earnings_for_social_security` + `social_security_taxable_self_employment_income` - Source: SSA Trustee Report 2024 (`social_security_aux.csv`) +4. **TOB Revenue** (`--use-tob`, GREG only) + - Taxation of Benefits revenue for OASDI and Medicare HI trust funds + - OASDI: `tob_revenue_oasdi` (tier 1 taxation, 0-50% of benefits) + - HI: `tob_revenue_medicare_hi` (tier 2 taxation, 50-85% of benefits) + - Source: SSA Trustee Report 2024 (`social_security_aux.csv`) + --- ### Data Sources -All data from **SSA 2024 Trustee Report**: +**SSA 2025 OASDI Trustees Report** +- URL: https://www.ssa.gov/OACT/TR/2025/ +- File: `SingleYearTRTables_TR2025.xlsx` +- Tables: IV.B2 (OASDI TOB % of taxable payroll), VI.G6 (taxable payroll in billions), VI.G9 (OASDI costs) -- `SSPopJul_TR2024.csv` - Population projections 2025-2100 by single year of age -- `social_security_aux.csv` - OASDI costs and taxable payroll projections 2025-2100 - - Extracted from `SingleYearTRTables_TR2025.xlsx` Table VI.G9 using `extract_ssa_costs.py` +**CMS 2025 Medicare Trustees Report** +- URL: https://www.cms.gov/data-research/statistics-trends-and-reports/trustees-report-trust-funds +- File: `tr2025-tables-figures.zip` → CSV folder → "Medicare Sources of Non-Interest Income..." +- Column: Tax on Benefits (values in millions, 2024-2099) -Files located in: `policyengine_us_data/storage/` +**Local files** (in `policyengine_us_data/storage/`): +- `SSPopJul_TR2024.csv` - Population projections 2025-2100 by single year of age +- `social_security_aux.csv` - OASDI costs, taxable payroll, and TOB revenue projections 2025-2100 --- diff --git a/policyengine_us_data/datasets/cps/long_term/calibration.py b/policyengine_us_data/datasets/cps/long_term/calibration.py index 5019baab..e9d35372 100644 --- a/policyengine_us_data/datasets/cps/long_term/calibration.py +++ b/policyengine_us_data/datasets/cps/long_term/calibration.py @@ -85,6 +85,10 @@ def calibrate_greg( payroll_target=None, h6_income_values=None, h6_revenue_target=None, + oasdi_tob_values=None, + oasdi_tob_target=None, + hi_tob_values=None, + hi_tob_target=None, n_ages=86, ): """ @@ -101,6 +105,10 @@ def calibrate_greg( payroll_target: Optional taxable payroll target total h6_income_values: Optional H6 reform income values per household h6_revenue_target: Optional H6 reform total revenue impact target + oasdi_tob_values: Optional OASDI TOB revenue values per household + oasdi_tob_target: Optional OASDI TOB revenue target total + hi_tob_values: Optional HI TOB revenue values per household + hi_tob_target: Optional HI TOB revenue target total n_ages: Number of age groups Returns: @@ -116,6 +124,8 @@ def calibrate_greg( (ss_values is not None and ss_target is not None) or (payroll_values is not None and payroll_target is not None) or (h6_income_values is not None and h6_revenue_target is not None) + or (oasdi_tob_values is not None and oasdi_tob_target is not None) + or (hi_tob_values is not None and hi_tob_target is not None) ) if needs_aux_df: @@ -135,6 +145,14 @@ def calibrate_greg( aux_df["h6_revenue"] = h6_income_values controls["h6_revenue"] = h6_revenue_target + if oasdi_tob_values is not None and oasdi_tob_target is not None: + aux_df["oasdi_tob"] = oasdi_tob_values + controls["oasdi_tob"] = oasdi_tob_target + + if hi_tob_values is not None and hi_tob_target is not None: + aux_df["hi_tob"] = hi_tob_values + controls["hi_tob"] = hi_tob_target + aux_vars = aux_df else: aux_vars = X @@ -160,6 +178,10 @@ def calibrate_weights( payroll_target=None, h6_income_values=None, h6_revenue_target=None, + oasdi_tob_values=None, + oasdi_tob_target=None, + hi_tob_values=None, + hi_tob_target=None, n_ages=86, max_iters=100, tol=1e-6, @@ -180,6 +202,10 @@ def calibrate_weights( payroll_target: Optional payroll target (for GREG with payroll) h6_income_values: Optional H6 reform income values per household h6_revenue_target: Optional H6 reform total revenue impact target + oasdi_tob_values: Optional OASDI TOB revenue values per household + oasdi_tob_target: Optional OASDI TOB revenue target total + hi_tob_values: Optional HI TOB revenue values per household + hi_tob_target: Optional HI TOB revenue target total n_ages: Number of age groups max_iters: Max iterations for IPF tol: Convergence tolerance for IPF @@ -204,6 +230,10 @@ def calibrate_weights( payroll_target, h6_income_values, h6_revenue_target, + oasdi_tob_values, + oasdi_tob_target, + hi_tob_values, + hi_tob_target, n_ages, ) except Exception as e: diff --git a/policyengine_us_data/datasets/cps/long_term/check_calibrated_estimates_interactive.py b/policyengine_us_data/datasets/cps/long_term/check_calibrated_estimates_interactive.py new file mode 100644 index 00000000..af041484 --- /dev/null +++ b/policyengine_us_data/datasets/cps/long_term/check_calibrated_estimates_interactive.py @@ -0,0 +1,326 @@ +import os + +import pandas as pd +import numpy as np + +from policyengine_us import Microsimulation + +# H5_PATH = 'hf://policyengine/test/' +H5_PATH = "/home/baogorek/devl/sep/policyengine-us-data/policyengine_us_data/datasets/cps/long_term/projected_datasets/" + +# 2027 -------------------------------------- +sim = Microsimulation(dataset=H5_PATH + "2027.h5") +parameters = sim.tax_benefit_system.parameters + +## Total social security +assert sim.default_calculation_period == "2027" +ss_estimate_cost_b = sim.calculate("social_security").sum() / 1e9 + +### Trustees SingleYearTRTables_TR2025.xlsx, Tab VI.G10 (nominal dollars in billions) +### Intermediate scenario for row 69, for Intermediate Scenario, 2027, Cost is: $1,715 billion +ss_trustees_cost_b = 1_800 +assert round(ss_estimate_cost_b) == ss_trustees_cost_b + +## Taxable Payroll for Social Security +taxible_estimate_b = ( + sim.calculate("taxable_earnings_for_social_security").sum() / 1e9 + + sim.calculate("social_security_taxable_self_employment_income").sum() + / 1e9 +) + +### Trustees SingleYearTRTables_TR2025.xlsx, Tab VI.G6 (nominal dollars in billions) +### Intermediate scenario for row 69, for Intermediate Scenario, 2027, Cost is: $1,715 billion +ss_trustees_payroll_b = 11_627 +assert round(taxible_estimate_b) == ss_trustees_payroll_b + +## Population demographics, total + +### Population count of 6 year olds +person_weights = sim.calculate("age", map_to="person").weights +person_ages = sim.calculate("age", map_to="person").values +person_is_6 = person_ages == 6 + +total_age6_est = np.sum(person_is_6 * person_weights) + +### Single Year Age demographic projections - latest published is 2024: +### "Mid Year" CSV from https://www.ssa.gov/oact/HistEst/Population/2024/Population2024.html +### Row 8694, Col C, contains 3730632 +ss_age6_pop = 3_730_632 +assert ss_age6_pop == round(total_age6_est) + + +# 2100 -------------------------------------- +sim = Microsimulation(dataset=H5_PATH + "2100.h5") +parameters = sim.tax_benefit_system.parameters + +## Total social security +assert sim.default_calculation_period == "2100" +ss_estimate_cost_b = sim.calculate("social_security").sum() / 1e9 + +### Trustees SingleYearTRTables_TR2025.xlsx, Tab VI.G10 (nominal dollars in billions) +### Intermediate scenario for row 69, for Intermediate Scenario, 2100, Cost is: $34,432 billion +ss_trustees_cost_b = 34_432 +# Rounding takes it off a bit. We calibrated to CPI ratio times 2025-dollar cost +assert np.allclose(ss_estimate_cost_b, ss_trustees_cost_b, rtol=0.0001) + +## Taxable Payroll for Social Security +taxible_estimate_b = ( + sim.calculate("taxable_earnings_for_social_security").sum() / 1e9 + + sim.calculate("social_security_taxable_self_employment_income").sum() + / 1e9 +) + +### Trustees SingleYearTRTables_TR2025.xlsx, Tab VI.G6 (nominal dollars in billions) +### Intermediate scenario for row 143, for Intermediate Scenario, 2100, Cost is: $187,614 billion +ss_trustees_payroll_b = 187_614 +assert round(taxible_estimate_b) == ss_trustees_payroll_b + +## Population demographics, total + +### Population count of 6 year olds +person_weights = sim.calculate("age", map_to="person").weights +person_ages = sim.calculate("age", map_to="person").values +person_is_6 = person_ages == 6 + +total_age6_est = np.sum(person_is_6 * person_weights) + +### Single Year Age demographic projections - latest published is 2024: +### "Mid Year" CSV from https://www.ssa.gov/oact/HistEst/Population/2024/Population2024.html +### Row 16067, Col C, contains 5162540 +ss_age6_pop = 5_162_540 +assert np.allclose(ss_age6_pop, total_age6_est, atol=1) + +# Taxation of benefits ------- +# Trustees report stops at 2099 so project at current rate one year out, bring to billions +sim.default_calculation_period +hi_tob_trustees = 1761.5 +hi_tob_estimate_b = ( + sim.calculate("tob_revenue_medicare_hi", map_to="household").sum() / 1e9 +) +hi_tob_estimate_b + +oasdi_tob_trustees = 2101.3 +oasdi_tob_estimate_b = sim.calculate("tob_revenue_oasdi").sum() / 1e9 +oasdi_tob_estimate_b + + +# Testing the H6 Reform ------------------------------------------------------ + +from policyengine_us import Microsimulation +from policyengine_core.reforms import Reform + + +def create_h6_reform(): + """ + Implements Proposal H6: + 1. Phase out OASDI taxation (Tier 1) from 2045-2053 by raising thresholds. + 2. Eliminate OASDI taxation fully in 2054+ (set Tier 1 rate to 0%). + 3. HOLD HARMLESS: Maintain HI taxation (Tier 2) revenue at current law levels throughout. + + CRITICAL: Handles the "Threshold Crossover" problem. + As OASDI thresholds rise above HI thresholds ($34k/$44k), we must + swap the parameter definitions to prevent the engine from breaking. + """ + + reform_payload = { + # Thresholds + "gov.irs.social_security.taxability.threshold.base.main.SINGLE": {}, + "gov.irs.social_security.taxability.threshold.base.main.JOINT": {}, + "gov.irs.social_security.taxability.threshold.base.main.HEAD_OF_HOUSEHOLD": {}, + "gov.irs.social_security.taxability.threshold.base.main.SURVIVING_SPOUSE": {}, + "gov.irs.social_security.taxability.threshold.base.main.SEPARATE": {}, + "gov.irs.social_security.taxability.threshold.adjusted_base.main.SINGLE": {}, + "gov.irs.social_security.taxability.threshold.adjusted_base.main.JOINT": {}, + "gov.irs.social_security.taxability.threshold.adjusted_base.main.HEAD_OF_HOUSEHOLD": {}, + "gov.irs.social_security.taxability.threshold.adjusted_base.main.SURVIVING_SPOUSE": {}, + "gov.irs.social_security.taxability.threshold.adjusted_base.main.SEPARATE": {}, + # Rates - Base (Tier 1) + "gov.irs.social_security.taxability.rate.base.benefit_cap": {}, + "gov.irs.social_security.taxability.rate.base.excess": {}, + # Rates - Additional (Tier 2 - HI) + "gov.irs.social_security.taxability.rate.additional.benefit_cap": {}, + "gov.irs.social_security.taxability.rate.additional.excess": {}, + } + + # --- CONSTANTS: CURRENT LAW HI THRESHOLDS (FROZEN) --- + # We must preserve these specific triggers to protect the HI Trust Fund + HI_SINGLE = 34_000 + HI_JOINT = 44_000 + + # --- PHASE 1: THE TRANSITION (2045-2053) --- + for year in range(2045, 2054): + period = f"{year}-01-01" + i = year - 2045 + + # 1. Calculate the Target OASDI Thresholds (Rising) + # (a) 2045 = $32,500 ... (i) 2053 = $92,500 + oasdi_target_single = 32_500 + (7_500 * i) + oasdi_target_joint = 65_000 + (15_000 * i) + + # 2. Handle Threshold Crossover + # OASDI thresholds rise above HI thresholds during phase-out. + # We must swap parameters: put lower threshold in 'base' slot. + + # --- SET RATES FOR TRANSITION (2045-2053) --- + # Joint filers cross immediately in 2045 ($65k OASDI > $44k HI). + # Single filers cross in 2046 ($40k OASDI > $34k HI). + # + # PolicyEngine forces one global rate structure per year. + # We choose swapped rates (0.35/0.85) for ALL years to minimize error: + # + # Trade-off in 2045: + # - Single filers: $225 undertax (15% on $1.5k range) ✓ acceptable + # - Joint filers: Would be $3,150 overtax with default rates ✗ unacceptable + # + # The swapped rate error is 14x smaller and aligns with tax-cutting intent. + + # Tier 1 (Base): HI ONLY (35%) + reform_payload[ + "gov.irs.social_security.taxability.rate.base.benefit_cap" + ][period] = 0.35 + reform_payload["gov.irs.social_security.taxability.rate.base.excess"][ + period + ] = 0.35 + + # Tier 2 (Additional): HI + OASDI Combined (85%) + reform_payload[ + "gov.irs.social_security.taxability.rate.additional.benefit_cap" + ][period] = 0.85 + reform_payload[ + "gov.irs.social_security.taxability.rate.additional.excess" + ][period] = 0.85 + + # --- SET THRESHOLDS (MIN/MAX SWAP) --- + # Always put the smaller number in 'base' and larger in 'adjusted_base' + + # Single + reform_payload[ + "gov.irs.social_security.taxability.threshold.base.main.SINGLE" + ][period] = min(oasdi_target_single, HI_SINGLE) + reform_payload[ + "gov.irs.social_security.taxability.threshold.adjusted_base.main.SINGLE" + ][period] = max(oasdi_target_single, HI_SINGLE) + + # Joint + reform_payload[ + "gov.irs.social_security.taxability.threshold.base.main.JOINT" + ][period] = min(oasdi_target_joint, HI_JOINT) + reform_payload[ + "gov.irs.social_security.taxability.threshold.adjusted_base.main.JOINT" + ][period] = max(oasdi_target_joint, HI_JOINT) + + # Map other statuses (Head/Surviving Spouse -> Single logic, Separate -> Single logic usually) + # Note: Separate is usually 0, but for H6 strictness we map to Single logic here + for status in ["HEAD_OF_HOUSEHOLD", "SURVIVING_SPOUSE", "SEPARATE"]: + reform_payload[ + f"gov.irs.social_security.taxability.threshold.base.main.{status}" + ][period] = min(oasdi_target_single, HI_SINGLE) + reform_payload[ + f"gov.irs.social_security.taxability.threshold.adjusted_base.main.{status}" + ][period] = max(oasdi_target_single, HI_SINGLE) + + # --- PHASE 2: ELIMINATION (2054+) --- + # OASDI is gone. We only collect HI. + # Logic: "Base" becomes the HI tier ($34k). Rate is 0.35. + # "Adjusted" becomes irrelevant (set high or rate to same). + + elim_period = "2054-01-01.2100-12-31" + + # 1. Set Thresholds to "HI Only" mode + # Base = $34k / $44k + reform_payload[ + "gov.irs.social_security.taxability.threshold.base.main.SINGLE" + ][elim_period] = HI_SINGLE + reform_payload[ + "gov.irs.social_security.taxability.threshold.base.main.JOINT" + ][elim_period] = HI_JOINT + + # Adjusted = Infinity (Disable the second tier effectively) + reform_payload[ + "gov.irs.social_security.taxability.threshold.adjusted_base.main.SINGLE" + ][elim_period] = 9_999_999 + reform_payload[ + "gov.irs.social_security.taxability.threshold.adjusted_base.main.JOINT" + ][elim_period] = 9_999_999 + + # Map others + for status in ["HEAD_OF_HOUSEHOLD", "SURVIVING_SPOUSE", "SEPARATE"]: + reform_payload[ + f"gov.irs.social_security.taxability.threshold.base.main.{status}" + ][elim_period] = HI_SINGLE + reform_payload[ + f"gov.irs.social_security.taxability.threshold.adjusted_base.main.{status}" + ][elim_period] = 9_999_999 + + # 2. Set Rates for HI Only Revenue + # Tier 1 (Now the ONLY tier) = 35% (HI Share) + reform_payload["gov.irs.social_security.taxability.rate.base.benefit_cap"][ + elim_period + ] = 0.35 + reform_payload["gov.irs.social_security.taxability.rate.base.excess"][ + elim_period + ] = 0.35 + + # Tier 2 (Disabled via threshold, but zero out for safety) + reform_payload[ + "gov.irs.social_security.taxability.rate.additional.benefit_cap" + ][elim_period] = 0.35 + reform_payload[ + "gov.irs.social_security.taxability.rate.additional.excess" + ][elim_period] = 0.35 + + return reform_payload + + +# Create the reform +h6_reform_payload = create_h6_reform() +h6_reform = Reform.from_dict(h6_reform_payload, country_id="us") + +year = 2052 +dataset_path = f"/home/baogorek/devl/sep/policyengine-us-data/policyengine_us_data/datasets/cps/long_term/projected_datasets/{year}.h5" +# dataset_path = f'hf://policyengine/test/{year}.h5' + +# Baseline simulation +baseline = Microsimulation(dataset=dataset_path) +baseline_revenue = baseline.calculate("income_tax").sum() + +# Reform simulation +reform_sim = Microsimulation(dataset=dataset_path, reform=h6_reform) +reform_revenue = reform_sim.calculate("income_tax").sum() + +# Ha we know this will fail because the hacky reform flips the tiers that the tob variables depend on! +# Just hoping this is truly isolated +# First, let's make sure there's no taxation on benefits after the reform! +# assert reform_sim.calculate("tob_revenue_oasdi").sum() / 1E9 == 0 + +# Calculate impact +revenue_impact = reform_revenue - baseline_revenue +print(f"revenue_impact (B): {revenue_impact / 1E9:.2f}") + +# Calculate taxable payroll +taxable_ss_earnings = baseline.calculate( + "taxable_earnings_for_social_security" +) +taxable_self_employment = baseline.calculate( + "social_security_taxable_self_employment_income" +) +total_taxable_payroll = ( + taxable_ss_earnings.sum() + taxable_self_employment.sum() +) + +# Calculate SS benefits +ss_benefits = baseline.calculate("social_security") +total_ss_benefits = ss_benefits.sum() + +est_rev_as_pct_of_taxable_payroll = ( + 100 * revenue_impact / total_taxable_payroll +) + +# From https://www.ssa.gov/oact/solvency/provisions/tables/table_run133.html: +target_rev_as_pct_of_taxable_payroll = -1.12 + +assert np.allclose( + est_rev_as_pct_of_taxable_payroll, + target_rev_as_pct_of_taxable_payroll, + atol=0.01, +) diff --git a/policyengine_us_data/datasets/cps/long_term/projection_utils.py b/policyengine_us_data/datasets/cps/long_term/projection_utils.py index 04c23b04..d0af8533 100644 --- a/policyengine_us_data/datasets/cps/long_term/projection_utils.py +++ b/policyengine_us_data/datasets/cps/long_term/projection_utils.py @@ -41,6 +41,32 @@ def build_household_age_matrix(sim, n_ages=86): return X, household_ids_unique, hh_id_to_idx +def get_pseudo_input_variables(sim): + """ + Identify variables that appear as inputs but aggregate calculated values. + + These variables have 'adds' attribute but no formula, yet their components + ARE calculated. Storing them leads to stale values corrupting calculations. + """ + tbs = sim.tax_benefit_system + pseudo_inputs = set() + + for var_name in sim.input_variables: + var = tbs.variables.get(var_name) + if not var: + continue + adds = getattr(var, "adds", None) + if not adds or not isinstance(adds, list): + continue + for component in adds: + comp_var = tbs.variables.get(component) + if comp_var and len(getattr(comp_var, "formulas", {})) > 0: + pseudo_inputs.add(var_name) + break + + return pseudo_inputs + + def create_household_year_h5( year, household_weights, base_dataset_path, output_dir ): @@ -66,6 +92,16 @@ def create_household_year_h5( df = sim.to_input_dataframe() + # Remove pseudo-input variables (aggregates of calculated values) + pseudo_inputs = get_pseudo_input_variables(sim) + cols_to_drop = [ + f"{var}__{base_period}" + for var in pseudo_inputs + if f"{var}__{base_period}" in df.columns + ] + if cols_to_drop: + df.drop(columns=cols_to_drop, inplace=True) + household_ids = sim.calculate("household_id", map_to="household").values person_household_id = df[f"person_household_id__{base_period}"] diff --git a/policyengine_us_data/datasets/cps/long_term/run_household_projection.py b/policyengine_us_data/datasets/cps/long_term/run_household_projection.py index 9dbb7f2d..651f7b50 100644 --- a/policyengine_us_data/datasets/cps/long_term/run_household_projection.py +++ b/policyengine_us_data/datasets/cps/long_term/run_household_projection.py @@ -3,7 +3,7 @@ Usage: - python run_household_projection.py [START_YEAR] [END_YEAR] [--greg] [--use-ss] [--use-payroll] [--use-h6-reform] [--save-h5] + python run_household_projection.py [START_YEAR] [END_YEAR] [--greg] [--use-ss] [--use-payroll] [--use-h6-reform] [--use-tob] [--save-h5] START_YEAR: Optional starting year (default: 2025) END_YEAR: Optional ending year (default: 2035) @@ -11,11 +11,12 @@ --use-ss: Include Social Security benefit totals as calibration target (requires --greg) --use-payroll: Include taxable payroll totals as calibration target (requires --greg) --use-h6-reform: Include H6 reform income impact ratio as calibration target (requires --greg) + --use-tob: Include TOB (Taxation of Benefits) revenue as calibration target (requires --greg) --save-h5: Save year-specific .h5 files with calibrated weights to ./projected_datasets/ Examples: python run_household_projection.py 2045 2045 --greg --use-ss # single year - python run_household_projection.py 2025 2100 --greg --use-ss --use-payroll --use-h6-reform --save-h5 + python run_household_projection.py 2025 2100 --greg --use-ss --use-payroll --use-tob --save-h5 """ import sys @@ -256,6 +257,16 @@ def create_h6_reform(): USE_GREG = True from ssa_data import load_h6_income_rate_change +USE_TOB = "--use-tob" in sys.argv +if USE_TOB: + sys.argv.remove("--use-tob") + if not USE_GREG: + print( + "Warning: --use-tob requires --greg, enabling GREG automatically" + ) + USE_GREG = True + from ssa_data import load_oasdi_tob_projections, load_hi_tob_projections + SAVE_H5 = "--save-h5" in sys.argv if SAVE_H5: sys.argv.remove("--save-h5") @@ -264,8 +275,13 @@ def create_h6_reform(): END_YEAR = int(sys.argv[2]) if len(sys.argv) > 2 else 2035 if USE_GREG: - from samplics.weighting import SampleWeight - + try: + from samplics.weighting import SampleWeight + except ImportError: + raise ImportError( + "samplics is required for GREG calibration. " + "Install with: pip install policyengine-us-data[calibration]" + ) calibrator = SampleWeight() else: calibrator = None @@ -286,6 +302,8 @@ def create_h6_reform(): print(f" Including taxable payroll constraint: Yes") if USE_H6_REFORM: print(f" Including H6 reform income impact constraint: Yes") +if USE_TOB: + print(f" Including TOB revenue constraint: Yes") if SAVE_H5: print(f" Saving year-specific .h5 files: Yes (to {OUTPUT_DIR}/)") os.makedirs(OUTPUT_DIR, exist_ok=True) @@ -428,7 +446,10 @@ def create_h6_reform(): # Only calculate H6 reform impacts if the target ratio is non-zero # (Reform has no effect before 2045, so skip computation for efficiency) - if h6_target_ratio != 0: + if h6_target_ratio == 0: + if year in display_years: + print(f" [DEBUG {year}] H6 reform not active until 2045") + else: # Create and apply H6 reform h6_reform = create_h6_reform() reform_sim = Microsimulation( @@ -464,6 +485,33 @@ def create_h6_reform(): del reform_sim gc.collect() + oasdi_tob_values = None + oasdi_tob_target = None + hi_tob_values = None + hi_tob_target = None + if USE_TOB: + oasdi_tob_hh = sim.calculate( + "tob_revenue_oasdi", period=year, map_to="household" + ) + oasdi_tob_values = oasdi_tob_hh.values + oasdi_tob_target = load_oasdi_tob_projections(year) + + hi_tob_hh = sim.calculate( + "tob_revenue_medicare_hi", period=year, map_to="household" + ) + hi_tob_values = hi_tob_hh.values + hi_tob_target = load_hi_tob_projections(year) + + if year in display_years: + oasdi_baseline = np.sum(oasdi_tob_values * baseline_weights) + hi_baseline = np.sum(hi_tob_values * baseline_weights) + print( + f" [DEBUG {year}] OASDI TOB baseline: ${oasdi_baseline/1e9:.1f}B, target: ${oasdi_tob_target/1e9:.1f}B" + ) + print( + f" [DEBUG {year}] HI TOB baseline: ${hi_baseline/1e9:.1f}B, target: ${hi_tob_target/1e9:.1f}B" + ) + y_target = target_matrix[:, year_idx] w_new, iterations = calibrate_weights( @@ -478,22 +526,43 @@ def create_h6_reform(): payroll_target=payroll_target, h6_income_values=h6_income_values, h6_revenue_target=h6_revenue_target, + oasdi_tob_values=oasdi_tob_values, + oasdi_tob_target=oasdi_tob_target, + hi_tob_values=hi_tob_values, + hi_tob_target=hi_tob_target, n_ages=n_ages, max_iters=100, tol=1e-6, verbose=False, ) - if year in display_years and (USE_SS or USE_PAYROLL or USE_H6_REFORM): + if year in display_years and USE_GREG: + neg_mask = w_new < 0 + n_neg = neg_mask.sum() + if n_neg > 0: + pct_neg = 100 * n_neg / len(w_new) + max_neg = np.abs(w_new[neg_mask]).max() + print( + f" [DEBUG {year}] Negative weights: {n_neg} ({pct_neg:.2f}%), " + f"largest: {max_neg:,.0f}" + ) + else: + print( + f" [DEBUG {year}] Negative weights: 0 (all weights non-negative)" + ) + + if year in display_years and ( + USE_SS or USE_PAYROLL or USE_H6_REFORM or USE_TOB + ): if USE_SS: ss_achieved = np.sum(ss_values * w_new) print( - f" [DEBUG {year}] SS achieved: ${ss_achieved/1e9:.1f}B (error: {(ss_achieved - ss_target)/ss_target*100:.1f}%)" + f" [DEBUG {year}] SS achieved: ${ss_achieved/1e9:.1f}B (error: ${abs(ss_achieved - ss_target)/1e6:.1f}M, {(ss_achieved - ss_target)/ss_target*100:.3f}%)" ) if USE_PAYROLL: payroll_achieved = np.sum(payroll_values * w_new) print( - f" [DEBUG {year}] Payroll achieved: ${payroll_achieved/1e9:.1f}B (error: {(payroll_achieved - payroll_target)/payroll_target*100:.1f}%)" + f" [DEBUG {year}] Payroll achieved: ${payroll_achieved/1e9:.1f}B (error: ${abs(payroll_achieved - payroll_target)/1e6:.1f}M, {(payroll_achieved - payroll_target)/payroll_target*100:.3f}%)" ) if USE_H6_REFORM and h6_revenue_target is not None: h6_revenue_achieved = np.sum(h6_income_values * w_new) @@ -505,7 +574,16 @@ def create_h6_reform(): else 0 ) print( - f" [DEBUG {year}] H6 achieved revenue: ${h6_revenue_achieved/1e9:.3f}B (error: {error_pct:.1f}%)" + f" [DEBUG {year}] H6 achieved revenue: ${h6_revenue_achieved/1e9:.3f}B (error: ${abs(h6_revenue_achieved - h6_revenue_target)/1e6:.1f}M, {error_pct:.3f}%)" + ) + if USE_TOB: + oasdi_achieved = np.sum(oasdi_tob_values * w_new) + hi_achieved = np.sum(hi_tob_values * w_new) + print( + f" [DEBUG {year}] OASDI TOB achieved: ${oasdi_achieved/1e9:.1f}B (error: ${abs(oasdi_achieved - oasdi_tob_target)/1e6:.1f}M, {(oasdi_achieved - oasdi_tob_target)/oasdi_tob_target*100:.3f}%)" + ) + print( + f" [DEBUG {year}] HI TOB achieved: ${hi_achieved/1e9:.1f}B (error: ${abs(hi_achieved - hi_tob_target)/1e6:.1f}M, {(hi_achieved - hi_tob_target)/hi_tob_target*100:.3f}%)" ) weights_matrix[:, year_idx] = w_new diff --git a/policyengine_us_data/datasets/cps/long_term/ssa_data.py b/policyengine_us_data/datasets/cps/long_term/ssa_data.py index 46582262..6b76ae21 100644 --- a/policyengine_us_data/datasets/cps/long_term/ssa_data.py +++ b/policyengine_us_data/datasets/cps/long_term/ssa_data.py @@ -89,3 +89,39 @@ def load_h6_income_rate_change(year): row = df[df["year"] == year] # CSV stores as percentage (e.g., -0.18), convert to decimal return row["h6_income_rate_change"].values[0] / 100 + + +def load_oasdi_tob_projections(year): + """ + Load OASDI TOB (Taxation of Benefits) revenue target for a given year. + + Args: + year: Year to load OASDI TOB revenue for + + Returns: + Total OASDI TOB revenue in nominal dollars + """ + csv_path = STORAGE_FOLDER / "social_security_aux.csv" + df = pd.read_csv(csv_path) + + row = df[df["year"] == year] + nominal_billions = row["oasdi_tob_billions_nominal_usd"].values[0] + return nominal_billions * 1e9 + + +def load_hi_tob_projections(year): + """ + Load HI (Medicare) TOB revenue target for a given year. + + Args: + year: Year to load HI TOB revenue for + + Returns: + Total HI TOB revenue in nominal dollars + """ + csv_path = STORAGE_FOLDER / "social_security_aux.csv" + df = pd.read_csv(csv_path) + + row = df[df["year"] == year] + nominal_billions = row["hi_tob_billions_nominal_usd"].values[0] + return nominal_billions * 1e9 diff --git a/policyengine_us_data/storage/social_security_aux.csv b/policyengine_us_data/storage/social_security_aux.csv index 07f875fb..4b578758 100644 --- a/policyengine_us_data/storage/social_security_aux.csv +++ b/policyengine_us_data/storage/social_security_aux.csv @@ -1,77 +1,77 @@ -year,oasdi_cost_in_billion_2025_usd,cpi_w_intermediate,oasdi_cost_in_billion_nominal_usd,taxable_payroll_in_billion_nominal_usd,h6_income_rate_change -2025,1609,100,1609,10621,0 -2026,1660,102.49,1701.334,11129,0 -2027,1715,104.95,1799.8925,11627,0 -2028,1763,107.47,1894.6961,12159,0 -2029,1810,110.05,1991.905,12696,0 -2030,1856,112.69,2091.5264,13239,0 -2031,1903,115.4,2196.062,13798,0 -2032,1947,118.17,2300.7699,14380,0 -2033,1991,121,2409.11,14987,0 -2034,2032,123.91,2517.8512,15594,0 -2035,2073,126.88,2630.2224,16205,0 -2036,2114,129.93,2746.7202,16825,0 -2037,2155,133.04,2867.012,17465,0 -2038,2194,136.24,2989.1056,18132,0 -2039,2233,139.51,3115.2583,18819,0 -2040,2270,142.86,3242.922,19532,0 -2041,2306,146.28,3373.2168,20269,0 -2042,2342,149.79,3508.0818,21035,0 -2043,2378,153.39,3647.6142,21828,0 -2044,2415,157.07,3793.2405,22653,0 -2045,2452,160.84,3943.7968,23507,-0.07 -2046,2488,164.7,4097.736,24391,-0.12 -2047,2527,168.65,4261.7855,25313,-0.18 -2048,2567,172.7,4433.209,26270,-0.23 -2049,2609,176.85,4614.0165,27263,-0.27 -2050,2652,181.09,4802.5068,28300,-0.32 -2051,2696,185.44,4999.4624,29376,-0.36 -2052,2743,189.89,5208.6827,30494,-0.4 -2053,2792,194.44,5428.7648,31661,-0.43 -2054,2842,199.11,5658.7062,32869,-1 -2055,2895,203.89,5902.6155,34124,-1.01 -2056,2950,208.78,6159.01,35432,-1.01 -2057,3007,213.79,6428.6653,36790,-1.02 -2058,3066,218.93,6712.3938,38201,-1.03 -2059,3125,224.18,7005.625,39670,-1.04 -2060,3184,229.56,7309.1904,41196,-1.04 -2061,3243,235.07,7623.3201,42782,-1.05 -2062,3303,240.71,7950.6513,44429,-1.06 -2063,3362,246.49,8286.9938,46136,-1.06 -2064,3422,252.4,8637.128,47902,-1.07 -2065,3483,258.46,9002.1618,49733,-1.07 -2066,3544,264.66,9379.5504,51631,-1.08 -2067,3607,271.02,9775.6914,53598,-1.09 -2068,3670,277.52,10184.984,55637,-1.09 -2069,3735,284.18,10614.123,57746,-1.1 -2070,3801,291,11060.91,59930,-1.1 -2071,3867,297.99,11523.2733,62196,-1.11 -2072,3934,305.14,12004.2076,64543,-1.12 -2073,4002,312.46,12504.6492,66975,-1.12 -2074,4071,319.96,13025.5716,69501,-1.13 -2075,4139,327.64,13561.0196,72131,-1.13 -2076,4206,335.5,14111.13,74862,-1.14 -2077,4273,343.55,14679.8915,77698,-1.14 -2078,4339,351.8,15264.602,80650,-1.14 -2079,4403,360.24,15861.3672,83727,-1.15 -2080,4467,368.89,16478.3163,86933,-1.15 -2081,4530,377.74,17111.622,90268,-1.15 -2082,4593,386.81,17766.1833,93749,-1.15 -2083,4655,396.09,18437.9895,97381,-1.15 -2084,4716,405.6,19128.096,101163,-1.15 -2085,4775,415.33,19832.0075,105104,-1.15 -2086,4833,425.3,20554.749,109217,-1.14 -2087,4891,435.51,21300.7941,113504,-1.14 -2088,4948,445.96,22066.1008,117973,-1.14 -2089,5006,456.66,22860.3996,122629,-1.13 -2090,5064,467.62,23680.2768,127477,-1.13 -2091,5125,478.84,24540.55,132518,-1.13 -2092,5188,490.34,25438.8392,137764,-1.12 -2093,5254,502.1,26380.334,143215,-1.12 -2094,5323,514.16,27368.7368,148876,-1.12 -2095,5396,526.49,28409.4004,154754,-1.12 -2096,5472,539.13,29501.1936,160855,-1.12 -2097,5551,552.07,30645.4057,167185,-1.11 -2098,5633,565.32,31844.4756,173750,-1.11 -2099,5719,578.89,33106.7191,180557,-1.12 -2100,5809,592.78,34434.5902,187614,-1.12 +year,oasdi_cost_in_billion_2025_usd,cpi_w_intermediate,oasdi_cost_in_billion_nominal_usd,taxable_payroll_in_billion_nominal_usd,h6_income_rate_change,oasdi_tob_pct_of_taxable_payroll,oasdi_tob_billions_nominal_usd,hi_tob_billions_nominal_usd +2025,1609,100,1609,10621,0,0.57,60.5397,40.655 +2026,1660,102.49,1701.334,11129,0,0.69,76.7901,52.199 +2027,1715,104.95,1799.8925,11627,0,0.71,82.5517,60.569 +2028,1763,107.47,1894.6961,12159,0,0.73,88.7607,65.635 +2029,1810,110.05,1991.905,12696,0,0.75,95.22,70.797 +2030,1856,112.69,2091.5264,13239,0,0.78,103.2642,76.594 +2031,1903,115.4,2196.062,13798,0,0.81,111.7638,83.036 +2032,1947,118.17,2300.7699,14380,0,0.84,120.792,89.882 +2033,1991,121,2409.11,14987,0,0.86,128.8882,97.047 +2034,2032,123.91,2517.8512,15594,0,0.89,138.7866,104.521 +2035,2073,126.88,2630.2224,16205,0,0.90,145.845,111.683 +2036,2114,129.93,2746.7202,16825,0,0.91,153.1075,118.34 +2037,2155,133.04,2867.012,17465,0,0.91,158.9315,124.952 +2038,2194,136.24,2989.1056,18132,0,0.92,166.8144,131.665 +2039,2233,139.51,3115.2583,18819,0,0.93,175.0167,138.444 +2040,2270,142.86,3242.922,19532,0,0.94,183.6008,145.274 +2041,2306,146.28,3373.2168,20269,0,0.94,190.5286,152.175 +2042,2342,149.79,3508.0818,21035,0,0.94,197.729,159.209 +2043,2378,153.39,3647.6142,21828,0,0.95,207.366,166.412 +2044,2415,157.07,3793.2405,22653,0,0.95,215.2035,173.854 +2045,2452,160.84,3943.7968,23507,-0.07,0.96,225.6672,181.587 +2046,2488,164.7,4097.736,24391,-0.12,0.96,234.1536,189.555 +2047,2527,168.65,4261.7855,25313,-0.18,0.96,243.0048,197.851 +2048,2567,172.7,4433.209,26270,-0.23,0.97,254.819,206.586 +2049,2609,176.85,4614.0165,27263,-0.27,0.97,264.4511,215.751 +2050,2652,181.09,4802.5068,28300,-0.32,0.98,277.34,230.026 +2051,2696,185.44,4999.4624,29376,-0.36,0.98,287.8848,240.281 +2052,2743,189.89,5208.6827,30494,-0.4,0.99,301.8906,251.078 +2053,2792,194.44,5428.7648,31661,-0.43,0.99,313.4439,262.512 +2054,2842,199.11,5658.7062,32869,-1,1.00,328.69,274.469 +2055,2895,203.89,5902.6155,34124,-1.01,1.01,344.6524,287.161 +2056,2950,208.78,6159.01,35432,-1.01,1.01,357.8632,300.557 +2057,3007,213.79,6428.6653,36790,-1.02,1.02,375.258,314.676 +2058,3066,218.93,6712.3938,38201,-1.03,1.03,393.4703,329.538 +2059,3125,224.18,7005.625,39670,-1.04,1.04,412.568,344.937 +2060,3184,229.56,7309.1904,41196,-1.04,1.04,428.4384,360.893 +2061,3243,235.07,7623.3201,42782,-1.05,1.05,449.211,377.293 +2062,3303,240.71,7950.6513,44429,-1.06,1.06,470.9474,394.399 +2063,3362,246.49,8286.9938,46136,-1.06,1.06,489.0416,412.059 +2064,3422,252.4,8637.128,47902,-1.07,1.07,512.5514,430.272 +2065,3483,258.46,9002.1618,49733,-1.07,1.07,532.1431,449.287 +2066,3544,264.66,9379.5504,51631,-1.08,1.08,557.6148,469.05 +2067,3607,271.02,9775.6914,53598,-1.09,1.09,584.2182,489.656 +2068,3670,277.52,10184.984,55637,-1.09,1.09,606.4433,511.168 +2069,3735,284.18,10614.123,57746,-1.1,1.10,635.206,533.596 +2070,3801,291,11060.91,59930,-1.1,1.10,659.23,556.96 +2071,3867,297.99,11523.2733,62196,-1.11,1.11,690.3756,581.15 +2072,3934,305.14,12004.2076,64543,-1.12,1.12,722.8816,606.266 +2073,4002,312.46,12504.6492,66975,-1.12,1.12,750.12,632.641 +2074,4071,319.96,13025.5716,69501,-1.13,1.13,785.3613,659.863 +2075,4139,327.64,13561.0196,72131,-1.13,1.13,815.0803,688.165 +2076,4206,335.5,14111.13,74862,-1.14,1.14,853.4268,717.049 +2077,4273,343.55,14679.8915,77698,-1.14,1.14,885.7572,746.777 +2078,4339,351.8,15264.602,80650,-1.14,1.14,919.41,777.205 +2079,4403,360.24,15861.3672,83727,-1.15,1.15,962.8605,808.543 +2080,4467,368.89,16478.3163,86933,-1.15,1.15,999.7295,840.797 +2081,4530,377.74,17111.622,90268,-1.15,1.15,1038.082,873.88 +2082,4593,386.81,17766.1833,93749,-1.15,1.15,1078.1135,907.958 +2083,4655,396.09,18437.9895,97381,-1.15,1.15,1119.8815,943.051 +2084,4716,405.6,19128.096,101163,-1.15,1.15,1163.3745,978.965 +2085,4775,415.33,19832.0075,105104,-1.15,1.15,1208.696,1015.209 +2086,4833,425.3,20554.749,109217,-1.14,1.14,1245.0738,1052.674 +2087,4891,435.51,21300.7941,113504,-1.14,1.14,1293.9456,1090.789 +2088,4948,445.96,22066.1008,117973,-1.14,1.14,1344.8922,1130.016 +2089,5006,456.66,22860.3996,122629,-1.13,1.13,1385.7077,1170.571 +2090,5064,467.62,23680.2768,127477,-1.13,1.13,1440.4901,1212.549 +2091,5125,478.84,24540.55,132518,-1.13,1.13,1497.4534,1256.274 +2092,5188,490.34,25438.8392,137764,-1.12,1.12,1542.9568,1302.101 +2093,5254,502.1,26380.334,143215,-1.12,1.12,1604.008,1350.009 +2094,5323,514.16,27368.7368,148876,-1.12,1.12,1667.4112,1400.378 +2095,5396,526.49,28409.4004,154754,-1.12,1.12,1733.2448,1453.666 +2096,5472,539.13,29501.1936,160855,-1.12,1.12,1801.576,1509.442 +2097,5551,552.07,30645.4057,167185,-1.11,1.11,1855.7535,1567.878 +2098,5633,565.32,31844.4756,173750,-1.11,1.11,1928.625,1629.435 +2099,5719,578.89,33106.7191,180557,-1.12,1.12,2022.2384,1694.187 +2100,5809,592.78,34434.5902,187614,-1.12,1.12,2101.2768,1761.512 diff --git a/pyproject.toml b/pyproject.toml index c2c523e8..71510e34 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,9 @@ dependencies = [ ] [project.optional-dependencies] +calibration = [ + "samplics", +] dev = [ "black", "pytest",