diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py index ca6759b2..83305ae5 100644 --- a/policyengine_us_data/datasets/cps/cps.py +++ b/policyengine_us_data/datasets/cps/cps.py @@ -242,29 +242,6 @@ def add_personal_variables(cps: h5py.File, person: DataFrame) -> None: ] cps["is_disabled"] = (person[DISABILITY_FLAGS] == 1).any(axis=1) - def _assign_some_newborns_to_pregnancy( - age: pd.Series, person: pd.DataFrame - ) -> pd.Series: - """Takes an array of ages, returns the new age array with the given percentage of newborns assigned a negative age (in pregnancy).""" - age = np.where( - person.A_AGE == 0, - np.where( - np.random.randint( - 0, 2, len(person) - ), # Random number of 0 or 1 - # If 1 is flipped, select a random number between -0.75 and 0 - # This will represent the pregnany month - # At -0.75 the pregnancy month is 0 and at -0.0001 the pregnancy month is 9 - np.random.uniform(-0.75, 0, len(person)), - # If 0 is flipped, the child is a newborn at the age of 0 to 1 - np.random.uniform(0, 1, len(person)), - ), - person.A_AGE, - ) - return age - - cps["age"] = _assign_some_newborns_to_pregnancy(cps["age"], person) - def children_per_parent(col: str) -> pd.DataFrame: """Calculate number of children in the household using parental pointers. 
diff --git a/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py b/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py index e6bf595c..93b4580b 100644 --- a/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py +++ b/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py @@ -36,21 +36,46 @@ def test_ecps_has_mortgage_interest(): sim = Microsimulation(dataset=EnhancedCPS_2024) assert sim.calculate("deductible_mortgage_interest").sum() > 1 - assert sim.calculate("deductible_interest_expense").sum() > 1 -def test_newborns_and_pregnancies(): - from policyengine_us_data.datasets.cps import EnhancedCPS_2024 +def test_ecps_replicates_jct_tax_expenditures(): from policyengine_us import Microsimulation + from policyengine_core.reforms import Reform + from policyengine_us_data.datasets import EnhancedCPS_2024 + + # JCT tax expenditure targets + EXPENDITURE_TARGETS = { + "salt_deduction": 21.247e9, + "medical_expense_deduction": 11.4e9, + "charitable_deduction": 65.301e9, + "interest_deduction": 24.8e9, + } - sim = Microsimulation(dataset=EnhancedCPS_2024) - - # Test for unborn children (age < 0) - unborn = sim.calculate("age") < 0 - unborn_count = unborn.sum() - assert unborn_count > 0 - - # Test for newborns (0 <= age < 1) - newborns = (sim.calculate("age") >= 0) & (sim.calculate("age") < 1) - newborn_count = newborns.sum() - assert newborn_count > 0 + baseline = Microsimulation(dataset=EnhancedCPS_2024) + income_tax_b = baseline.calculate( + "income_tax", period=2024, map_to="household" + ) + + for deduction, target in EXPENDITURE_TARGETS.items(): + # Create reform that neutralizes the deduction + class RepealDeduction(Reform): + def apply(self): + self.neutralize_variable(deduction) + + # Run reform simulation + reformed = Microsimulation( + reform=RepealDeduction, dataset=EnhancedCPS_2024 + ) + income_tax_r = reformed.calculate( + "income_tax", period=2024, map_to="household" + ) + + # Calculate tax expenditure + tax_expenditure = 
(income_tax_r - income_tax_b).sum() + pct_error = abs((tax_expenditure - target) / target) + TOLERANCE = 0.15 + + print( + f"{deduction} tax expenditure {tax_expenditure/1e9:.1f}bn differs from target {target/1e9:.1f}bn by {pct_error:.2%}" + ) + assert pct_error < TOLERANCE, deduction diff --git a/policyengine_us_data/utils/loss.py b/policyengine_us_data/utils/loss.py index b743dc46..3eacf024 100644 --- a/policyengine_us_data/utils/loss.py +++ b/policyengine_us_data/utils/loss.py @@ -2,6 +2,7 @@ from .soi import pe_to_soi, get_soi import numpy as np from policyengine_us_data.storage import STORAGE_FOLDER +from policyengine_core.reforms import Reform def fmt(x): @@ -132,6 +133,7 @@ def build_loss_matrix(dataset: type, time_period): from policyengine_us import Microsimulation sim = Microsimulation(dataset=dataset) + sim.default_calculation_period = time_period hh_id = sim.calculate("household_id", map_to="person") tax_unit_hh_id = sim.map_result( hh_id, "person", "tax_unit", how="value_from_first_person" @@ -252,7 +254,7 @@ def build_loss_matrix(dataset: type, time_period): "alimony_income": 13e9, "alimony_expense": 13e9, # Rough estimate, not CPS derived - "real_estate_taxes": 400e9, # Rough estimate between 350bn and 600bn total property tax collections + "real_estate_taxes": 500e9, # Rough estimate between 350bn and 600bn total property tax collections "rent": 735e9, # ACS total uprated by CPI } @@ -340,18 +342,22 @@ def build_loss_matrix(dataset: type, time_period): ) targets_array.append(row["population_under_5"]) - # Population by number of newborns and pregancies - age = sim.calculate("age").values infants = (age >= 0) & (age < 1) label = "census/infants" loss_matrix[label] = sim.map_result(infants, "person", "household") - targets_array.append(3_491_679) + # Total number of infants in the 1 Year ACS + INFANTS_2023 = 3_491_679 + INFANTS_2022 = 3_437_933 + # Assume the infant population grows from 2023 to 2024 at the 2022-2023 rate. 
+ infants_2024 = INFANTS_2023 * (INFANTS_2023 / INFANTS_2022) + targets_array.append(infants_2024) + + # Tax expenditure targeting for itemized deductions (SALT, medical, charitable, interest) - pregnancies = (age >= -0.75) & (age < 0) - label = "census/pregnancies" - loss_matrix[label] = sim.map_result(pregnancies, "person", "household") - targets_array.append(2_618_759) + _add_tax_expenditure_targets( + dataset, time_period, sim, loss_matrix, targets_array + ) if any(loss_matrix.isna().sum() > 0): raise ValueError("Some targets are missing from the loss matrix") @@ -360,3 +366,55 @@ def build_loss_matrix(dataset: type, time_period): raise ValueError("Some targets are missing from the targets array") return loss_matrix, np.array(targets_array) + + +def _add_tax_expenditure_targets( + dataset, + time_period, + baseline_simulation, + loss_matrix: pd.DataFrame, + targets_array: list, +): + from policyengine_us import Microsimulation + + income_tax_b = baseline_simulation.calculate( + "income_tax", map_to="household" + ).values + + # Dictionary of itemized deductions and their target values + # (in dollars for 2024, per the 2024 JCT Tax Expenditures report) + # https://www.jct.gov/publications/2024/jcx-48-24/ + ITEMIZED_DEDUCTIONS = { + "salt_deduction": 21.247e9, + "medical_expense_deduction": 11.4e9, + "charitable_deduction": 65.301e9, + "interest_deduction": 24.8e9, + } + + def make_repeal_class(deduction_var): + # Create a custom Reform subclass that neutralizes the given deduction. + class RepealDeduction(Reform): + def apply(self): + self.neutralize_variable(deduction_var) + + return RepealDeduction + + for deduction, target in ITEMIZED_DEDUCTIONS.items(): + # Generate the custom repeal class for the current deduction. + RepealDeduction = make_repeal_class(deduction) + + # Run the microsimulation using the repeal reform. + simulation = Microsimulation(dataset=dataset, reform=RepealDeduction) + simulation.default_calculation_period = time_period + + # Calculate the reform income tax values (the baseline was computed once above). 
+ income_tax_r = simulation.calculate( + "income_tax", map_to="household" + ).values + + # Compute the tax expenditure (TE) values. + te_values = income_tax_r - income_tax_b + + # Record the TE difference and the corresponding target value. + loss_matrix[f"jct/{deduction}_expenditure"] = te_values + targets_array.append(target) diff --git a/pyproject.toml b/pyproject.toml index fe6b2104..cf712384 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ authors = [ license = {file = "LICENSE"} requires-python = ">=3.10, <3.13.0" dependencies = [ - "policyengine_us", + "policyengine_us>=1.197.0", "policyengine_core>=3.14.1", "requests", "tqdm",