Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 0 additions & 23 deletions policyengine_us_data/datasets/cps/cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,29 +242,6 @@ def add_personal_variables(cps: h5py.File, person: DataFrame) -> None:
]
cps["is_disabled"] = (person[DISABILITY_FLAGS] == 1).any(axis=1)

def _assign_some_newborns_to_pregnancy(
    age: pd.Series, person: pd.DataFrame
) -> np.ndarray:
    """Re-draw the age of every age-0 record as a pregnancy or a newborn.

    Each record with ``person.A_AGE == 0`` gets a fair coin flip:

    * heads (1): the record is treated as a pregnancy and receives a
      negative age drawn uniformly from [-0.75, 0). -0.75 corresponds to
      pregnancy month 0 and values just below 0 to month 9.
    * tails (0): the record stays a newborn and receives an age drawn
      uniformly from [0, 1).

    All other records keep the value they have in ``age``. Previously the
    ``age`` argument was ignored and these records were reset to the raw
    ``person.A_AGE`` value.

    Args:
        age: Current age values, aligned row-for-row with ``person``.
        person: Person-level table; only its ``A_AGE`` column is read.

    Returns:
        A NumPy array of the same length as ``person`` holding the new ages.
    """
    n_people = len(person)

    # The draw order (coin flip, pregnancy ages, newborn ages) is kept fixed
    # so results under a given NumPy seed stay reproducible.
    is_pregnancy = np.random.randint(0, 2, n_people).astype(bool)
    pregnancy_age = np.random.uniform(-0.75, 0, n_people)
    newborn_age = np.random.uniform(0, 1, n_people)

    return np.where(
        person.A_AGE == 0,
        np.where(is_pregnancy, pregnancy_age, newborn_age),
        # Fall back to the caller's age values rather than the raw A_AGE.
        np.asarray(age),
    )

cps["age"] = _assign_some_newborns_to_pregnancy(cps["age"], person)

def children_per_parent(col: str) -> pd.DataFrame:
"""Calculate number of children in the household using parental
pointers.
Expand Down
53 changes: 39 additions & 14 deletions policyengine_us_data/tests/test_datasets/test_enhanced_cps.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,21 +36,46 @@ def test_ecps_has_mortgage_interest():
sim = Microsimulation(dataset=EnhancedCPS_2024)

assert sim.calculate("deductible_mortgage_interest").sum() > 1
assert sim.calculate("deductible_interest_expense").sum() > 1


def test_newborns_and_pregnancies():
    """Check that the enhanced CPS holds both unborn and newborn records."""
    from policyengine_us import Microsimulation
    from policyengine_us_data.datasets.cps import EnhancedCPS_2024

    sim = Microsimulation(dataset=EnhancedCPS_2024)
    age = sim.calculate("age")

    # Test for unborn children (age < 0)
    unborn = age < 0
    unborn_count = unborn.sum()
    assert unborn_count > 0

    # Test for newborns (0 <= age < 1)
    newborns = (age >= 0) & (age < 1)
    newborn_count = newborns.sum()
    assert newborn_count > 0


def test_ecps_replicates_jct_tax_expenditures():
    """Check that repealing each deduction reproduces its JCT target.

    For every entry in ``EXPENDITURE_TARGETS`` a reform simulation is run
    in which that variable is passed to ``neutralize_variable``. The summed
    change in household ``income_tax`` for 2024 relative to the baseline
    must be within ``TOLERANCE`` (relative error) of the target.
    """
    from policyengine_us import Microsimulation
    from policyengine_core.reforms import Reform
    from policyengine_us_data.datasets import EnhancedCPS_2024

    # JCT tax expenditure targets (dollar amounts; reported below in bn)
    EXPENDITURE_TARGETS = {
        "salt_deduction": 21.247e9,
        "medical_expense_deduction": 11.4e9,
        "charitable_deduction": 65.301e9,
        "interest_deduction": 24.8e9,
    }
    # Maximum accepted relative error; constant across deductions, so it
    # is defined once rather than on every loop iteration.
    TOLERANCE = 0.15

    def make_repeal_class(deduction_var):
        # Bind the variable name per class via a factory so the reform does
        # not depend on the loop variable's value at the time apply() runs.
        class RepealDeduction(Reform):
            def apply(self):
                self.neutralize_variable(deduction_var)

        return RepealDeduction

    baseline = Microsimulation(dataset=EnhancedCPS_2024)
    income_tax_b = baseline.calculate(
        "income_tax", period=2024, map_to="household"
    )

    for deduction, target in EXPENDITURE_TARGETS.items():
        # Run reform simulation with this deduction neutralized
        reformed = Microsimulation(
            reform=make_repeal_class(deduction), dataset=EnhancedCPS_2024
        )
        income_tax_r = reformed.calculate(
            "income_tax", period=2024, map_to="household"
        )

        # Calculate tax expenditure
        tax_expenditure = (income_tax_r - income_tax_b).sum()
        pct_error = abs((tax_expenditure - target) / target)

        print(
            f"{deduction} tax expenditure {tax_expenditure/1e9:.1f}bn differs from target {target/1e9:.1f}bn by {pct_error:.2%}"
        )
        assert pct_error < TOLERANCE, deduction
74 changes: 66 additions & 8 deletions policyengine_us_data/utils/loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from .soi import pe_to_soi, get_soi
import numpy as np
from policyengine_us_data.storage import STORAGE_FOLDER
from policyengine_core.reforms import Reform


def fmt(x):
Expand Down Expand Up @@ -132,6 +133,7 @@ def build_loss_matrix(dataset: type, time_period):
from policyengine_us import Microsimulation

sim = Microsimulation(dataset=dataset)
sim.default_calculation_period = time_period
hh_id = sim.calculate("household_id", map_to="person")
tax_unit_hh_id = sim.map_result(
hh_id, "person", "tax_unit", how="value_from_first_person"
Expand Down Expand Up @@ -252,7 +254,7 @@ def build_loss_matrix(dataset: type, time_period):
"alimony_income": 13e9,
"alimony_expense": 13e9,
# Rough estimate, not CPS derived
"real_estate_taxes": 400e9, # Rough estimate between 350bn and 600bn total property tax collections
"real_estate_taxes": 500e9, # Rough estimate between 350bn and 600bn total property tax collections
"rent": 735e9, # ACS total uprated by CPI
}

Expand Down Expand Up @@ -340,18 +342,22 @@ def build_loss_matrix(dataset: type, time_period):
)
targets_array.append(row["population_under_5"])

# Population by number of newborns and pregnancies

age = sim.calculate("age").values
infants = (age >= 0) & (age < 1)
label = "census/infants"
loss_matrix[label] = sim.map_result(infants, "person", "household")
targets_array.append(3_491_679)
# Total number of infants in the 1 Year ACS
INFANTS_2023 = 3_491_679
INFANTS_2022 = 3_437_933
# Assume infant population grows at the same rate from 2023.
infants_2024 = INFANTS_2023 * (INFANTS_2023 / INFANTS_2022)
targets_array.append(infants_2024)

# Itemized deduction tax expenditure targeting (SALT, medical expense, charitable, interest)

pregnancies = (age >= -0.75) & (age < 0)
label = "census/pregnancies"
loss_matrix[label] = sim.map_result(pregnancies, "person", "household")
targets_array.append(2_618_759)
_add_tax_expenditure_targets(
dataset, time_period, sim, loss_matrix, targets_array
)

if any(loss_matrix.isna().sum() > 0):
raise ValueError("Some targets are missing from the loss matrix")
Expand All @@ -360,3 +366,55 @@ def build_loss_matrix(dataset: type, time_period):
raise ValueError("Some targets are missing from the targets array")

return loss_matrix, np.array(targets_array)


def _add_tax_expenditure_targets(
    dataset,
    time_period,
    baseline_simulation,
    loss_matrix: pd.DataFrame,
    targets_array: list,
):
    """Add one JCT tax-expenditure column and target per itemized deduction.

    For each deduction listed below, a reform simulation is built in which
    that variable is passed to ``neutralize_variable``. The household-level
    difference in ``income_tax`` (reform minus baseline) is written to
    ``loss_matrix`` under ``jct/<deduction>_expenditure``, and the matching
    target amount is appended to ``targets_array``.

    Both ``loss_matrix`` and ``targets_array`` are modified in place;
    nothing is returned.

    Args:
        dataset: Dataset passed to each reform ``Microsimulation``.
        time_period: Assigned to each reform simulation's
            ``default_calculation_period``.
        baseline_simulation: Simulation providing the baseline
            ``income_tax`` values.
        loss_matrix: Frame that receives one new column per deduction.
        targets_array: List that receives one new target per deduction.
    """
    from policyengine_us import Microsimulation

    # Target amounts in dollars (e.g. 21.247e9 is 21.247 billion) for 2024,
    # per the 2024 JCT Tax Expenditures report:
    # https://www.jct.gov/publications/2024/jcx-48-24/
    jct_targets = {
        "salt_deduction": 21.247e9,
        "medical_expense_deduction": 11.4e9,
        "charitable_deduction": 65.301e9,
        "interest_deduction": 24.8e9,
    }

    def build_repeal_reform(variable_name):
        # A factory gives every Reform subclass its own bound variable name.
        class RepealDeduction(Reform):
            def apply(self):
                self.neutralize_variable(variable_name)

        return RepealDeduction

    # Baseline income tax is the same for every deduction, so compute once.
    baseline_income_tax = baseline_simulation.calculate(
        "income_tax", map_to="household"
    ).values

    for deduction, target_amount in jct_targets.items():
        # Simulate the world in which this single deduction is neutralized.
        reform_simulation = Microsimulation(
            dataset=dataset, reform=build_repeal_reform(deduction)
        )
        reform_simulation.default_calculation_period = time_period

        reform_income_tax = reform_simulation.calculate(
            "income_tax", map_to="household"
        ).values

        # Tax expenditure per household: extra tax owed without the deduction.
        loss_matrix[f"jct/{deduction}_expenditure"] = (
            reform_income_tax - baseline_income_tax
        )
        targets_array.append(target_amount)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ authors = [
license = {file = "LICENSE"}
requires-python = ">=3.10, <3.13.0"
dependencies = [
"policyengine_us",
"policyengine_us>=1.197.0",
"policyengine_core>=3.14.1",
"requests",
"tqdm",
Expand Down
Loading