Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions policyengine_us_data/spm/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""
SPM (Supplemental Poverty Measure) threshold calculation module.

This module provides tools to calculate SPM thresholds from source data:
- Base thresholds from BLS Consumer Expenditure Survey (by tenure type)
- Geographic adjustments (GEOADJ) from ACS median rents
- Local thresholds for congressional districts

The SPM threshold formula is:
threshold = base_threshold[tenure] × equivalence_scale × geoadj

Where:
- base_threshold varies by tenure: renter, owner with mortgage, owner without
- equivalence_scale adjusts for family composition
- geoadj adjusts for local housing costs (0.84 in WV to 1.27 in HI)

Usage:
from policyengine_us_data.spm import (
calculate_base_thresholds,
create_district_geoadj_lookup,
calculate_local_spm_thresholds,
update_spm_thresholds_for_districts,
)

# Get base thresholds for 2024
base = calculate_base_thresholds(2024)

# Get district GEOADJ lookup table
geoadj = create_district_geoadj_lookup(2022)

# Calculate local thresholds for SPM units
thresholds = calculate_local_spm_thresholds(
district_codes=["0612", "0611"],
tenure_types=["renter", "owner_with_mortgage"],
num_adults=[2, 1],
num_children=[2, 0],
year=2024,
)
"""

from .ce_threshold import calculate_base_thresholds
from .district_geoadj import (
create_district_geoadj_lookup,
get_district_geoadj,
calculate_geoadj_from_rent,
)
from .local_threshold import (
calculate_local_spm_thresholds,
update_spm_thresholds_for_districts,
spm_equivalence_scale,
)

__all__ = [
"calculate_base_thresholds",
"create_district_geoadj_lookup",
"get_district_geoadj",
"calculate_geoadj_from_rent",
"calculate_local_spm_thresholds",
"update_spm_thresholds_for_districts",
"spm_equivalence_scale",
]
58 changes: 58 additions & 0 deletions policyengine_us_data/spm/ce_threshold.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""
Base SPM thresholds from Consumer Expenditure Survey.

Wraps spm-calculator for use in policyengine-us-data.
"""

from typing import Optional

# Published BLS thresholds by year and tenure type
# Source: https://www.bls.gov/pir/spm/spm_thresholds.htm
BLS_PUBLISHED_THRESHOLDS = {
2024: {
"renter": 39430,
"owner_with_mortgage": 39068,
"owner_without_mortgage": 32586,
},
2023: {
"renter": 36606,
"owner_with_mortgage": 36192,
"owner_without_mortgage": 30347,
},
2022: {
"renter": 33402,
"owner_with_mortgage": 32949,
"owner_without_mortgage": 27679,
},
}


def calculate_base_thresholds(
year: int = 2024,
use_published: bool = True,
) -> dict[str, float]:
"""
Get base SPM thresholds for the reference family (2A2C) by tenure type.

Args:
year: Target year for thresholds
use_published: If True, use published BLS thresholds when available.
If False or year not available, forecast from latest.

Returns:
Dict with keys 'renter', 'owner_with_mortgage', 'owner_without_mortgage'
and threshold values in dollars.
"""
if use_published and year in BLS_PUBLISHED_THRESHOLDS:
return BLS_PUBLISHED_THRESHOLDS[year].copy()

# Forecast from latest available year
latest_year = max(BLS_PUBLISHED_THRESHOLDS.keys())
latest_thresholds = BLS_PUBLISHED_THRESHOLDS[latest_year]

# Use approximately 3% annual inflation for forecasting
# TODO: Use actual CPI-U or better methodology
years_ahead = year - latest_year
inflation_factor = 1.03**years_ahead

return {k: v * inflation_factor for k, v in latest_thresholds.items()}
191 changes: 191 additions & 0 deletions policyengine_us_data/spm/district_geoadj.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
"""
Geographic adjustment (GEOADJ) lookup for congressional districts.

GEOADJ adjusts SPM thresholds for local housing costs using the formula:
GEOADJ = (local_median_rent / national_median_rent) × 0.492 + 0.508

Where 0.492 is the housing share of the SPM threshold for renters.

Data source: ACS Table B25031 (Median Gross Rent by Bedrooms)
"""

import os
from functools import lru_cache
from pathlib import Path
from typing import Optional

import numpy as np
import pandas as pd

# Housing portion of SPM threshold (for renters)
HOUSING_SHARE = 0.492

# Path to cached GEOADJ data
STORAGE_FOLDER = Path(__file__).parent.parent / "storage"


def calculate_geoadj_from_rent(
local_rent: float | np.ndarray,
national_rent: float,
) -> float | np.ndarray:
"""
Calculate GEOADJ from local and national median rents.

Formula: GEOADJ = (local_rent / national_rent) × 0.492 + 0.508

Args:
local_rent: Local area median rent (scalar or array)
national_rent: National median rent

Returns:
GEOADJ value(s)
"""
rent_ratio = np.asarray(local_rent) / national_rent
return rent_ratio * HOUSING_SHARE + (1 - HOUSING_SHARE)


@lru_cache(maxsize=16)
def _load_district_geoadj(year: int) -> pd.DataFrame:
"""Load or create district GEOADJ lookup table."""
cache_file = STORAGE_FOLDER / f"district_geoadj_{year}.csv"

if cache_file.exists():
return pd.read_csv(cache_file, dtype={"district_code": str})

# Create from ACS data
df = _create_district_geoadj_from_acs(year)
df.to_csv(cache_file, index=False)
return df


def _fetch_acs_district_rents(year: int) -> pd.DataFrame:
"""
Fetch median 2-bedroom rent by congressional district from ACS.

Uses Census API to get ACS 5-year estimates, Table B25031.
"""
try:
from census import Census
except ImportError:
raise ImportError(
"census package required. Install with: pip install census"
)

api_key = os.environ.get("CENSUS_API_KEY")
if not api_key:
raise ValueError(
"CENSUS_API_KEY environment variable not set. "
"Get a free key at https://api.census.gov/data/key_signup.html"
)

c = Census(api_key)

# B25031_004E = Median gross rent, 2 bedrooms
variable = "B25031_004E"

all_data = []
for state_fips in range(1, 57): # State FIPS codes
try:
data = c.acs5.get(
[variable],
{
"for": "congressional district:*",
"in": f"state:{state_fips:02d}",
},
year=year,
)
all_data.extend(data)
except Exception:
pass

df = pd.DataFrame(all_data)
df["district_code"] = df["state"].str.zfill(2) + df[
"congressional district"
].str.zfill(2)
df["median_rent"] = pd.to_numeric(df[variable], errors="coerce")

return df[["district_code", "median_rent"]].dropna()


def _get_national_median_rent(year: int) -> float:
"""Get national median 2-bedroom rent for a year."""
try:
from census import Census
except ImportError:
raise ImportError("census package required")

api_key = os.environ.get("CENSUS_API_KEY")
if not api_key:
raise ValueError("CENSUS_API_KEY not set")

c = Census(api_key)
data = c.acs5.get(["B25031_004E"], {"for": "us:*"}, year=year)
return float(data[0]["B25031_004E"])


def _create_district_geoadj_from_acs(year: int) -> pd.DataFrame:
"""
Create GEOADJ lookup table for all congressional districts.

Args:
year: ACS 5-year end year

Returns:
DataFrame with district_code, median_rent, and geoadj columns
"""
# Get district rents
df = _fetch_acs_district_rents(year)

# Get national rent
national_rent = _get_national_median_rent(year)

# Calculate GEOADJ
df["geoadj"] = calculate_geoadj_from_rent(df["median_rent"], national_rent)

# Clamp to reasonable range (0.70 to 1.50)
df["geoadj"] = df["geoadj"].clip(0.70, 1.50)

return df


def create_district_geoadj_lookup(
year: int = 2022,
from_cache: bool = True,
) -> pd.DataFrame:
"""
Create or load GEOADJ lookup table for congressional districts.

Args:
year: ACS 5-year end year (use year - 1 from target year)
from_cache: If True, use cached data if available

Returns:
DataFrame with columns:
- district_code: 4-digit code (state FIPS + district number)
- median_rent: Median 2-bedroom rent from ACS
- geoadj: Geographic adjustment factor
"""
if from_cache:
return _load_district_geoadj(year)
return _create_district_geoadj_from_acs(year)


def get_district_geoadj(district_code: str, year: int = 2022) -> float:
"""
Get GEOADJ for a specific congressional district.

Args:
district_code: 4-digit district code (e.g., "0612" for CA-12)
year: ACS year for rent data

Returns:
GEOADJ value (typically 0.84 to 1.27)
"""
lookup = create_district_geoadj_lookup(year)
match = lookup[lookup["district_code"] == district_code]

if len(match) == 0:
# Return national average if district not found
return 1.0

return match["geoadj"].iloc[0]
Loading
Loading