Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- bump: minor
changes:
added:
- Move all randomness to the data package so that the country package is fully deterministic. Take-up decisions are now generated stochastically during dataset creation using take-up rates from YAML parameter files.
115 changes: 82 additions & 33 deletions policyengine_uk_data/datasets/frs.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
fill_with_mean,
STORAGE_FOLDER,
)
from policyengine_uk_data.parameters import load_take_up_rate, load_parameter


def create_frs(
Expand Down Expand Up @@ -818,48 +819,96 @@ def determine_education_level(fted_val, typeed2_val, age_val):
paragraph_3 | paragraph_4 | paragraph_5
)

# Add random variables which are for now in policyengine-uk.
# Generate stochastic take-up decisions
# All randomness is generated here in the data package using take-up rates
# stored in YAML parameter files. This keeps the country package purely
# deterministic.

RANDOM_VARIABLES = [
"would_evade_tv_licence_fee",
"would_claim_pc",
"would_claim_uc",
"would_claim_child_benefit",
"main_residential_property_purchased_is_first_home",
"household_owns_tv",
"is_higher_earner",
"attends_private_school",
]
generator = np.random.default_rng(seed=100)

for variable in RANDOM_VARIABLES:
value = sim.calculate(variable).values
entity = sim.tax_benefit_system.variables[variable].entity.key
if entity == "person":
pe_person[variable] = value
elif entity == "household":
pe_household[variable] = value
elif entity == "benunit":
pe_benunit[variable] = value
# Load take-up rates from parameter files
child_benefit_rate = load_take_up_rate("child_benefit", year)
pension_credit_rate = load_take_up_rate("pension_credit", year)
universal_credit_rate = load_take_up_rate("universal_credit", year)
marriage_allowance_rate = load_take_up_rate("marriage_allowance", year)
child_benefit_opts_out_rate = load_take_up_rate(
"child_benefit_opts_out_rate", year
)
tfc_rate = load_take_up_rate("tax_free_childcare", year)
extended_childcare_rate = load_take_up_rate("extended_childcare", year)
universal_childcare_rate = load_take_up_rate("universal_childcare", year)
targeted_childcare_rate = load_take_up_rate("targeted_childcare", year)

# Add Tax-Free Childcare assumptions
# Generate take-up decisions by comparing random draws to take-up rates
# Person-level
pe_person["would_claim_marriage_allowance"] = (
generator.random(len(pe_person)) < marriage_allowance_rate
)

count_benunits = len(pe_benunit)
# Benefit unit-level
pe_benunit["would_claim_child_benefit"] = (
generator.random(len(pe_benunit)) < child_benefit_rate
)
pe_benunit["child_benefit_opts_out"] = (
generator.random(len(pe_benunit)) < child_benefit_opts_out_rate
)
pe_benunit["would_claim_pc"] = (
generator.random(len(pe_benunit)) < pension_credit_rate
)
pe_benunit["would_claim_uc"] = (
generator.random(len(pe_benunit)) < universal_credit_rate
)
pe_benunit["would_claim_tfc"] = (
generator.random(len(pe_benunit)) < tfc_rate
)
pe_benunit["would_claim_extended_childcare"] = (
generator.random(len(pe_benunit)) < extended_childcare_rate
)
pe_benunit["would_claim_universal_childcare"] = (
generator.random(len(pe_benunit)) < universal_childcare_rate
)
pe_benunit["would_claim_targeted_childcare"] = (
generator.random(len(pe_benunit)) < targeted_childcare_rate
)

extended_would_claim = np.random.random(count_benunits) < 0.812
tfc_would_claim = np.random.random(count_benunits) < 0.586
universal_would_claim = np.random.random(count_benunits) < 0.563
targeted_would_claim = np.random.random(count_benunits) < 0.597
# Generate other stochastic variables using rates from parameter files
tv_ownership_rate = load_parameter("stochastic", "tv_ownership_rate", year)
tv_evasion_rate = load_parameter(
"stochastic", "tv_licence_evasion_rate", year
)
first_time_buyer_rate = load_parameter(
"stochastic", "first_time_buyer_rate", year
)

# Household-level: TV ownership
pe_household["household_owns_tv"] = (
generator.random(len(pe_household)) < tv_ownership_rate
)

# Generate extended childcare hours usage values with mean 15.019 and sd 4.972
extended_hours_values = np.random.normal(15.019, 4.972, count_benunits)
# Household-level: TV licence evasion
pe_household["would_evade_tv_licence_fee"] = (
generator.random(len(pe_household)) < tv_evasion_rate
)

# Household-level: First home purchase
pe_household["main_residential_property_purchased_is_first_home"] = (
generator.random(len(pe_household)) < first_time_buyer_rate
)

# Person-level: Tie-breaking for higher earner (uniform random)
pe_person["higher_earner_tie_break"] = generator.random(len(pe_person))

# Person-level: Private school attendance random draw
pe_person["attends_private_school_random_draw"] = generator.random(
len(pe_person)
)

# Generate extended childcare hours usage values with mean 15.019 and sd
# 4.972
extended_hours_values = generator.normal(15.019, 4.972, len(pe_benunit))
# Clip values to be between 0 and 30 hours
extended_hours_values = np.clip(extended_hours_values, 0, 30)

pe_benunit["would_claim_extended_childcare"] = extended_would_claim
pe_benunit["would_claim_tfc"] = tfc_would_claim
pe_benunit["would_claim_universal_childcare"] = universal_would_claim
pe_benunit["would_claim_targeted_childcare"] = targeted_would_claim

# Add the maximum extended childcare hours usage
pe_benunit["maximum_extended_childcare_hours_usage"] = (
extended_hours_values
Expand Down
68 changes: 68 additions & 0 deletions policyengine_uk_data/parameters/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
"""
Take-up rate parameters for stochastic simulation.

These parameters are stored in the data package to keep the country package
as a purely deterministic rules engine.
"""

import yaml
from pathlib import Path

PARAMETERS_DIR = Path(__file__).parent


def load_parameter(
    category: str, variable_name: str, year: int = 2015
) -> float:
    """Load a parameter value for a given year from a YAML file.

    The file is read from ``PARAMETERS_DIR / category / <variable_name>.yaml``
    and must contain a top-level ``values`` mapping of start dates to values.
    The value whose start year is the latest one not after ``year`` is
    returned. Only the year component of each start date is compared.

    Args:
        category: Category subfolder (e.g., 'take_up', 'stochastic')
        variable_name: Name of the parameter file (without .yaml)
        year: Year for which to get the value

    Returns:
        Parameter value as a float

    Raises:
        ValueError: If no entry starts in or before ``year``.
    """
    yaml_path = PARAMETERS_DIR / category / f"{variable_name}.yaml"

    # Read as UTF-8 explicitly: the parameter files contain non-ASCII
    # characters (e.g. "£"), and the platform default encoding may differ.
    with open(yaml_path, encoding="utf-8") as f:
        data = yaml.safe_load(f)

    # Normalise every key to (start_year, iso_text) before sorting. YAML may
    # yield datetime.date objects, strings, or bare integers as keys; sorting
    # the raw keys would raise TypeError as soon as the types are mixed.
    entries = []
    for date_key, value in data["values"].items():
        if hasattr(date_key, "year"):
            # It's a datetime.date object
            start_year = date_key.year
        else:
            # It's a string such as "2015-01-01" (or a bare year)
            start_year = int(str(date_key).split("-")[0])
        entries.append((start_year, str(date_key), value))

    # The ISO text is the secondary key so that several entries within the
    # same year keep their chronological order.
    entries.sort(key=lambda entry: entry[:2])

    # Walk forward in time, remembering the last value already in force.
    applicable_value = None
    for start_year, _, value in entries:
        if start_year > year:
            break
        applicable_value = value

    if applicable_value is None:
        raise ValueError(
            f"No value found for {category}/{variable_name} in {year}"
        )

    # Honour the declared return type even when the YAML holds an integer
    # literal such as ``1``.
    return float(applicable_value)


def load_take_up_rate(variable_name: str, year: int = 2015) -> float:
    """Look up a take-up rate for the given year.

    Convenience wrapper that delegates to ``load_parameter`` with the
    category fixed to ``"take_up"``.

    Args:
        variable_name: Name of the take-up parameter file (without .yaml)
        year: Year for which to get the rate

    Returns:
        Whatever ``load_parameter`` returns for that file and year.
    """
    take_up_category = "take_up"
    return load_parameter(
        category=take_up_category,
        variable_name=variable_name,
        year=year,
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
description: Percentage of residential property purchases that are by first-time buyers
metadata:
unit: /1
label: First-time buyer rate
reference:
- title: ONS First-time buyer mortgage sales by local authority
href: https://www.ons.gov.uk/releases/firsttimebuyermortgagesalesbylocalauthorityuk2006to2023
- title: Uswitch First-Time Buyer Statistics 2024
href: https://www.uswitch.com/mortgages/first-time-buyer-statistics/
values:
2013-01-01: 0.280 # ONS data
2023-01-01: 0.384 # 38.4% of property sales were first-time buyers
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
description: Percentage of TV-owning households that evade the TV licence fee
metadata:
unit: /1
label: TV licence evasion rate
reference:
- title: TV Licensing annual evader statistics
href: https://www.tvlicensing.co.uk/about/media-centre/news/tv-licensing-publishes-annual-evader-statistics-NEWS31
- title: House of Commons Library - TV licence fee statistics
href: https://commonslibrary.parliament.uk/research-briefings/cbp-8101/
values:
2015-01-01: 0.05 # Historical low point
2018-01-01: 0.0657 # Official BBC estimate
2022-01-01: 0.1058 # Significant increase
2024-01-01: 0.1252 # Current BBC estimate
10 changes: 10 additions & 0 deletions policyengine_uk_data/parameters/stochastic/tv_ownership_rate.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
description: Percentage of households that own a functioning colour TV
metadata:
unit: /1
label: TV ownership rate
reference:
- title: Ofcom - 95% of UK homes had at least one TV set in 2020
href: https://www.statista.com/statistics/269969/number-of-tv-households-in-the-uk/
values:
2015-01-01: 0.96
2020-01-01: 0.95
9 changes: 9 additions & 0 deletions policyengine_uk_data/parameters/take_up/child_benefit.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
description: Share of eligible children that participate in Child Benefit
metadata:
unit: /1
reference:
- title: "Child Benefit statistics: 2022 annual release"
href: https://www.gov.uk/government/statistics/child-benefit-statistics-annual-release-august-2022/child-benefit-statistics-annual-release-data-at-august-2022#:~:text=since%202012%20the%20take%2Dup,level%20in%202022%20of%2089%25.
values:
2012-01-01: 0.97
2022-01-01: 0.89
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
description: Percentage of fully High Income Child Benefit Charge-liable families who opt out of Child Benefit.
metadata:
unit: /1
label: Child Benefit HICBC-liable opt-out rate
reference:
- title: "Child Benefit Statistics: Annual Release, August 2022"
href: https://www.gov.uk/government/statistics/child-benefit-statistics-annual-release-august-2022/child-benefit-statistics-annual-release-data-at-august-2022
values:
2019-01-01: 0.23 # 3m families have ANI over £60k in the 2023 FRS, 683k families opt out of CB.
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
description: Extended childcare entitlement take-up rate
metadata:
unit: /1
period: year
reference:
- title: Empirical estimate from FRS data
href: https://github.com/PolicyEngine/policyengine-uk-data
values:
2015-01-01: 0.812
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
description: Percentage of eligible couples who claim Marriage Allowance.
metadata:
unit: /1
label: Marriage Allowance take-up rate
values:
2000-01-01: 1
10 changes: 10 additions & 0 deletions policyengine_uk_data/parameters/take_up/pension_credit.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
description: Share of eligible Pension Credit recipients that participate.
metadata:
label: Pension Credit take-up rate
name: PC_takeup
unit: /1
reference:
- title: "Income-related benefits: estimates of take-up: financial year 2019 to 2020"
href: https://www.gov.uk/government/statistics/income-related-benefits-estimates-of-take-up-financial-year-2019-to-2020/income-related-benefits-estimates-of-take-up-financial-year-2019-to-2020#pension-credit-2
values:
2015-01-01: 0.7
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
description: Targeted childcare entitlement take-up rate
metadata:
unit: /1
period: year
reference:
- title: Empirical estimate from FRS data
href: https://github.com/PolicyEngine/policyengine-uk-data
values:
2015-01-01: 0.597
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
description: Tax-Free Childcare take-up rate
metadata:
unit: /1
period: year
reference:
- title: Empirical estimate from FRS data
href: https://github.com/PolicyEngine/policyengine-uk-data
values:
2015-01-01: 0.586
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
description: Universal childcare entitlement take-up rate
metadata:
unit: /1
period: year
reference:
- title: Empirical estimate from FRS data
href: https://github.com/PolicyEngine/policyengine-uk-data
values:
2015-01-01: 0.563
6 changes: 6 additions & 0 deletions policyengine_uk_data/parameters/take_up/universal_credit.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
description: Take-up rate of Universal Credit.
metadata:
unit: /1
label: Universal Credit take-up rate
values:
2015-01-01: 0.55
Loading
Loading