diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29b..e5554377 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: patch + changes: + fixed: + - Clip negative values to zero for employment, self-employment, savings interest, tax-free savings, and dividend income variables. diff --git a/policyengine_uk_data/datasets/frs.py b/policyengine_uk_data/datasets/frs.py index e0c69990..467cc0d2 100644 --- a/policyengine_uk_data/datasets/frs.py +++ b/policyengine_uk_data/datasets/frs.py @@ -339,7 +339,9 @@ def determine_education_level(fted_val, typeed2_val, age_val): WEEKS_IN_YEAR = 365.25 / 7 - pe_person["employment_income"] = person.inearns * WEEKS_IN_YEAR + pe_person["employment_income"] = ( + np.maximum(0, person.inearns) * WEEKS_IN_YEAR + ) pension_payment = sum_to_entity( pension.penpay * (pension.penpay > 0), @@ -365,17 +367,20 @@ def determine_education_level(fted_val, typeed2_val, age_val): pension_payment + pension_tax_paid + pension_deductions_removed ) * WEEKS_IN_YEAR - pe_person["self_employment_income"] = person.seincam2 * WEEKS_IN_YEAR + pe_person["self_employment_income"] = ( + np.maximum(0, person.seincam2) * WEEKS_IN_YEAR + ) INVERTED_BASIC_RATE = 1.25 - pe_person["tax_free_savings_income"] = ( + pe_person["tax_free_savings_income"] = np.maximum( + 0, sum_to_entity( account.accint * (account.account == 21), account.person_id, person.person_id, ) - * WEEKS_IN_YEAR + * WEEKS_IN_YEAR, ) taxable_savings_interest = ( sum_to_entity( @@ -389,10 +394,12 @@ def determine_education_level(fted_val, typeed2_val, age_val): ) * WEEKS_IN_YEAR ) - pe_person["savings_interest_income"] = ( - taxable_savings_interest + pe_person["tax_free_savings_income"].values + pe_person["savings_interest_income"] = np.maximum( + 0, + taxable_savings_interest + pe_person["tax_free_savings_income"].values, ) - pe_person["dividend_income"] = ( + pe_person["dividend_income"] = np.maximum( + 0, sum_to_entity( ( account.accint @@ -405,7 +412,7 @@ def determine_education_level(fted_val, typeed2_val, age_val): account.person_id, person.index, ) - * 52 + * 52, ) is_head = person.hrpid == 1 household_property_income = ( diff --git a/policyengine_uk_data/tests/test_non_negative_incomes.py b/policyengine_uk_data/tests/test_non_negative_incomes.py new file mode 100644 index 00000000..4d9f671a --- /dev/null +++ b/policyengine_uk_data/tests/test_non_negative_incomes.py @@ -0,0 +1,23 @@ +import pytest + +INCOME_VARIABLES = [ + "employment_income", + "self_employment_income", + "tax_free_savings_income", + "savings_interest_income", + "dividend_income", + "private_pension_income", + "property_income", + "maintenance_income", + "miscellaneous_income", +] + + +@pytest.mark.parametrize("variable", INCOME_VARIABLES) +def test_income_non_negative(frs, variable: str): + """Test that income variables have no negative values.""" + values = frs.person[variable] + min_value = values.min() + assert ( + min_value >= 0 + ), f"{variable} has negative values (min = {min_value:.2f})"