From 35136b803ac204a4cbc69bd3d355914d17075423 Mon Sep 17 00:00:00 2001
From: daphnehanse11 <128793799+daphnehanse11@users.noreply.github.com>
Date: Thu, 3 Jul 2025 10:43:45 -0400
Subject: [PATCH 01/34] Add changelog entry for Medicaid state targets

---
 changelog_entry.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index e69de29b..c2032d84 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -0,0 +1,4 @@
+- bump: minor
+  changes:
+    added:
+    - Medicaid state level calibration targets.
\ No newline at end of file

From e2729a8709eeb24e4811e12fab719bdd6fd6bd9b Mon Sep 17 00:00:00 2001
From: daphnehanse11 <128793799+daphnehanse11@users.noreply.github.com>
Date: Thu, 3 Jul 2025 10:47:44 -0400
Subject: [PATCH 02/34] Add Medicaid state enrollment targets, test and loss code

---
 .gitignore                                    |  1 +
 .../storage/medicaid_enrollment_2024.csv      | 52 +++++++++++++++++++
 .../tests/test_datasets/test_enhanced_cps.py  | 40 ++++++++++++++
 policyengine_us_data/utils/loss.py            | 37 +++++++++++++
 4 files changed, 130 insertions(+)
 create mode 100644 policyengine_us_data/storage/medicaid_enrollment_2024.csv

diff --git a/.gitignore b/.gitignore
index a66fdef5..b65290c2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
 **/*.h5
 **/*.csv
 !healthcare_spending.csv
+!medicaid_enrollment_2024.csv
 !eitc.csv
 !spm_threshold_agi.csv
 **/_build
diff --git a/policyengine_us_data/storage/medicaid_enrollment_2024.csv b/policyengine_us_data/storage/medicaid_enrollment_2024.csv
new file mode 100644
index 00000000..67e115c7
--- /dev/null
+++ b/policyengine_us_data/storage/medicaid_enrollment_2024.csv
@@ -0,0 +1,52 @@
+state,enrollment
+AK,231577
+AL,766009
+AR,733561
+AZ,1778734
+CA,12172695
+CO,1058326
+CT,904321
+DC,240020
+DE,236840
+FL,3568648
+GA,1699279
+HI,376318
+IA,586748
+ID,296968
+IL,2918179
+IN,1623361
+KS,335902
+KY,1244822
+LA,1377806
+MA,1453344
+MD,1280697
+ME,322306
+MI,2194067
+MN,1146667
+MO,1118780
+MS,514730
+MT,193278
+NC,2469712
+ND,100543
+NE,302971
+NH,166813
+NJ,1506239
+NM,686825
+NV,713936
+NY,5946806
+OH,2596879
+OK,894911
+OR,1123313
+PA,2783389
+RI,273400
+SC,932515
+SD,126952
+TN,1268904
+TX,3821806
+UT,300742
+VA,1596777
+VT,151833
+WA,1776116
+WI,1108320
+WV,467632
+WY,57320
\ No newline at end of file
diff --git a/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py b/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py
index cdb47667..6d79d2fc 100644
--- a/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py
+++ b/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py
@@ -214,3 +214,43 @@ def test_aca_calibration():
     assert (
         not failed
     ), f"One or more states exceeded tolerance of {TOLERANCE:.0%}."
+
+
+def test_medicaid_calibration():
+
+    import pandas as pd
+    from pathlib import Path
+    from policyengine_us import Microsimulation
+    from policyengine_us_data.datasets.cps import EnhancedCPS_2024
+
+    TARGETS_PATH = Path(
+        "policyengine_us_data/storage/medicaid_enrollment_2024.csv"
+    )
+    targets = pd.read_csv(TARGETS_PATH)
+
+    sim = Microsimulation(dataset=EnhancedCPS_2024)
+    state_code_hh = sim.calculate("state_code", map_to="household").values
+    medicaid_enrolled = sim.calculate(
+        "medicaid_enrolled", map_to="household", period=2025
+    )
+
+    TOLERANCE = 0.45
+    failed = False
+    for _, row in targets.iterrows():
+        state = row["state"]
+        target_spending = row["spending"]
+        simulated = medicaid_enrolled[state_code_hh == state].sum()
+
+        pct_error = abs(simulated - target_spending) / target_spending
+        print(
+            f"{state}: simulated ${simulated/1e9:.2f} bn  "
+            f"target ${target_spending/1e9:.2f} bn  "
+            f"error {pct_error:.2%}"
+        )
+
+        if pct_error > TOLERANCE:
+            failed = True
+
+    assert (
+        not failed
+    ), f"One or more states exceeded tolerance of {TOLERANCE:.0%}."
diff --git a/policyengine_us_data/utils/loss.py b/policyengine_us_data/utils/loss.py
index 9e3cdc60..74135a07 100644
--- a/policyengine_us_data/utils/loss.py
+++ b/policyengine_us_data/utils/loss.py
@@ -682,6 +682,43 @@ def _add_agi_metric_columns(
         loss_matrix[col_name] = metric
 
     return loss_matrix
+    # Medicaid enrollment by state
+
+    enrollment_by_state = pd.read_csv(
+        STORAGE_FOLDER / "medicaid_enrollment_2024.csv"
+    )
+
+    # One-time pulls so we don’t re-compute inside the loop
+    state_person = sim.calculate("state_code", map_to="person").values
+
+    # Flag people in households that actually receive medicaid
+    has_medicaid = sim.calculate(
+        "medicaid_enrolled", map_to="person", period=2025
+    )
+    is_medicaid_eligible = sim.calculate(
+        "is_medicaid_eligible", map_to="person", period=2025
+    ).values
+    is_enrolled = has_medicaid & is_medicaid_eligible
+
+    for _, row in enrollment_by_state.iterrows():
+        # People who both live in the state and have marketplace coverage
+        in_state = state_person == row["state"]
+        in_state_enrolled = in_state & is_enrolled
+
+        label = f"irs/medicaid_enrollment/{row['state'].lower()}"
+        loss_matrix[label] = sim.map_result(
+            in_state_enrolled, "person", "household"
+        )
+        if any(loss_matrix[label].isna()):
+            raise ValueError(f"Missing values for {label}")
+
+        # Convert to thousands for the target
+        targets_array.append(row["enrollment"])
+
+        print(
+            f"Targeting Medicaid enrollment for {row['state']} "
+            f"with target {row['enrollment']:.0f}k"
+        )
 
 
 def _add_state_real_estate_taxes(loss_matrix, targets_list, sim):

From 15a6c0a0c5a085da779787ea1ed2f6185282803f Mon Sep 17 00:00:00 2001
From: daphnehanse11 <128793799+daphnehanse11@users.noreply.github.com>
Date: Thu, 3 Jul 2025 10:53:44 -0400
Subject: [PATCH 03/34] Use enrollment column in Medicaid calibration test

---
 .../tests/test_datasets/test_enhanced_cps.py                | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py b/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py
index 6d79d2fc..abf67301 100644
--- a/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py
+++ b/policyengine_us_data/tests/test_datasets/test_enhanced_cps.py
@@ -238,13 +238,13 @@ def test_medicaid_calibration():
     failed = False
     for _, row in targets.iterrows():
         state = row["state"]
-        target_spending = row["spending"]
+        target_enrollment = row["enrollment"]
         simulated = medicaid_enrolled[state_code_hh == state].sum()
 
-        pct_error = abs(simulated - target_spending) / target_spending
+        pct_error = abs(simulated - target_enrollment) / target_enrollment
         print(
             f"{state}: simulated ${simulated/1e9:.2f} bn  "
-            f"target ${target_spending/1e9:.2f} bn  "
+            f"target ${target_enrollment/1e9:.2f} bn  "
             f"error {pct_error:.2%}"
         )
 

From 3b15c495c6dbde9914ed3d2dea1b0aea4be17d5b Mon Sep 17 00:00:00 2001
From: daphnehanse11 <128793799+daphnehanse11@users.noreply.github.com>
Date: Thu, 3 Jul 2025 12:30:13 -0400
Subject: [PATCH 04/34] Move Medicaid enrollment targets into build_loss_matrix

---
 policyengine_us_data/utils/loss.py | 75 +++++++++++++++---------------
 1 file changed, 38 insertions(+), 37 deletions(-)

diff --git a/policyengine_us_data/utils/loss.py b/policyengine_us_data/utils/loss.py
index 74135a07..21abce0f 100644
--- a/policyengine_us_data/utils/loss.py
+++ b/policyengine_us_data/utils/loss.py
@@ -519,6 +519,44 @@ def build_loss_matrix(dataset: type, time_period):
         # Convert to thousands for the target
         targets_array.append(row["enrollment"])
 
+    # Medicaid enrollment by state
+
+    enrollment_by_state = pd.read_csv(
+        STORAGE_FOLDER / "medicaid_enrollment_2024.csv"
+    )
+
+    # One-time pulls so we don’t re-compute inside the loop
+    state_person = sim.calculate("state_code", map_to="person").values
+
+    # Flag people in households that actually receive medicaid
+    has_medicaid = sim.calculate(
+        "medicaid_enrolled", map_to="person", period=2025
+    )
+    is_medicaid_eligible = sim.calculate(
+        "is_medicaid_eligible", map_to="person", period=2025
+    ).values
+    is_enrolled = has_medicaid & is_medicaid_eligible
+
+    for _, row in enrollment_by_state.iterrows():
+        # People who both live in the state and have marketplace coverage
+        in_state = state_person == row["state"]
+        in_state_enrolled = in_state & is_enrolled
+
+        label = f"irs/medicaid_enrollment/{row['state'].lower()}"
+        loss_matrix[label] = sim.map_result(
+            in_state_enrolled, "person", "household"
+        )
+        if any(loss_matrix[label].isna()):
+            raise ValueError(f"Missing values for {label}")
+
+        # Convert to thousands for the target
+        targets_array.append(row["enrollment"])
+
+        print(
+            f"Targeting Medicaid enrollment for {row['state']} "
+            f"with target {row['enrollment']:.0f}k"
+        )
+
     # State 10-year age targets
 
     age_targets = pd.read_csv(STORAGE_FOLDER / "age_state.csv")
@@ -682,43 +720,6 @@ def _add_agi_metric_columns(
         loss_matrix[col_name] = metric
 
     return loss_matrix
-    # Medicaid enrollment by state
-
-    enrollment_by_state = pd.read_csv(
-        STORAGE_FOLDER / "medicaid_enrollment_2024.csv"
-    )
-
-    # One-time pulls so we don’t re-compute inside the loop
-    state_person = sim.calculate("state_code", map_to="person").values
-
-    # Flag people in households that actually receive medicaid
-    has_medicaid = sim.calculate(
-        "medicaid_enrolled", map_to="person", period=2025
-    )
-    is_medicaid_eligible = sim.calculate(
-        "is_medicaid_eligible", map_to="person", period=2025
-    ).values
-    is_enrolled = has_medicaid & is_medicaid_eligible
-
-    for _, row in enrollment_by_state.iterrows():
-        # People who both live in the state and have marketplace coverage
-        in_state = state_person == row["state"]
-        in_state_enrolled = in_state & is_enrolled
-
-        label = f"irs/medicaid_enrollment/{row['state'].lower()}"
-        loss_matrix[label] = sim.map_result(
-            in_state_enrolled, "person", "household"
-        )
-        if any(loss_matrix[label].isna()):
-            raise ValueError(f"Missing values for {label}")
-
-        # Convert to thousands for the target
-        targets_array.append(row["enrollment"])
-
-        print(
-            f"Targeting Medicaid enrollment for {row['state']} "
-            f"with target {row['enrollment']:.0f}k"
-        )
 
 
 def _add_state_real_estate_taxes(loss_matrix, targets_list, sim):

From ee6c850786a75754058b6504a88bad457dcfe341 Mon Sep 17 00:00:00 2001
From: daphnehanse11 <128793799+daphnehanse11@users.noreply.github.com>
Date: Mon, 7 Jul 2025 10:10:17 -0400
Subject: [PATCH 05/34] Add storage README with dataset sources

---
 policyengine_us_data/storage/README.md | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 policyengine_us_data/storage/README.md

diff --git a/policyengine_us_data/storage/README.md b/policyengine_us_data/storage/README.md
new file mode 100644
index 00000000..88431a8a
--- /dev/null
+++ b/policyengine_us_data/storage/README.md
@@ -0,0 +1,17 @@
+# Sources for datasets
+
+# title: Medicaid enrollment 2024
+reference: 
+
+
+# storage/ datasets 
+
+- **aca_spending_and_enrollment_2024.csv**  
+  • Source: CMS Marketplace Public Use File, 2024 open-enrollment  
+  • Date: 2024  
+  • Location: https://www.cms.gov/files/document/health-insurance-exchanges-2024-open-enrollment-report-final.pdf
+
+- **medicaid_enrollment_2024.csv**  
+  • Source: MACPAC Enrollment Tables, FFY 2024  
+  • Date: 2024  
+  • Location: `https://www.medicaid.gov/resources-for-states/downloads/eligib-oper-and-enrol-snap-december2024.pdf#page=26

From 98901a0a7be528b59d961aba47aa379c4272bf20 Mon Sep 17 00:00:00 2001
From: PavelMakarchuk <pavel.ma99@gmail.com>
Date: Tue, 8 Jul 2025 11:59:00 +0200
Subject: [PATCH 06/34] Use "SURVIVING_SPOUSE" instead of "WIDOW" in `soi.py`
 Fixes #347

---
 changelog_entry.yaml                     | 4 ++++
 policyengine_us_data/datasets/cps/cps.py | 2 +-
 policyengine_us_data/utils/soi.py        | 4 +++-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index e69de29b..34cd58d6 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -0,0 +1,4 @@
+- bump: patch
+  changes:
+    fixed:
+    - Use SURVIVING_SPOUSE and is_surviving_spouse instead of WIDOW and is_widowed.
diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index f6fd0e75..3b976a31 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -356,7 +356,7 @@ def children_per_parent(col: str) -> pd.DataFrame:
     cps["cps_race"] = person.PRDTRACE
     cps["is_hispanic"] = person.PRDTHSP != 0
 
-    cps["is_widowed"] = person.A_MARITL == 4
+    cps["is_surviving_spouse"] = person.A_MARITL == 4
     cps["is_separated"] = person.A_MARITL == 6
     # High school or college/university enrollment status.
     cps["is_full_time_college_student"] = person.A_HSCOL == 2
diff --git a/policyengine_us_data/utils/soi.py b/policyengine_us_data/utils/soi.py
index 9d53a142..a34babf6 100644
--- a/policyengine_us_data/utils/soi.py
+++ b/policyengine_us_data/utils/soi.py
@@ -218,7 +218,9 @@ def compare_soi_replication_to_soi(df, soi):
         elif fs == "Head of Household":
             subset = subset[subset.filing_status == "HEAD_OF_HOUSEHOLD"]
         elif fs == "Married Filing Jointly/Surviving Spouse":
-            subset = subset[subset.filing_status.isin(["JOINT", "WIDOW"])]
+            subset = subset[
+                subset.filing_status.isin(["JOINT", "SURVIVING_SPOUSE"])
+            ]
         elif fs == "Married Filing Separately":
             subset = subset[subset.filing_status == "SEPARATE"]
 

From b51f9e3795c771ce2fa5d59a9e3d99fa4e302d04 Mon Sep 17 00:00:00 2001
From: daphnehanse11 <128793799+daphnehanse11@users.noreply.github.com>
Date: Tue, 8 Jul 2025 15:40:26 -0400
Subject: [PATCH 07/34] Clean up storage README; add trailing newline to Medicaid CSV

---
 changelog_entry.yaml                                      | 3 ++-
 policyengine_us_data/storage/README.md                    | 8 +-------
 policyengine_us_data/storage/medicaid_enrollment_2024.csv | 2 +-
 3 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index c2032d84..c6f8c35d 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -1,4 +1,5 @@
 - bump: minor
   changes:
     added:
-    - Medicaid state level calibration targets.
\ No newline at end of file
+    - Medicaid state level calibration targets.
+    
\ No newline at end of file
diff --git a/policyengine_us_data/storage/README.md b/policyengine_us_data/storage/README.md
index 88431a8a..55f98ed9 100644
--- a/policyengine_us_data/storage/README.md
+++ b/policyengine_us_data/storage/README.md
@@ -1,9 +1,3 @@
-# Sources for datasets
-
-# title: Medicaid enrollment 2024
-reference: 
-
-
 # storage/ datasets 
 
 - **aca_spending_and_enrollment_2024.csv**  
@@ -14,4 +8,4 @@ reference:
 - **medicaid_enrollment_2024.csv**  
   • Source: MACPAC Enrollment Tables, FFY 2024  
   • Date: 2024  
-  • Location: `https://www.medicaid.gov/resources-for-states/downloads/eligib-oper-and-enrol-snap-december2024.pdf#page=26
+  • Location: https://www.medicaid.gov/resources-for-states/downloads/eligib-oper-and-enrol-snap-december2024.pdf#page=26
diff --git a/policyengine_us_data/storage/medicaid_enrollment_2024.csv b/policyengine_us_data/storage/medicaid_enrollment_2024.csv
index 67e115c7..108670c5 100644
--- a/policyengine_us_data/storage/medicaid_enrollment_2024.csv
+++ b/policyengine_us_data/storage/medicaid_enrollment_2024.csv
@@ -49,4 +49,4 @@ VT,151833
 WA,1776116
 WI,1108320
 WV,467632
-WY,57320
\ No newline at end of file
+WY,57320

From 0584a3c7a9609d745104af806ff0b47ac9db0437 Mon Sep 17 00:00:00 2001
From: MaxGhenis <MaxGhenis@users.noreply.github.com>
Date: Tue, 8 Jul 2025 21:53:05 +0000
Subject: [PATCH 08/34] Update package version

---
 CHANGELOG.md         | 7 +++++++
 changelog.yaml       | 5 +++++
 changelog_entry.yaml | 4 ----
 pyproject.toml       | 2 +-
 4 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e8b2a506..4abf90f0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.36.2] - 2025-07-08 21:53:02
+
+### Fixed
+
+- Use SURVIVING_SPOUSE and is_surviving_spouse instead of WIDOW and is_widowed.
+
 ## [1.36.1] - 2025-07-03 09:21:06
 
 ### Changed
@@ -508,6 +514,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 
 
+[1.36.2]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.36.1...1.36.2
 [1.36.1]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.36.0...1.36.1
 [1.36.0]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.35.2...1.36.0
 [1.35.2]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.35.1...1.35.2
diff --git a/changelog.yaml b/changelog.yaml
index 2f07e429..453cabf9 100644
--- a/changelog.yaml
+++ b/changelog.yaml
@@ -423,3 +423,8 @@
     changed:
     - PR tests to be more similar to production builds.
   date: 2025-07-03 09:21:06
+- bump: patch
+  changes:
+    fixed:
+    - Use SURVIVING_SPOUSE and is_surviving_spouse instead of WIDOW and is_widowed.
+  date: 2025-07-08 21:53:02
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index 34cd58d6..e69de29b 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -1,4 +0,0 @@
-- bump: patch
-  changes:
-    fixed:
-    - Use SURVIVING_SPOUSE and is_surviving_spouse instead of WIDOW and is_widowed.
diff --git a/pyproject.toml b/pyproject.toml
index 2f8ca522..833e554a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "policyengine_us_data"
-version = "1.36.1"
+version = "1.36.2"
 description = "A package to create representative microdata for the US."
 readme = "README.md"
 authors = [

From 31fbfe35d1e6744ea4f744518da7dce6f25e4f19 Mon Sep 17 00:00:00 2001
From: MaxGhenis <MaxGhenis@users.noreply.github.com>
Date: Wed, 9 Jul 2025 14:58:35 +0000
Subject: [PATCH 09/34] Update package version

---
 CHANGELOG.md         | 7 +++++++
 changelog.yaml       | 5 +++++
 changelog_entry.yaml | 5 -----
 pyproject.toml       | 2 +-
 4 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4abf90f0..6299d8fb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.37.0] - 2025-07-09 14:58:33
+
+### Added
+
+- Medicaid state level calibration targets.
+
 ## [1.36.2] - 2025-07-08 21:53:02
 
 ### Fixed
@@ -514,6 +520,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 
 
+[1.37.0]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.36.2...1.37.0
 [1.36.2]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.36.1...1.36.2
 [1.36.1]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.36.0...1.36.1
 [1.36.0]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.35.2...1.36.0
diff --git a/changelog.yaml b/changelog.yaml
index 453cabf9..699b2430 100644
--- a/changelog.yaml
+++ b/changelog.yaml
@@ -428,3 +428,8 @@
     fixed:
     - Use SURVIVING_SPOUSE and is_surviving_spouse instead of WIDOW and is_widowed.
   date: 2025-07-08 21:53:02
+- bump: minor
+  changes:
+    added:
+    - Medicaid state level calibration targets.
+  date: 2025-07-09 14:58:33
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index c6f8c35d..e69de29b 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -1,5 +0,0 @@
-- bump: minor
-  changes:
-    added:
-    - Medicaid state level calibration targets.
-    
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 833e554a..0352db69 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "policyengine_us_data"
-version = "1.36.2"
+version = "1.37.0"
 description = "A package to create representative microdata for the US."
 readme = "README.md"
 authors = [

From c621c5efc9c062fc31f9d6e9e06b7297f2e70c93 Mon Sep 17 00:00:00 2001
From: baogorek <baogorek@gmail.com>
Date: Fri, 11 Jul 2025 13:44:20 -0400
Subject: [PATCH 10/34] added CPS_2023 to lite mode generation

---
 changelog_entry.yaml                     | 4 ++++
 policyengine_us_data/datasets/cps/cps.py | 1 +
 2 files changed, 5 insertions(+)

diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index e69de29b..dcce3f1a 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -0,0 +1,4 @@
+- bump: patch
+  changes:
+    changed:
+    - lite mode now builds CPS_2023 in addition to CPS_2024
diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index 3b976a31..fde981ba 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -2006,6 +2006,7 @@ class Pooled_3_Year_CPS_2023(PooledCPS):
 
 if __name__ == "__main__":
     if test_lite:
+        CPS_2023().generate()
         CPS_2024().generate()
     else:
         CPS_2021().generate()

From 30846a991a53b7c313e2761bdd01523cc308b42e Mon Sep 17 00:00:00 2001
From: baogorek <baogorek@gmail.com>
Date: Fri, 11 Jul 2025 14:54:23 -0400
Subject: [PATCH 11/34] Fixed manual test

---
 .github/workflows/code_changes.yaml      | 1 +
 .github/workflows/manual_tests.yaml      | 2 +-
 policyengine_us_data/datasets/cps/cps.py | 1 +
 pyproject.toml                           | 2 +-
 4 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/code_changes.yaml b/.github/workflows/code_changes.yaml
index 6b474227..edd804db 100644
--- a/.github/workflows/code_changes.yaml
+++ b/.github/workflows/code_changes.yaml
@@ -2,6 +2,7 @@
 
 name: Code changes
 on:
+  workflow_call:
   push:
     branches:
       - main
diff --git a/.github/workflows/manual_tests.yaml b/.github/workflows/manual_tests.yaml
index a2daca18..fb13ba89 100644
--- a/.github/workflows/manual_tests.yaml
+++ b/.github/workflows/manual_tests.yaml
@@ -11,7 +11,7 @@ on:
 
 jobs:
   test:
-    uses: ./.github/workflows/pr_changelog.yaml
+    uses: ./.github/workflows/code_changes.yaml
     with:
       TEST_LITE: ${{ github.event.inputs.test_lite }}
     secrets: inherit
diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index fde981ba..177f4707 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -2008,6 +2008,7 @@ class Pooled_3_Year_CPS_2023(PooledCPS):
     if test_lite:
         CPS_2023().generate()
         CPS_2024().generate()
+        print(2 + 2)
     else:
         CPS_2021().generate()
         CPS_2022().generate()
diff --git a/pyproject.toml b/pyproject.toml
index 0352db69..3490ff1b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,7 +17,7 @@ authors = [
 license = {file = "LICENSE"}
 requires-python = ">=3.11, <3.13.0"
 dependencies = [
-    "policyengine-us>=1.197.0",
+    "policyengine-us>=1.333.0",
     "policyengine-core>=3.14.1",
     "requests",
     "tqdm",

From 552de174a3bd6c05199737bc398fb6724e13b7fa Mon Sep 17 00:00:00 2001
From: baogorek <baogorek@gmail.com>
Date: Fri, 11 Jul 2025 15:02:22 -0400
Subject: [PATCH 12/34] try again with locked version

---
 policyengine_us_data/datasets/cps/cps.py | 2 +-
 pyproject.toml                           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index 177f4707..09a594c3 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -2008,7 +2008,7 @@ class Pooled_3_Year_CPS_2023(PooledCPS):
     if test_lite:
         CPS_2023().generate()
         CPS_2024().generate()
-        print(2 + 2)
+        print(2 + 3)
     else:
         CPS_2021().generate()
         CPS_2022().generate()
diff --git a/pyproject.toml b/pyproject.toml
index 3490ff1b..74af05bf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,7 +17,7 @@ authors = [
 license = {file = "LICENSE"}
 requires-python = ">=3.11, <3.13.0"
 dependencies = [
-    "policyengine-us>=1.333.0",
+    "policyengine-us==1.333.0",
     "policyengine-core>=3.14.1",
     "requests",
     "tqdm",

From ab71dd4483ab24ab83ec61fd71260c77cb4bc9ac Mon Sep 17 00:00:00 2001
From: baogorek <baogorek@gmail.com>
Date: Fri, 11 Jul 2025 15:44:32 -0400
Subject: [PATCH 13/34] Add debug prints to downsample; disable subsample

---
 policyengine_us_data/datasets/cps/cps.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index 09a594c3..1edce6e9 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -100,9 +100,14 @@ def downsample(self, frac: float):
         original_dtypes = {
             key: original_data[key].dtype for key in original_data
         }
-
+        print("\n\nHERE IS THE PROBLEM-----")
+        print(f"frac is {frac}")
+        print(self)
+        print(Microsimulation)
         sim = Microsimulation(dataset=self)
-        sim.subsample(frac=frac)
+        print(sim)
+        print(sim.subsample)
+        #sim.subsample(frac=frac)
 
         for key in original_data:
             if key not in sim.tax_benefit_system.variables:

From 57fbddafc1ea429fbebb6e8c50dc0bc585e99115 Mon Sep 17 00:00:00 2001
From: baogorek <baogorek@gmail.com>
Date: Fri, 11 Jul 2025 15:45:47 -0400
Subject: [PATCH 14/34] lint

---
 policyengine_us_data/datasets/cps/cps.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index 1edce6e9..30688719 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -107,7 +107,7 @@ def downsample(self, frac: float):
         sim = Microsimulation(dataset=self)
         print(sim)
         print(sim.subsample)
-        #sim.subsample(frac=frac)
+        # sim.subsample(frac=frac)
 
         for key in original_data:
             if key not in sim.tax_benefit_system.variables:

From efd267f53c0838af1cad1613baf58868d4a51314 Mon Sep 17 00:00:00 2001
From: baogorek <baogorek@gmail.com>
Date: Fri, 11 Jul 2025 16:05:26 -0400
Subject: [PATCH 15/34] trying 3.11.12

---
 policyengine_us_data/datasets/cps/cps.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index 30688719..8219e915 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -107,7 +107,7 @@ def downsample(self, frac: float):
         sim = Microsimulation(dataset=self)
         print(sim)
         print(sim.subsample)
-        # sim.subsample(frac=frac)
+        sim.subsample(frac=frac)
 
         for key in original_data:
             if key not in sim.tax_benefit_system.variables:
@@ -2013,7 +2013,7 @@ class Pooled_3_Year_CPS_2023(PooledCPS):
     if test_lite:
         CPS_2023().generate()
         CPS_2024().generate()
-        print(2 + 3)
+        print(2 + 5)
     else:
         CPS_2021().generate()
         CPS_2022().generate()

From eb390dd580c9cd1407f32c5b19dbd8d9784d828f Mon Sep 17 00:00:00 2001
From: baogorek <baogorek@gmail.com>
Date: Fri, 11 Jul 2025 16:10:26 -0400
Subject: [PATCH 16/34] now actually specifying py version

---
 policyengine_us_data/datasets/cps/cps.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index 8219e915..a25aba26 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -2013,7 +2013,7 @@ class Pooled_3_Year_CPS_2023(PooledCPS):
     if test_lite:
         CPS_2023().generate()
         CPS_2024().generate()
-        print(2 + 5)
+        print(2 + 7)
     else:
         CPS_2021().generate()
         CPS_2022().generate()

From 9d69d6dd1cd6fc759f2c33417bdf204c401c170a Mon Sep 17 00:00:00 2001
From: baogorek <baogorek@gmail.com>
Date: Fri, 11 Jul 2025 16:33:21 -0400
Subject: [PATCH 17/34] Pin pandas to 2.3.1

---
 policyengine_us_data/datasets/cps/cps.py | 2 +-
 pyproject.toml                           | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index a25aba26..b3554604 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -2013,7 +2013,7 @@ class Pooled_3_Year_CPS_2023(PooledCPS):
     if test_lite:
         CPS_2023().generate()
         CPS_2024().generate()
-        print(2 + 7)
+        print(2 + 8)
     else:
         CPS_2021().generate()
         CPS_2022().generate()
diff --git a/pyproject.toml b/pyproject.toml
index 74af05bf..6c767ede 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,6 +19,7 @@ requires-python = ">=3.11, <3.13.0"
 dependencies = [
     "policyengine-us==1.333.0",
     "policyengine-core>=3.14.1",
+    "pandas==2.3.1",
     "requests",
     "tqdm",
     "microdf_python>=0.4.3",

From 54a7f9d1cbafae7a5962095af4bff12673f62fd8 Mon Sep 17 00:00:00 2001
From: baogorek <baogorek@gmail.com>
Date: Fri, 11 Jul 2025 16:47:12 -0400
Subject: [PATCH 18/34] small runner

---
 .github/workflows/pr_code_changes.yaml   | 2 +-
 policyengine_us_data/datasets/cps/cps.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/pr_code_changes.yaml b/.github/workflows/pr_code_changes.yaml
index 213d192f..385e5a4c 100644
--- a/.github/workflows/pr_code_changes.yaml
+++ b/.github/workflows/pr_code_changes.yaml
@@ -49,7 +49,7 @@ jobs:
         run: python -c "from policyengine_core.data import Dataset; print('Core import OK')"
 
   Test:
-      runs-on: larger-runner
+      runs-on: ubuntu-latest 
       needs: Lint
       env:
         HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index b3554604..027c2ef5 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -2013,7 +2013,7 @@ class Pooled_3_Year_CPS_2023(PooledCPS):
     if test_lite:
         CPS_2023().generate()
         CPS_2024().generate()
-        print(2 + 8)
+        print(2 + 0)
     else:
         CPS_2021().generate()
         CPS_2022().generate()

From 6122adfeeb1276b1641246d8466388829df013d2 Mon Sep 17 00:00:00 2001
From: baogorek <baogorek@gmail.com>
Date: Fri, 11 Jul 2025 16:53:57 -0400
Subject: [PATCH 19/34] trying everything

---
 .github/workflows/pr_code_changes.yaml   | 2 +-
 policyengine_us_data/datasets/cps/cps.py | 2 +-
 pyproject.toml                           | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/pr_code_changes.yaml b/.github/workflows/pr_code_changes.yaml
index 385e5a4c..02209591 100644
--- a/.github/workflows/pr_code_changes.yaml
+++ b/.github/workflows/pr_code_changes.yaml
@@ -63,7 +63,7 @@ jobs:
           - name: Set up Python
             uses: actions/setup-python@v2
             with:
-                python-version: '3.11'
+                python-version: '3.11.12'
           - name: Install package
             run: uv pip install -e .[dev] --system
 
diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index 027c2ef5..afbf223f 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -2013,7 +2013,7 @@ class Pooled_3_Year_CPS_2023(PooledCPS):
     if test_lite:
         CPS_2023().generate()
         CPS_2024().generate()
-        print(2 + 0)
+        print(2 + 9)
     else:
         CPS_2021().generate()
         CPS_2022().generate()
diff --git a/pyproject.toml b/pyproject.toml
index 6c767ede..d87290a2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,11 +15,11 @@ authors = [
     {name = "PolicyEngine", email = "hello@policyengine.org"},
 ]
 license = {file = "LICENSE"}
-requires-python = ">=3.11, <3.13.0"
+requires-python = ">=3.11, <3.11.13"
 dependencies = [
     "policyengine-us==1.333.0",
     "policyengine-core>=3.14.1",
-    "pandas==2.3.1",
+    "pandas==2.3.0",
     "requests",
     "tqdm",
     "microdf_python>=0.4.3",

From bcb6b1d8e7454546874d21ac307d0b2a9d5ed08e Mon Sep 17 00:00:00 2001
From: baogorek <baogorek@gmail.com>
Date: Fri, 11 Jul 2025 17:02:45 -0400
Subject: [PATCH 20/34] relaxing python version in pyproject.toml

---
 policyengine_us_data/datasets/cps/cps.py | 2 +-
 pyproject.toml                           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index afbf223f..3173d4d6 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -2013,7 +2013,7 @@ class Pooled_3_Year_CPS_2023(PooledCPS):
     if test_lite:
         CPS_2023().generate()
         CPS_2024().generate()
-        print(2 + 9)
+        print(3 + 9)
     else:
         CPS_2021().generate()
         CPS_2022().generate()
diff --git a/pyproject.toml b/pyproject.toml
index d87290a2..fe5fda52 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,7 +15,7 @@ authors = [
     {name = "PolicyEngine", email = "hello@policyengine.org"},
 ]
 license = {file = "LICENSE"}
-requires-python = ">=3.11, <3.11.13"
+requires-python = ">=3.11, <3.13.0"
 dependencies = [
     "policyengine-us==1.333.0",
     "policyengine-core>=3.14.1",

From eed999390421a8a9a02b83076a3e3af788aac186 Mon Sep 17 00:00:00 2001
From: baogorek <baogorek@gmail.com>
Date: Fri, 11 Jul 2025 17:29:12 -0400
Subject: [PATCH 21/34] putting things back in order.

---
 policyengine_us_data/datasets/cps/cps.py | 7 -------
 pyproject.toml                           | 2 +-
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index 3173d4d6..d9957cbb 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -100,13 +100,7 @@ def downsample(self, frac: float):
         original_dtypes = {
             key: original_data[key].dtype for key in original_data
         }
-        print("\n\nHERE IS THE PROBLEM-----")
-        print(f"frac is {frac}")
-        print(self)
-        print(Microsimulation)
         sim = Microsimulation(dataset=self)
-        print(sim)
-        print(sim.subsample)
         sim.subsample(frac=frac)
 
         for key in original_data:
@@ -2013,7 +2007,6 @@ class Pooled_3_Year_CPS_2023(PooledCPS):
     if test_lite:
         CPS_2023().generate()
         CPS_2024().generate()
-        print(3 + 9)
     else:
         CPS_2021().generate()
         CPS_2022().generate()
diff --git a/pyproject.toml b/pyproject.toml
index fe5fda52..4bec19eb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,7 +17,7 @@ authors = [
 license = {file = "LICENSE"}
 requires-python = ">=3.11, <3.13.0"
 dependencies = [
-    "policyengine-us==1.333.0",
+    "policyengine-us==1.340.1",
     "policyengine-core>=3.14.1",
     "pandas==2.3.0",
     "requests",

From 65473ce74d24808388f50599a4bb9179f76c311c Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <35577657+nikhilwoodruff@users.noreply.github.com>
Date: Sat, 12 Jul 2025 13:01:15 +0100
Subject: [PATCH 22/34] Use normal runner in PR tests

---
 .github/workflows/pr_code_changes.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pr_code_changes.yaml b/.github/workflows/pr_code_changes.yaml
index 213d192f..4e30d089 100644
--- a/.github/workflows/pr_code_changes.yaml
+++ b/.github/workflows/pr_code_changes.yaml
@@ -49,7 +49,7 @@ jobs:
         run: python -c "from policyengine_core.data import Dataset; print('Core import OK')"
 
   Test:
-      runs-on: larger-runner
+      runs-on: ubuntu-latest
       needs: Lint
       env:
         HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}

From ff4e9a634e5e727362e881a394d368a291bfcb9b Mon Sep 17 00:00:00 2001
From: baogorek <baogorek@gmail.com>
Date: Sat, 12 Jul 2025 09:53:07 -0400
Subject: [PATCH 23/34] added the 3.11.12 pin

---
 .github/workflows/code_changes.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/code_changes.yaml b/.github/workflows/code_changes.yaml
index b752e953..fef913e9 100644
--- a/.github/workflows/code_changes.yaml
+++ b/.github/workflows/code_changes.yaml
@@ -38,7 +38,7 @@ jobs:
           - name: Set up Python
             uses: actions/setup-python@v2
             with:
-                python-version: '3.11'
+                python-version: '3.11.12'
           - uses: "google-github-actions/auth@v2"
             with:
               workload_identity_provider: "projects/322898545428/locations/global/workloadIdentityPools/policyengine-research-id-pool/providers/prod-github-provider"

From 43ad4394ff074b8419e7fa954e2964682e22bc3c Mon Sep 17 00:00:00 2001
From: baogorek <baogorek@gmail.com>
Date: Sun, 13 Jul 2025 20:00:50 -0400
Subject: [PATCH 24/34] cps.py

---
 policyengine_us_data/datasets/cps/cps.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index d9957cbb..202f9c69 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -2007,6 +2007,7 @@ class Pooled_3_Year_CPS_2023(PooledCPS):
     if test_lite:
         CPS_2023().generate()
         CPS_2024().generate()
+        print(3)
     else:
         CPS_2021().generate()
         CPS_2022().generate()

From 879770e8aefcaa5c25f503bb877766b24f372d67 Mon Sep 17 00:00:00 2001
From: baogorek <baogorek@gmail.com>
Date: Sun, 13 Jul 2025 20:32:26 -0400
Subject: [PATCH 25/34] adding diagnostics

---
 .../datasets/cps/enhanced_cps.py                | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/policyengine_us_data/datasets/cps/enhanced_cps.py b/policyengine_us_data/datasets/cps/enhanced_cps.py
index b8af12ce..c01a6b17 100644
--- a/policyengine_us_data/datasets/cps/enhanced_cps.py
+++ b/policyengine_us_data/datasets/cps/enhanced_cps.py
@@ -190,6 +190,23 @@ def generate(self):
             )
             data["household_weight"][year] = optimised_weights
 
+        print("\n\n---reweighting quick diagnostics----\n")
+        estimate = optimised_weights @ loss_matrix
+        rel_error = (
+            ((estimate - targets_array) + 1) / (targets_array + 1)
+        ) ** 2
+        print(
+            f"rel_error: min: {np.min(rel_error):.2f}, max: {np.max(rel_error):.2f}",
+            f"mean: {np.mean(rel_error):.2f}, median: {np.median(rel_error):.2f}"
+        )
+        print("Relative error over 100% for:")
+        for i in np.where(rel_error > 1)[0]:
+            print(f"target_name: {loss_matrix.columns[i]}")
+            print(f"target_value: {targets_array[i]}")
+            print(f"estimate_value: {estimate[i]}")
+            print(f"has rel_error: {rel_error.values[i]:.2f}\n")
+        print("---End of reweighting quick diagnostics------")
+
         self.save_dataset(data)
 
 

From 10b725cfaeb36041dd5164c830a9ae2292f3a537 Mon Sep 17 00:00:00 2001
From: baogorek <baogorek@gmail.com>
Date: Sun, 13 Jul 2025 20:32:47 -0400
Subject: [PATCH 26/34] lint

---
 policyengine_us_data/datasets/cps/enhanced_cps.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/policyengine_us_data/datasets/cps/enhanced_cps.py b/policyengine_us_data/datasets/cps/enhanced_cps.py
index c01a6b17..d4dd5ba6 100644
--- a/policyengine_us_data/datasets/cps/enhanced_cps.py
+++ b/policyengine_us_data/datasets/cps/enhanced_cps.py
@@ -197,7 +197,7 @@ def generate(self):
         ) ** 2
         print(
             f"rel_error: min: {np.min(rel_error):.2f}, max: {np.max(rel_error):.2f}",
-            f"mean: {np.mean(rel_error):.2f}, median: {np.median(rel_error):.2f}"
+            f"mean: {np.mean(rel_error):.2f}, median: {np.median(rel_error):.2f}",
         )
         print("Relative error over 100% for:")
         for i in np.where(rel_error > 1)[0]:

From f0973be60eef539992dbaf75fd850352226e460b Mon Sep 17 00:00:00 2001
From: baogorek <baogorek@gmail.com>
Date: Sun, 13 Jul 2025 22:27:46 -0400
Subject: [PATCH 27/34] taking out bad targets

---
 policyengine_us_data/datasets/cps/cps.py      |  1 -
 .../datasets/cps/enhanced_cps.py              | 62 +++++++++++++++++--
 2 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/policyengine_us_data/datasets/cps/cps.py b/policyengine_us_data/datasets/cps/cps.py
index 202f9c69..d9957cbb 100644
--- a/policyengine_us_data/datasets/cps/cps.py
+++ b/policyengine_us_data/datasets/cps/cps.py
@@ -2007,7 +2007,6 @@ class Pooled_3_Year_CPS_2023(PooledCPS):
     if test_lite:
         CPS_2023().generate()
         CPS_2024().generate()
-        print(3)
     else:
         CPS_2021().generate()
         CPS_2022().generate()
diff --git a/policyengine_us_data/datasets/cps/enhanced_cps.py b/policyengine_us_data/datasets/cps/enhanced_cps.py
index d4dd5ba6..dab9df78 100644
--- a/policyengine_us_data/datasets/cps/enhanced_cps.py
+++ b/policyengine_us_data/datasets/cps/enhanced_cps.py
@@ -28,6 +28,7 @@ def reweight(
     targets_array,
     dropout_rate=0.05,
     log_path="calibration_log.csv",
+    epochs=150,
 ):
     target_names = np.array(loss_matrix.columns)
     is_national = loss_matrix.columns.str.startswith("nation/")
@@ -45,7 +46,7 @@ def reweight(
         np.log(original_weights), requires_grad=True, dtype=torch.float32
     )
 
-    # TODO: replace this with a call to the python reweight.py package.
+    # TODO: replace this functionality with the microcalibrate package.
     def loss(weights):
         # Check for Nans in either the weights or the loss matrix
         if torch.isnan(weights).any():
@@ -78,7 +79,7 @@ def dropout_weights(weights, p):
 
     start_loss = None
 
-    iterator = trange(500)
+    iterator = trange(epochs)
     performance = pd.DataFrame()
     for i in iterator:
         optimizer.zero_grad()
@@ -178,18 +179,71 @@ def generate(self):
         original_weights = original_weights.values + np.random.normal(
             1, 0.1, len(original_weights)
         )
+
+        bad_targets = [
+            "nation/irs/adjusted gross income/total/AGI in 10k-15k/taxable/Head of Household",
+            "nation/irs/adjusted gross income/total/AGI in 15k-20k/taxable/Head of Household",
+            "nation/irs/adjusted gross income/total/AGI in 10k-15k/taxable/Married Filing Jointly/Surviving Spouse",
+            "nation/irs/adjusted gross income/total/AGI in 15k-20k/taxable/Married Filing Jointly/Surviving Spouse",
+            "nation/irs/count/count/AGI in 10k-15k/taxable/Head of Household",
+            "nation/irs/count/count/AGI in 15k-20k/taxable/Head of Household",
+            "nation/irs/count/count/AGI in 10k-15k/taxable/Married Filing Jointly/Surviving Spouse",
+            "nation/irs/count/count/AGI in 15k-20k/taxable/Married Filing Jointly/Surviving Spouse",
+            "state/RI/adjusted_gross_income/amount/-inf_1",
+            "nation/irs/adjusted gross income/total/AGI in 10k-15k/taxable/Head of Household",
+            "nation/irs/adjusted gross income/total/AGI in 15k-20k/taxable/Head of Household",
+            "nation/irs/adjusted gross income/total/AGI in 10k-15k/taxable/Married Filing Jointly/Surviving Spouse",
+            "nation/irs/adjusted gross income/total/AGI in 15k-20k/taxable/Married Filing Jointly/Surviving Spouse",
+            "nation/irs/count/count/AGI in 10k-15k/taxable/Head of Household",
+            "nation/irs/count/count/AGI in 15k-20k/taxable/Head of Household",
+            "nation/irs/count/count/AGI in 10k-15k/taxable/Married Filing Jointly/Surviving Spouse",
+            "nation/irs/count/count/AGI in 15k-20k/taxable/Married Filing Jointly/Surviving Spouse",
+            "state/RI/adjusted_gross_income/amount/-inf_1",
+            "target_name: nation/irs/exempt interest/count/AGI in -inf-inf/taxable/All",
+        ]
+
+        # Run the optimization procedure to get (close to) minimum loss weights
         for year in range(self.start_year, self.end_year + 1):
             loss_matrix, targets_array = build_loss_matrix(
                 self.input_dataset, year
             )
+            zero_mask = np.isclose(targets_array, 0.0, atol=0.1)
+            bad_mask = loss_matrix.columns.isin(bad_targets)
+            keep_mask_bool = ~(zero_mask | bad_mask)
+            keep_idx = np.where(keep_mask_bool)[0]
+            loss_matrix_clean = loss_matrix.iloc[:, keep_idx]
+            targets_array_clean = targets_array[keep_idx]
+            assert loss_matrix_clean.shape[1] == targets_array_clean.size
+
             optimised_weights = reweight(
                 original_weights,
-                loss_matrix,
-                targets_array,
+                loss_matrix_clean,
+                targets_array_clean,
                 log_path="calibration_log.csv",
+                epochs=150,
             )
             data["household_weight"][year] = optimised_weights
 
+            print("\n\n---reweighting quick diagnostics----\n")
+            estimate = optimised_weights @ loss_matrix_clean
+            rel_error = (
+                ((estimate - targets_array_clean) + 1)
+                / (targets_array_clean + 1)
+            ) ** 2
+            print(
+                f"rel_error: min: {np.min(rel_error):.2f}, "
+                f"max: {np.max(rel_error):.2f} "
+                f"mean: {np.mean(rel_error):.2f}, "
+                f"median: {np.median(rel_error):.2f}"
+            )
+            print("Relative error over 100% for:")
+            for i in np.where(rel_error > 1)[0]:
+                print(f"target_name: {loss_matrix_clean.columns[i]}")
+                print(f"target_value: {targets_array_clean[i]}")
+                print(f"estimate_value: {estimate[i]}")
+                print(f"has rel_error: {rel_error[i]:.2f}\n")
+            print("---End of reweighting quick diagnostics------")
+
         print("\n\n---reweighting quick diagnostics----\n")
         estimate = optimised_weights @ loss_matrix
         rel_error = (

From d8667a19d7641565bb85171dde03a6a55d2ee3c4 Mon Sep 17 00:00:00 2001
From: baogorek <baogorek@gmail.com>
Date: Sun, 13 Jul 2025 23:05:09 -0400
Subject: [PATCH 28/34] fixing workflow arg passthrough

---
 .github/workflows/pr_code_changes.yaml | 16 +++++++++++++---
 changelog_entry.yaml                   |  6 ++++++
 pyproject.toml                         |  4 ++--
 3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/pr_code_changes.yaml b/.github/workflows/pr_code_changes.yaml
index c84a4b97..56224a2e 100644
--- a/.github/workflows/pr_code_changes.yaml
+++ b/.github/workflows/pr_code_changes.yaml
@@ -11,6 +11,14 @@ on:
       - tests/**
       - .github/workflows/**
 
+  workflow_call:
+    inputs:
+      TEST_LITE:
+        description: 'Run in lite mode'
+        type: boolean
+        required: false
+        default: false
+
 jobs:
   Lint:
     runs-on: ubuntu-latest
@@ -53,6 +61,7 @@ jobs:
       needs: Lint
       env:
         HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
+        TEST_LITE: ${{ inputs.TEST_LITE }}
       steps:
           - name: Checkout repo
             uses: actions/checkout@v2
@@ -63,7 +72,7 @@ jobs:
           - name: Set up Python
             uses: actions/setup-python@v2
             with:
-                python-version: '3.11.12'
+                python-version: '3.11'
           - name: Install package
             run: uv pip install -e .[dev] --system
 
@@ -75,8 +84,9 @@ jobs:
           - name: Build datasets
             run: make data
             env:
-              TEST_LITE: true
-              PYTHON_LOG_LEVEL: INFO  
+              TEST_LITE: ${{ env.TEST_LITE }}
+              PYTHON_LOG_LEVEL: INFO
+
           - name: Save calibration log
             uses: actions/upload-artifact@v4
             with:
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index dcce3f1a..bce8b349 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -1,4 +1,10 @@
 - bump: patch
   changes:
     changed:
+    - bad targets (causing problems with estimation) removed
     - lite mode now builds CPS_2023 in addition to CPS_2024
+    - gave reweight an epochs argument and set it at 150 for optimization
+    - updating minimum versions on policyengine-us and pandas dependencies
+    fixed:
+    - manual workflow now can call PR code changes
+
diff --git a/pyproject.toml b/pyproject.toml
index 4bec19eb..481cbc37 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,9 +17,9 @@ authors = [
 license = {file = "LICENSE"}
 requires-python = ">=3.11, <3.13.0"
 dependencies = [
-    "policyengine-us==1.340.1",
+    "policyengine-us>=1.340.1",
     "policyengine-core>=3.14.1",
-    "pandas==2.3.0",
+    "pandas>=2.3.0",
     "requests",
     "tqdm",
     "microdf_python>=0.4.3",

From 8f14e52158b99f5ec3c5cd43279ae0799955c1f0 Mon Sep 17 00:00:00 2001
From: baogorek <baogorek@gmail.com>
Date: Sun, 13 Jul 2025 23:09:32 -0400
Subject: [PATCH 29/34] deps and defaults

---
 .github/workflows/code_changes.yaml    | 2 +-
 .github/workflows/pr_code_changes.yaml | 2 +-
 pyproject.toml                         | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/code_changes.yaml b/.github/workflows/code_changes.yaml
index fef913e9..b752e953 100644
--- a/.github/workflows/code_changes.yaml
+++ b/.github/workflows/code_changes.yaml
@@ -38,7 +38,7 @@ jobs:
           - name: Set up Python
             uses: actions/setup-python@v2
             with:
-                python-version: '3.11.12'
+                python-version: '3.11'
           - uses: "google-github-actions/auth@v2"
             with:
               workload_identity_provider: "projects/322898545428/locations/global/workloadIdentityPools/policyengine-research-id-pool/providers/prod-github-provider"
diff --git a/.github/workflows/pr_code_changes.yaml b/.github/workflows/pr_code_changes.yaml
index 56224a2e..1e05b564 100644
--- a/.github/workflows/pr_code_changes.yaml
+++ b/.github/workflows/pr_code_changes.yaml
@@ -17,7 +17,7 @@ on:
         description: 'Run in lite mode'
         type: boolean
         required: false
-        default: false
+        default: true
 
 jobs:
   Lint:
diff --git a/pyproject.toml b/pyproject.toml
index 481cbc37..f983258d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,7 +18,7 @@ license = {file = "LICENSE"}
 requires-python = ">=3.11, <3.13.0"
 dependencies = [
     "policyengine-us>=1.340.1",
-    "policyengine-core>=3.14.1",
+    "policyengine-core>=3.17.1",
     "pandas>=2.3.0",
     "requests",
     "tqdm",

From 29e1621d74802624ef647fa442b8e360931a438f Mon Sep 17 00:00:00 2001
From: baogorek <baogorek@gmail.com>
Date: Sun, 13 Jul 2025 23:12:21 -0400
Subject: [PATCH 30/34] wrong pipeline for manual test

---
 .github/workflows/manual_tests.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/manual_tests.yaml b/.github/workflows/manual_tests.yaml
index fb13ba89..fd6fa061 100644
--- a/.github/workflows/manual_tests.yaml
+++ b/.github/workflows/manual_tests.yaml
@@ -11,7 +11,7 @@ on:
 
 jobs:
   test:
-    uses: ./.github/workflows/code_changes.yaml
+    uses: ./.github/workflows/pr_code_changes.yaml
     with:
       TEST_LITE: ${{ github.event.inputs.test_lite }}
     secrets: inherit

From fcd5835d73bcb13291be79e36784cdb9cab47b9b Mon Sep 17 00:00:00 2001
From: baogorek <baogorek@gmail.com>
Date: Sun, 13 Jul 2025 23:30:46 -0400
Subject: [PATCH 31/34] trying again to get the manual test to work

---
 .github/workflows/manual_tests.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/manual_tests.yaml b/.github/workflows/manual_tests.yaml
index fd6fa061..55667dbc 100644
--- a/.github/workflows/manual_tests.yaml
+++ b/.github/workflows/manual_tests.yaml
@@ -13,5 +13,5 @@ jobs:
   test:
     uses: ./.github/workflows/pr_code_changes.yaml
     with:
-      TEST_LITE: ${{ github.event.inputs.test_lite }}
+      TEST_LITE: ${{ inputs.test_lite }}
     secrets: inherit

From 82272b6458f14ac49d5bf4c82eee4faa4746236b Mon Sep 17 00:00:00 2001
From: baogorek <baogorek@gmail.com>
Date: Sun, 13 Jul 2025 23:53:27 -0400
Subject: [PATCH 32/34] reverting to older workflow code

---
 .github/workflows/manual_tests.yaml    | 17 -----------------
 .github/workflows/pr_code_changes.yaml | 14 ++------------
 changelog_entry.yaml                   |  4 +---
 3 files changed, 3 insertions(+), 32 deletions(-)
 delete mode 100644 .github/workflows/manual_tests.yaml

diff --git a/.github/workflows/manual_tests.yaml b/.github/workflows/manual_tests.yaml
deleted file mode 100644
index 55667dbc..00000000
--- a/.github/workflows/manual_tests.yaml
+++ /dev/null
@@ -1,17 +0,0 @@
-name: Manual tests
-
-on:
-  workflow_dispatch:
-    inputs:
-      test_lite:
-        description: 'Run in lite mode'
-        required: true
-        default: true
-        type: boolean
-
-jobs:
-  test:
-    uses: ./.github/workflows/pr_code_changes.yaml
-    with:
-      TEST_LITE: ${{ inputs.test_lite }}
-    secrets: inherit
diff --git a/.github/workflows/pr_code_changes.yaml b/.github/workflows/pr_code_changes.yaml
index 1e05b564..4e30d089 100644
--- a/.github/workflows/pr_code_changes.yaml
+++ b/.github/workflows/pr_code_changes.yaml
@@ -11,14 +11,6 @@ on:
       - tests/**
       - .github/workflows/**
 
-  workflow_call:
-    inputs:
-      TEST_LITE:
-        description: 'Run in lite mode'
-        type: boolean
-        required: false
-        default: true
-
 jobs:
   Lint:
     runs-on: ubuntu-latest
@@ -61,7 +53,6 @@ jobs:
       needs: Lint
       env:
         HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }}
-        TEST_LITE: ${{ inputs.TEST_LITE }}
       steps:
           - name: Checkout repo
             uses: actions/checkout@v2
@@ -84,9 +75,8 @@ jobs:
           - name: Build datasets
             run: make data
             env:
-              TEST_LITE: ${{ env.TEST_LITE }}
-              PYTHON_LOG_LEVEL: INFO
-
+              TEST_LITE: true
+              PYTHON_LOG_LEVEL: INFO  
           - name: Save calibration log
             uses: actions/upload-artifact@v4
             with:
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index bce8b349..3f9b8627 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -5,6 +5,4 @@
     - lite mode now builds CPS_2023 in addition to CPS_2024
     - gave reweight an epochs argument and set it at 150 for optimization
     - updating minimum versions on policyengine-us and pandas dependencies
-    fixed:
-    - manual workflow now can call PR code changes
-
+    - getting rid of non-working manual workflow code

From ab8fa4f1b6cfc40e6f8ebfb70b2c5f85580b774e Mon Sep 17 00:00:00 2001
From: baogorek <baogorek@gmail.com>
Date: Mon, 14 Jul 2025 00:12:37 -0400
Subject: [PATCH 33/34] cleaning up enhanced_cps.py

---
 .../datasets/cps/enhanced_cps.py              | 19 +------------------
 1 file changed, 1 insertion(+), 18 deletions(-)

diff --git a/policyengine_us_data/datasets/cps/enhanced_cps.py b/policyengine_us_data/datasets/cps/enhanced_cps.py
index dab9df78..7a471d40 100644
--- a/policyengine_us_data/datasets/cps/enhanced_cps.py
+++ b/policyengine_us_data/datasets/cps/enhanced_cps.py
@@ -199,7 +199,7 @@ def generate(self):
             "nation/irs/count/count/AGI in 10k-15k/taxable/Married Filing Jointly/Surviving Spouse",
             "nation/irs/count/count/AGI in 15k-20k/taxable/Married Filing Jointly/Surviving Spouse",
             "state/RI/adjusted_gross_income/amount/-inf_1",
-            "target_name: nation/irs/exempt interest/count/AGI in -inf-inf/taxable/All",
+            "nation/irs/exempt interest/count/AGI in -inf-inf/taxable/All",
         ]
 
         # Run the optimization procedure to get (close to) minimum loss weights
@@ -244,23 +244,6 @@ def generate(self):
                 print(f"has rel_error: {rel_error[i]:.2f}\n")
             print("---End of reweighting quick diagnostics------")
 
-        print("\n\n---reweighting quick diagnostics----\n")
-        estimate = optimised_weights @ loss_matrix
-        rel_error = (
-            ((estimate - targets_array) + 1) / (targets_array + 1)
-        ) ** 2
-        print(
-            f"rel_error: min: {np.min(rel_error):.2f}, max: {np.max(rel_error):.2f}",
-            f"mean: {np.mean(rel_error):.2f}, median: {np.median(rel_error):.2f}",
-        )
-        print("Relative error over 100% for:")
-        for i in np.where(rel_error > 1)[0]:
-            print(f"target_name: {loss_matrix.columns[i]}")
-            print(f"target_value: {targets_array[i]}")
-            print(f"estimate_value: {estimate[i]}")
-            print(f"has rel_error: {rel_error.values[i]:.2f}\n")
-        print("---End of reweighting quick diagnostics------")
-
         self.save_dataset(data)
 
 

From 2508741c8df49861868e89da94c02075b404c86f Mon Sep 17 00:00:00 2001
From: MaxGhenis <MaxGhenis@users.noreply.github.com>
Date: Mon, 14 Jul 2025 15:33:13 +0000
Subject: [PATCH 34/34] Update package version

---
 CHANGELOG.md         | 11 +++++++++++
 changelog.yaml       |  9 +++++++++
 changelog_entry.yaml |  8 --------
 pyproject.toml       |  2 +-
 4 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6299d8fb..e355d4dd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.37.1] - 2025-07-14 15:33:11
+
+### Changed
+
+- bad targets (causing problems with estimation) removed
+- lite mode now builds CPS_2023 in addition to CPS_2024
+- gave reweight an epochs argument and set it at 150 for optimization
+- updating minimum versions on policyengine-us and pandas dependencies
+- getting rid of non-working manual workflow code
+
 ## [1.37.0] - 2025-07-09 14:58:33
 
 ### Added
@@ -520,6 +530,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 
 
+[1.37.1]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.37.0...1.37.1
 [1.37.0]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.36.2...1.37.0
 [1.36.2]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.36.1...1.36.2
 [1.36.1]: https://github.com/PolicyEngine/policyengine-us-data/compare/1.36.0...1.36.1
diff --git a/changelog.yaml b/changelog.yaml
index 699b2430..af7cdf32 100644
--- a/changelog.yaml
+++ b/changelog.yaml
@@ -433,3 +433,12 @@
     added:
     - Medicaid state level calibration targets.
   date: 2025-07-09 14:58:33
+- bump: patch
+  changes:
+    changed:
+    - bad targets (causing problems with estimation) removed
+    - lite mode now builds CPS_2023 in addition to CPS_2024
+    - gave reweight an epochs argument and set it at 150 for optimization
+    - updating minimum versions on policyengine-us and pandas dependencies
+    - getting rid of non-working manual workflow code
+  date: 2025-07-14 15:33:11
diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index 3f9b8627..e69de29b 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -1,8 +0,0 @@
-- bump: patch
-  changes:
-    changed:
-    - bad targets (causing problems with estimation) removed
-    - lite mode now builds CPS_2023 in addition to CPS_2024
-    - gave reweight an epochs argument and set it at 150 for optimization
-    - updating minimum versions on policyengine-us and pandas dependencies
-    - getting rid of non-working manual workflow code
diff --git a/pyproject.toml b/pyproject.toml
index f983258d..5a75693f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "policyengine_us_data"
-version = "1.37.0"
+version = "1.37.1"
 description = "A package to create representative microdata for the US."
 readme = "README.md"
 authors = [