diff --git a/changelog_entry.yaml b/changelog_entry.yaml
index e69de29b..ac664753 100644
--- a/changelog_entry.yaml
+++ b/changelog_entry.yaml
@@ -0,0 +1,4 @@
+- bump: minor
+  changes:
+    added:
+    - Enhanced CPS minimizing tests. 
\ No newline at end of file
diff --git a/policyengine_us_data/datasets/cps/enhanced_cps.py b/policyengine_us_data/datasets/cps/enhanced_cps.py
index 7a471d40..2fbb0293 100644
--- a/policyengine_us_data/datasets/cps/enhanced_cps.py
+++ b/policyengine_us_data/datasets/cps/enhanced_cps.py
@@ -22,13 +22,27 @@
     torch = None
 
 
+bad_targets = [
+    "nation/irs/adjusted gross income/total/AGI in 10k-15k/taxable/Head of Household",
+    "nation/irs/adjusted gross income/total/AGI in 15k-20k/taxable/Head of Household",
+    "nation/irs/adjusted gross income/total/AGI in 10k-15k/taxable/Married Filing Jointly/Surviving Spouse",
+    "nation/irs/adjusted gross income/total/AGI in 15k-20k/taxable/Married Filing Jointly/Surviving Spouse",
+    "nation/irs/count/count/AGI in 10k-15k/taxable/Head of Household",
+    "nation/irs/count/count/AGI in 15k-20k/taxable/Head of Household",
+    "nation/irs/count/count/AGI in 10k-15k/taxable/Married Filing Jointly/Surviving Spouse",
+    "nation/irs/count/count/AGI in 15k-20k/taxable/Married Filing Jointly/Surviving Spouse",
+]
+
+
 def reweight(
     original_weights,
     loss_matrix,
     targets_array,
     dropout_rate=0.05,
-    log_path="calibration_log.csv",
     epochs=150,
+    log_path="calibration_log.csv",
+    penalty_approach=None,
+    penalty_weight=None,
 ):
     target_names = np.array(loss_matrix.columns)
     is_national = loss_matrix.columns.str.startswith("nation/")
@@ -46,8 +60,12 @@ def reweight(
         np.log(original_weights), requires_grad=True, dtype=torch.float32
     )
 
-    # TODO: replace this functionality from the microcalibrate package.
-    def loss(weights):
+    # TO DO: replace this with a call to the python reweight.py package.
+    def loss(
+        weights,
+        penalty_approach=penalty_approach,
+        penalty_weight=penalty_weight,
+    ):
         # Check for Nans in either the weights or the loss matrix
         if torch.isnan(weights).any():
             raise ValueError("Weights contain NaNs")
@@ -60,9 +78,51 @@ def loss(weights):
             ((estimate - targets_array) + 1) / (targets_array + 1)
         ) ** 2
         rel_error_normalized = rel_error * normalisation_factor
+
         if torch.isnan(rel_error_normalized).any():
             raise ValueError("Relative error contains NaNs")
-        return rel_error_normalized.mean()
+
+        if penalty_approach is not None and penalty_weight is not None:
+            # L0 penalty (approximated with smooth function)
+            # Since L0 is non-differentiable, we use a smooth approximation
+            # Common approaches:
+
+            epsilon = 1e-3  # Threshold for "near zero"
+
+            # Option 1: Sigmoid approximation
+            if penalty_approach == "l0_sigmoid":
+                smoothed_l0 = torch.sigmoid(
+                    (weights - epsilon) / (epsilon * 0.1)
+                ).mean()
+
+            # Option 2: Log-sum penalty (smoother)
+            if penalty_approach == "l0_log":
+                smoothed_l0 = torch.log(1 + weights / epsilon).sum() / len(
+                    weights
+                )
+
+            # Option 3: Exponential penalty
+            if penalty_approach == "l0_exp":
+                smoothed_l0 = (1 - torch.exp(-weights / epsilon)).mean()
+
+            if penalty_approach == "l1":
+                l1 = torch.mean(weights)
+                return rel_error_normalized.mean() + penalty_weight * l1
+            
+            return rel_error_normalized.mean() + penalty_weight * smoothed_l0
+
+        else:
+            return rel_error_normalized.mean()
+        
+    def prune_dataset(weights, epsilon=1e-3):
+        """
+        Prune dataset samples based on learned weights.
+        Returns indices of samples to keep.
+        """
+        importance_scores = weights.detach().cpu().numpy()
+        keep_indices = np.where(importance_scores > epsilon)[0]
+
+        return keep_indices
 
     def dropout_weights(weights, p):
         if p == 0:
@@ -207,9 +267,9 @@ def generate(self):
             loss_matrix, targets_array = build_loss_matrix(
                 self.input_dataset, year
             )
-            zero_mask = np.isclose(targets_array, 0.0, atol=0.1)
+
             bad_mask = loss_matrix.columns.isin(bad_targets)
-            keep_mask_bool = ~(zero_mask | bad_mask)
+            keep_mask_bool = ~bad_mask
             keep_idx = np.where(keep_mask_bool)[0]
             loss_matrix_clean = loss_matrix.iloc[:, keep_idx]
             targets_array_clean = targets_array[keep_idx]
@@ -220,7 +280,7 @@ def generate(self):
                 loss_matrix_clean,
                 targets_array_clean,
                 log_path="calibration_log.csv",
-                epochs=150,
+                epochs= 150,
             )
             data["household_weight"][year] = optimised_weights
 
diff --git a/policyengine_us_data/storage/upload_completed_datasets.py b/policyengine_us_data/storage/upload_completed_datasets.py
index f161a9ee..16885d8c 100644
--- a/policyengine_us_data/storage/upload_completed_datasets.py
+++ b/policyengine_us_data/storage/upload_completed_datasets.py
@@ -15,6 +15,7 @@ def upload_datasets():
         Pooled_3_Year_CPS_2023.file_path,
         CPS_2023.file_path,
         STORAGE_FOLDER / "small_enhanced_cps_2024.h5",
+        STORAGE_FOLDER / "enhanced_cps_2024_minified.h5",
     ]
 
     for file_path in dataset_files:
diff --git a/policyengine_us_data/utils/loss.py b/policyengine_us_data/utils/loss.py
index 21abce0f..fbdbacef 100644
--- a/policyengine_us_data/utils/loss.py
+++ b/policyengine_us_data/utils/loss.py
@@ -552,11 +552,6 @@ def build_loss_matrix(dataset: type, time_period):
         # Convert to thousands for the target
         targets_array.append(row["enrollment"])
 
-        print(
-            f"Targeting Medicaid enrollment for {row['state']} "
-            f"with target {row['enrollment']:.0f}k"
-        )
-
     # State 10-year age targets
 
     age_targets = pd.read_csv(STORAGE_FOLDER / "age_state.csv")
diff --git a/policyengine_us_data/utils/minimize.py b/policyengine_us_data/utils/minimize.py
new file mode 100644
index 00000000..ce2c6fdf
--- /dev/null
+++ b/policyengine_us_data/utils/minimize.py
@@ -0,0 +1,444 @@
+from policyengine_us_data.utils.loss import build_loss_matrix
+from policyengine_core.data import Dataset
+from policyengine_us import Microsimulation
+import numpy as np
+import pandas as pd
+import h5py
+from policyengine_us_data.storage import STORAGE_FOLDER
+from typing import Optional, Callable
+
+bad_targets = [
+    "nation/irs/adjusted gross income/total/AGI in 10k-15k/taxable/Head of Household",
+    "nation/irs/adjusted gross income/total/AGI in 15k-20k/taxable/Head of Household",
+    "nation/irs/adjusted gross income/total/AGI in 10k-15k/taxable/Married Filing Jointly/Surviving Spouse",
+    "nation/irs/adjusted gross income/total/AGI in 15k-20k/taxable/Married Filing Jointly/Surviving Spouse",
+    "nation/irs/count/count/AGI in 10k-15k/taxable/Head of Household",
+    "nation/irs/count/count/AGI in 15k-20k/taxable/Head of Household",
+    "nation/irs/count/count/AGI in 10k-15k/taxable/Married Filing Jointly/Surviving Spouse",
+    "nation/irs/count/count/AGI in 15k-20k/taxable/Married Filing Jointly/Surviving Spouse",
+]
+
+
+def create_calibration_log_file(file_path, epoch=0):
+    dataset = Dataset.from_file(file_path)
+    sim = Microsimulation(dataset=dataset)
+
+    loss_matrix, targets = build_loss_matrix(dataset, 2024)
+
+    bad_mask = loss_matrix.columns.isin(bad_targets)
+    keep_mask_bool = ~bad_mask
+    keep_idx = np.where(keep_mask_bool)[0]
+    loss_matrix_clean = loss_matrix.iloc[:, keep_idx]
+    targets_clean = targets[keep_idx]
+
+    assert loss_matrix_clean.shape[1] == targets_clean.size
+
+    estimates = (
+        sim.calculate("household_weight", 2024).values @ loss_matrix_clean
+    )
+    target_names = loss_matrix_clean.columns
+
+    # Calculate and print some key metrics
+    errors = estimates - targets_clean
+    rel_errors = errors / targets_clean
+
+    df = pd.DataFrame(
+        {
+            "target_name": target_names,
+            "estimate": estimates,
+            "target": targets_clean,
+        }
+    )
+    df["epoch"] = epoch
+    df["error"] = df["estimate"] - df["target"]
+    df["rel_error"] = df["error"] / df["target"]
+    df["abs_error"] = df["error"].abs()
+    df["rel_abs_error"] = (
+        df["abs_error"] / df["target"].abs()
+        if df["target"].abs().sum() > 0
+        else np.nan
+    )
+    df["loss"] = (df["rel_error"] ** 2).mean()
+
+    df.to_csv(
+        str(file_path).replace(".h5", "_calibration_log.csv"), index=False
+    )
+
+
+def losses_for_candidates(
+    base_weights: np.ndarray,
+    idxs: np.ndarray,
+    est_mat: np.ndarray,
+    targets: np.ndarray,
+    norm: np.ndarray,
+    chunk_size: Optional[int] = 25_000,
+) -> np.ndarray:
+    """
+    Return the loss value *for each* candidate deletion in `idxs`
+    in one matrix multiplication.
+
+    Parameters
+    ----------
+    base_weights : (n,) original weight vector
+    idxs         : (k,) candidate row indices to zero-out
+    est_mat      : (n, m) estimate matrix
+    targets      : (m,) calibration targets
+    norm         : (m,) normalisation factors
+    chunk_size   : max number of candidates to process at once
+
+    Returns
+    -------
+    losses       : (k,) loss if row i were removed (and weights rescaled)
+    """
+    W = base_weights
+    total = W.sum()
+    k = len(idxs)
+    losses = np.empty(k, dtype=float)
+
+    # Work through the candidate list in blocks
+    for start in range(0, k, chunk_size):
+        stop = min(start + chunk_size, k)
+        part = idxs[start:stop]  # (p,) where p ≤ chunk_size
+        p = len(part)
+
+        # Build the delta matrix only for this chunk
+        delta = np.zeros((p, len(W)))
+        delta[np.arange(p), part] = -W[part]
+
+        keep_total = total + delta.sum(axis=1)  # (p,)
+        delta *= (total / keep_total)[:, None]
+
+        # Matrix–matrix multiply → one matrix multiplication per chunk
+        ests = (W + delta) @ est_mat  # (p, m)
+        rel_err = ((ests - targets) + 1) / (targets + 1)
+        losses[start:stop] = ((rel_err * norm) ** 2).mean(axis=1)
+
+    return losses
+
+
+def get_loss_from_mask(
+    weights, inclusion_mask, estimate_matrix, targets, normalisation_factor
+):
+    """
+    Calculate the loss based on the inclusion mask and the estimate matrix.
+    """
+    # Step 1: Apply mask and rescale weights
+    masked_weights = weights.copy()
+    original_weight_total = masked_weights.sum()
+    if (~inclusion_mask).sum() > 0:
+        masked_weights[~inclusion_mask] = 0
+    masked_weight_total = masked_weights.sum()
+    masked_weights[inclusion_mask] *= (
+        original_weight_total / masked_weight_total
+    )
+
+    # Step 2: Re-calibrate the masked weights to hit targets
+    # Only calibrate the included households
+    included_weights = masked_weights[inclusion_mask]
+    included_estimate_matrix = estimate_matrix.iloc[
+        inclusion_mask
+    ]  # Keep as DataFrame
+
+    # Call reweight function to calibrate the selected households
+    from policyengine_us_data.datasets.cps.enhanced_cps import reweight
+
+    calibrated_weights_included = reweight(
+        included_weights,
+        included_estimate_matrix,
+        targets,
+        epochs=250,
+    )
+
+    # Put calibrated weights back into full array
+    calibrated_weights = np.zeros_like(masked_weights)
+    calibrated_weights[inclusion_mask] = calibrated_weights_included
+
+    # Calculate estimates and loss from calibrated weights
+    estimates = calibrated_weights @ estimate_matrix
+    rel_error = ((estimates - targets) + 1) / (targets + 1)
+    loss = ((rel_error * normalisation_factor) ** 2).mean()
+
+    return loss
+
+
+def candidate_loss_contribution(
+    weights: np.ndarray,
+    estimate_matrix: np.ndarray,
+    targets: np.ndarray,
+    normalisation_factor: np.ndarray,
+    loss_rel_change_max: float,
+    count_iterations: int = 5,
+    view_fraction_per_iteration: float = 0.5,
+    fraction_remove_per_iteration: float = 0.05,
+) -> np.ndarray:
+    """
+    Minimization approach based on candidate loss contribution.
+
+    This function iteratively removes households that contribute least to the loss,
+    maintaining the calibration quality within the specified tolerance.
+
+    Parameters
+    ----------
+    weights : (n,) household weights
+    estimate_matrix : (n, m) matrix mapping weights to estimates
+    targets : (m,) calibration targets
+    normalisation_factor : (m,) normalisation factors for different targets
+    loss_rel_change_max : maximum allowed relative change in loss
+    count_iterations : number of iterations to perform
+    view_fraction_per_iteration : fraction of households to evaluate each iteration
+    fraction_remove_per_iteration : fraction of households to remove each iteration
+
+    Returns
+    -------
+    inclusion_mask : (n,) boolean mask of households to keep
+    """
+    from tqdm import tqdm
+
+    full_mask = np.ones_like(weights, dtype=bool)
+
+    for i in range(count_iterations):
+        inclusion_mask = full_mask.copy()
+        baseline_loss = get_loss_from_mask(
+            weights,
+            inclusion_mask,
+            estimate_matrix,
+            targets,
+            normalisation_factor,
+        )
+
+        # Sample only households that are currently included
+        indices = np.random.choice(
+            np.where(full_mask)[0],
+            size=int(full_mask.sum() * view_fraction_per_iteration),
+            replace=False,
+        )
+        # 2. compute losses for the batch in one shot
+        candidate_losses = losses_for_candidates(
+            weights, indices, estimate_matrix, targets, normalisation_factor
+        )
+        # 3. convert to relative change vs. baseline
+        household_loss_rel_changes = (
+            candidate_losses - baseline_loss
+        ) / baseline_loss
+
+        inclusion_mask = full_mask.copy()
+        household_loss_rel_changes = np.array(household_loss_rel_changes)
+        # Sort by the relative change in loss
+        sorted_indices = np.argsort(household_loss_rel_changes)
+
+        # Remove the worst households
+        num_to_remove = int(len(weights) * fraction_remove_per_iteration)
+        worst_indices = indices[sorted_indices[:num_to_remove]]
+        inclusion_mask[worst_indices] = False
+
+        # Calculate the new loss
+        new_loss = get_loss_from_mask(
+            weights,
+            inclusion_mask,
+            estimate_matrix,
+            targets,
+            normalisation_factor,
+        )
+        rel_change = (new_loss - baseline_loss) / baseline_loss
+
+        if rel_change > loss_rel_change_max:
+            print(
+                f"Iteration {i + 1}: Loss changed from {baseline_loss} to {new_loss}, "
+                f"which is too high ({rel_change:.2%}). Stopping."
+            )
+            break
+
+        print(
+            f"Iteration {i + 1}: Loss changed from {baseline_loss} to {new_loss}"
+        )
+        print(
+            f"Removed {num_to_remove} households with worst relative loss changes."
+        )
+
+        # Update the full mask
+        full_mask &= inclusion_mask
+
+    return full_mask
+
+
+def random_sampling_minimization(
+    weights,
+    estimate_matrix,
+    targets,
+    normalisation_factor,
+    random=True,
+    target_fractions=[0.5, 0.6, 0.7, 0.8, 0.9],
+):
+    """A simple random sampling approach"""
+    n = len(weights)
+
+    household_weights_normalized = weights / weights.sum()
+
+    final_mask = None
+    lowest_loss = float("inf")
+    for fraction in target_fractions:
+        target_size = int(n * fraction)
+        # Random sampling with multiple attempts
+        best_mask = None
+        best_loss = float("inf")
+
+        for _ in range(3):  # Try 3 random samples
+            mask = np.zeros(n, dtype=bool)
+            mask[
+                np.random.choice(
+                    n,
+                    target_size,
+                    p=household_weights_normalized if random else None,
+                    replace=False,
+                )
+            ] = True
+
+            loss = get_loss_from_mask(
+                weights, mask, estimate_matrix, targets, normalisation_factor
+            )
+
+            if loss < best_loss:
+                best_loss = loss
+                best_mask = mask
+
+        if lowest_loss > best_loss:
+            lowest_loss = best_loss
+            final_mask = best_mask
+
+    return final_mask
+
+
+def minimize_dataset(
+    dataset,
+    output_path: str,
+    minimization_function: Callable = candidate_loss_contribution,
+    loss_matrix: Optional[pd.DataFrame] = None,
+    targets: Optional[np.ndarray] = None,
+    **kwargs,
+) -> None:
+    """
+    Main function to minimize a dataset using a specified minimization approach.
+
+    Parameters
+    ----------
+    dataset : path to the dataset file or Dataset object
+    output_path : path where the minimized dataset will be saved
+    loss_rel_change_max : maximum allowed relative change in loss
+    minimization_function : function that implements the minimization logic
+    **kwargs : additional arguments to pass to the minimization function
+    """
+    # Handle both dataset class and file path
+    if hasattr(dataset, "file_path"):
+        dataset_path = str(dataset.file_path)
+    else:
+        dataset_path = str(dataset)
+
+    create_calibration_log_file(dataset_path)
+
+    dataset = Dataset.from_file(dataset_path)
+    if loss_matrix is None or targets is None:
+        loss_matrix, targets = build_loss_matrix(dataset, 2024)
+
+        bad_mask = loss_matrix.columns.isin(bad_targets)
+        keep_mask_bool = ~bad_mask
+        keep_idx = np.where(keep_mask_bool)[0]
+        loss_matrix_clean = loss_matrix.iloc[:, keep_idx]
+        targets_clean = targets[keep_idx]
+        assert loss_matrix_clean.shape[1] == targets_clean.size
+    else:
+        loss_matrix_clean = loss_matrix
+        targets_clean = targets
+
+    sim = Microsimulation(dataset=dataset)
+
+    weights = sim.calculate("household_weight", 2024).values
+    is_national = loss_matrix_clean.columns.str.startswith("nation/")
+    nation_normalisation_factor = is_national * (1 / is_national.sum())
+    state_normalisation_factor = ~is_national * (1 / (~is_national).sum())
+    normalisation_factor = np.where(
+        is_national, nation_normalisation_factor, state_normalisation_factor
+    )
+
+    # Call the minimization function
+    inclusion_mask = minimization_function(
+        weights=weights,
+        estimate_matrix=loss_matrix_clean,
+        targets=targets_clean,
+        normalisation_factor=normalisation_factor,
+        **kwargs,  # Allows for passing either loss_rel_change_max OR target_fractions, depending on normalisation_factor choice.
+    )
+
+    # Extract household IDs for remaining households
+    household_ids = sim.calculate("household_id", 2024).values
+    remaining_households = household_ids[inclusion_mask]
+
+    # Create a smaller dataset with only the remaining households
+    df = sim.to_input_dataframe()
+    smaller_df = df[df["household_id__2024"].isin(remaining_households)]
+
+    weight_rel_change = (
+        smaller_df["household_weight__2024"].sum()
+        / df["household_weight__2024"].sum()
+    )
+    print(f"Weight relative change: {weight_rel_change:.2%}")
+
+    # Create new simulation with smaller dataset
+    sim = Microsimulation(dataset=smaller_df)
+
+    # Rescale weights to maintain total
+    initial_weights = (
+        sim.calculate("household_weight", 2024).values / weight_rel_change
+    )
+
+    # Re-calibrate the final selected households to hit targets
+    print("Re-calibrating final selected households...")
+
+    # Build loss matrix for the smaller dataset
+    smaller_loss_matrix, smaller_targets = build_loss_matrix(sim.dataset, 2024)
+
+    # Apply same filtering as before
+    bad_mask = smaller_loss_matrix.columns.isin(bad_targets)
+    keep_mask_bool = ~bad_mask
+    keep_idx = np.where(keep_mask_bool)[0]
+    smaller_loss_matrix_clean = smaller_loss_matrix.iloc[:, keep_idx]
+    smaller_targets_clean = smaller_targets[keep_idx]
+
+    from policyengine_us_data.datasets.cps.enhanced_cps import reweight
+
+    calibrated_weights = reweight(
+        initial_weights,
+        smaller_loss_matrix_clean,  # Now matches the smaller dataset size
+        smaller_targets_clean,
+        epochs=250,  # Reduced epochs for faster processing
+    )
+    sim.set_input("household_weight", 2024, calibrated_weights)
+    print("Final calibration completed successfully")
+    # Prepare data for saving
+    data = {}
+    for variable in sim.input_variables:
+        data[variable] = {2024: sim.calculate(variable, 2024).values}
+        if data[variable][2024].dtype == "object":
+            data[variable][2024] = data[variable][2024].astype("S")
+
+    # Save to HDF5 file
+    with h5py.File(output_path, "w") as f:
+        for variable, values in data.items():
+            for year, value in values.items():
+                f.create_dataset(f"{variable}/{year}", data=value)
+
+    print(f"Saved minimised dataset to {output_path}")
+    create_calibration_log_file(output_path, epoch=250)
+
+
+if __name__ == "__main__":
+    # Example usage
+    files = [
+        STORAGE_FOLDER / "enhanced_cps_2024.h5",
+    ]
+
+    for file in files:
+        output_path = file.with_name(file.stem + "_minimised.h5")
+        minimize_dataset(
+            file,
+            output_path,
+        )
diff --git a/test_minimization_approach.ipynb b/test_minimization_approach.ipynb
new file mode 100644
index 00000000..a4bd87be
--- /dev/null
+++ b/test_minimization_approach.ipynb
@@ -0,0 +1,1634 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "037988b0",
+   "metadata": {},
+   "source": [
+    "# Testing Arena for Different Regularization Strategies"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "268ab898",
+   "metadata": {},
+   "source": [
+    "#### Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "id": "d6dc9cca",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from policyengine_us_data.utils.minimize import minimize_dataset, random_sampling_minimization, candidate_loss_contribution\n",
+    "from policyengine_us_data.storage import STORAGE_FOLDER\n",
+    "from policyengine_us import Microsimulation\n",
+    "from policyengine_us_data.datasets.cps.enhanced_cps import reweight, ExtendedCPS_2024\n",
+    "from policyengine_us_data.utils import build_loss_matrix\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import h5py\n",
+    "import pandas as pd\n",
+    "import plotly.express as px\n",
+    "\n",
+    "\n",
+    "bad_targets = [\n",
+    "    \"nation/irs/adjusted gross income/total/AGI in 10k-15k/taxable/Head of Household\",\n",
+    "    \"nation/irs/adjusted gross income/total/AGI in 15k-20k/taxable/Head of Household\",\n",
+    "    \"nation/irs/adjusted gross income/total/AGI in 10k-15k/taxable/Married Filing Jointly/Surviving Spouse\",\n",
+    "    \"nation/irs/adjusted gross income/total/AGI in 15k-20k/taxable/Married Filing Jointly/Surviving Spouse\",\n",
+    "    \"nation/irs/count/count/AGI in 10k-15k/taxable/Head of Household\",\n",
+    "    \"nation/irs/count/count/AGI in 15k-20k/taxable/Head of Household\",\n",
+    "    \"nation/irs/count/count/AGI in 10k-15k/taxable/Married Filing Jointly/Surviving Spouse\",\n",
+    "    \"nation/irs/count/count/AGI in 15k-20k/taxable/Married Filing Jointly/Surviving Spouse\",\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "683fd57e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Length of household entity in the dataset measured through household_weight:\n",
+    "\n",
+    "# Original ECPS 2024 dataset size: 41310\n",
+    "# Through \"random_sampling_minimization\" with 0.5 of the dataset being pruned: 20655\n",
+    "# Through \"random_sampling_minimization\" with 0.2 of the dataset being pruned: 33408\n",
+    "# After minimization through \"candidate_loss_contribution\" and a 1.0 max error change: 20655 \n",
+    "# After minimization through \"candidate_loss_contribution\" and a 0.001 max error change: 24786"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e99994d3",
+   "metadata": {},
+   "source": [
+    "## Enhanced_CPS_2024.py Regularization Approaches"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "db975ac1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## ALL TESTS\n",
+    "\n",
+    "## For L1 and L0 penalty approaches which are integrated into the enhanced CPS dataset creation\n",
+    "input_dataset = ExtendedCPS_2024\n",
+    "\n",
+    "approaches = [\"l0_sigmoid\"]#, \"l0_log\", \"l0_exp\", \"l1\"]\n",
+    "penalty_weights = [1e-5]#, 1e-4, 1e-3, 1e-2, 1e-1]\n",
+    "\n",
+    "def get_output_path(approach, file_name):\n",
+    "    output_path = STORAGE_FOLDER / approach / file_name\n",
+    "    output_path.parent.mkdir(parents=True, exist_ok=True)\n",
+    "    return output_path\n",
+    "\n",
+    "results = []\n",
+    "\n",
+    "for approach in approaches:\n",
+    "            for penalty_weight in penalty_weights:\n",
+    "                # Storing files in correct locations\n",
+    "                cal_log_name = f\"calibration_log_{approach}_{penalty_weight}.csv\"\n",
+    "                h5_name = f\"enhanced_cps_2024_{approach}_{penalty_weight}_minimised.h5\"\n",
+    "                cal_log_path = get_output_path(approach, cal_log_name)\n",
+    "                h5_path = get_output_path(approach, h5_name)\n",
+    "\n",
+    "                sim = Microsimulation(dataset=input_dataset)\n",
+    "                data = sim.dataset.load_dataset()\n",
+    "                data[\"household_weight\"] = {}\n",
+    "                original_weights = sim.calculate(\"household_weight\")\n",
+    "                original_weights = original_weights.values + np.random.normal(\n",
+    "                    1, 0.1, len(original_weights)\n",
+    "                )\n",
+    "                for year in range(2024, 2025):\n",
+    "                    loss_matrix, targets_array = build_loss_matrix(\n",
+    "                        input_dataset, year\n",
+    "                    )\n",
+    "\n",
+    "                    bad_mask = loss_matrix.columns.isin(bad_targets)\n",
+    "                    keep_mask_bool = ~bad_mask\n",
+    "                    keep_idx = np.where(keep_mask_bool)[0]\n",
+    "                    loss_matrix_clean = loss_matrix.iloc[:, keep_idx]\n",
+    "                    targets_array_clean = targets_array[keep_idx]\n",
+    "                    assert loss_matrix_clean.shape[1] == targets_array_clean.size\n",
+    "\n",
+    "                    optimised_weights = reweight(\n",
+    "                        original_weights,\n",
+    "                        loss_matrix_clean,\n",
+    "                        targets_array_clean,\n",
+    "                        log_path=cal_log_path, \n",
+    "                        penalty_approach=approach,\n",
+    "                        penalty_weight=penalty_weight, \n",
+    "                        epochs=250,  # Reduced epochs for faster processing\n",
+    "                    )\n",
+    "                    keep_indices = prune_dataset(optimised_weights, epsilon=1e-3)\n",
+    "                    pruned_weights = optimised_weights[keep_indices]\n",
+    "                    \n",
+    "                    data[\"household_weight\"][year] = pruned_weights\n",
+    "\n",
+    "                # Save to HDF5 file\n",
+    "                with h5py.File(h5_path, \"w\") as f:\n",
+    "                    for variable, values in data.items():\n",
+    "                        for year, value in values.items():\n",
+    "                            f.create_dataset(f\"{variable}/{year}\", data=value)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "69ff392d",
+   "metadata": {},
+   "source": [
+    "## Minimize.py Regularization Approaches"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "aeab67b3",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[17], line 31\u001b[0m\n\u001b[1;32m     29\u001b[0m         output_path \u001b[38;5;241m=\u001b[39m STORAGE_FOLDER \u001b[38;5;241m/\u001b[39m approach \u001b[38;5;241m/\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvalue\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_enhanced_cps_2024_minimised.h5\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m     30\u001b[0m         output_path\u001b[38;5;241m.\u001b[39mparent\u001b[38;5;241m.\u001b[39mmkdir(parents\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m---> 31\u001b[0m         \u001b[43mminimize_dataset\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m     32\u001b[0m \u001b[43m            \u001b[49m\u001b[43mfile\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     33\u001b[0m \u001b[43m            \u001b[49m\u001b[43moutput_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     34\u001b[0m \u001b[43m            \u001b[49m\u001b[43mminimization_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mminimization_function\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m     35\u001b[0m \u001b[43m            \u001b[49m\u001b[43mtarget_fractions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m     36\u001b[0m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     37\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m params \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mloss_rel_change_max\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m     38\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m file \u001b[38;5;129;01min\u001b[39;00m files:\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/policyengine_us_data/utils/minimize.py:336\u001b[0m, in \u001b[0;36mminimize_dataset\u001b[0;34m(dataset, output_path, minimization_function, loss_matrix, targets, **kwargs)\u001b[0m\n\u001b[1;32m    333\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    334\u001b[0m     dataset_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(dataset)\n\u001b[0;32m--> 336\u001b[0m \u001b[43mcreate_calibration_log_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdataset_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    338\u001b[0m dataset \u001b[38;5;241m=\u001b[39m Dataset\u001b[38;5;241m.\u001b[39mfrom_file(dataset_path)\n\u001b[1;32m    339\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m loss_matrix \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m targets \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/policyengine_us_data/utils/minimize.py:26\u001b[0m, in \u001b[0;36mcreate_calibration_log_file\u001b[0;34m(file_path, epoch)\u001b[0m\n\u001b[1;32m     23\u001b[0m dataset \u001b[38;5;241m=\u001b[39m Dataset\u001b[38;5;241m.\u001b[39mfrom_file(file_path)\n\u001b[1;32m     24\u001b[0m sim \u001b[38;5;241m=\u001b[39m Microsimulation(dataset\u001b[38;5;241m=\u001b[39mdataset)\n\u001b[0;32m---> 26\u001b[0m loss_matrix, targets \u001b[38;5;241m=\u001b[39m \u001b[43mbuild_loss_matrix\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdataset\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m2024\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m     28\u001b[0m bad_mask \u001b[38;5;241m=\u001b[39m loss_matrix\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39misin(bad_targets)\n\u001b[1;32m     29\u001b[0m keep_mask_bool \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m~\u001b[39mbad_mask\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/policyengine_us_data/utils/loss.py:243\u001b[0m, in \u001b[0;36mbuild_loss_matrix\u001b[0;34m(dataset, time_period)\u001b[0m\n\u001b[1;32m    241\u001b[0m \u001b[38;5;66;03m# National ACA Spending\u001b[39;00m\n\u001b[1;32m    242\u001b[0m label \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnation/gov/aca_spending\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m--> 243\u001b[0m loss_matrix[label] \u001b[38;5;241m=\u001b[39m \u001b[43msim\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcalculate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    244\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43maca_ptc\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmap_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhousehold\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m2025\u001b[39;49m\n\u001b[1;32m    245\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mvalues\n\u001b[1;32m    246\u001b[0m ACA_SPENDING_2024 \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m9.8e10\u001b[39m  \u001b[38;5;66;03m# 2024 outlays on PTC\u001b[39;00m\n\u001b[1;32m    247\u001b[0m targets_array\u001b[38;5;241m.\u001b[39mappend(ACA_SPENDING_2024)\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/.venv/lib/python3.11/site-packages/policyengine_core/simulations/microsimulation.py:54\u001b[0m, in \u001b[0;36mMicrosimulation.calculate\u001b[0;34m(self, variable_name, period, map_to, use_weights, decode_enums)\u001b[0m\n\u001b[1;32m     52\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m period \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_calculation_period \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m     53\u001b[0m     period \u001b[38;5;241m=\u001b[39m get_period(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_calculation_period)\n\u001b[0;32m---> 54\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcalculate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariable_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmap_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdecode_enums\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     55\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m use_weights:\n\u001b[1;32m     56\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m values\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/.venv/lib/python3.11/site-packages/policyengine_core/simulations/simulation.py:485\u001b[0m, in \u001b[0;36mSimulation.calculate\u001b[0;34m(self, variable_name, period, map_to, decode_enums)\u001b[0m\n\u001b[1;32m    482\u001b[0m np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mseed(\u001b[38;5;28mhash\u001b[39m(variable_name \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mstr\u001b[39m(period)) \u001b[38;5;241m%\u001b[39m \u001b[38;5;241m1000000\u001b[39m)\n\u001b[1;32m    484\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 485\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_calculate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariable_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    486\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(result, EnumArray) \u001b[38;5;129;01mand\u001b[39;00m decode_enums:\n\u001b[1;32m    487\u001b[0m         result \u001b[38;5;241m=\u001b[39m result\u001b[38;5;241m.\u001b[39mdecode_to_str()\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/.venv/lib/python3.11/site-packages/policyengine_core/simulations/simulation.py:715\u001b[0m, in \u001b[0;36mSimulation._calculate\u001b[0;34m(self, variable_name, period)\u001b[0m\n\u001b[1;32m    713\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m    714\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_for_cycle(variable\u001b[38;5;241m.\u001b[39mname, period)\n\u001b[0;32m--> 715\u001b[0m     array \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_formula\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariable\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpopulation\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    717\u001b[0m     \u001b[38;5;66;03m# If no result, use the default value and cache it\u001b[39;00m\n\u001b[1;32m    718\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m array \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    719\u001b[0m         \u001b[38;5;66;03m# Check if the variable has a previously defined value\u001b[39;00m\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/.venv/lib/python3.11/site-packages/policyengine_core/simulations/simulation.py:1005\u001b[0m, in \u001b[0;36mSimulation._run_formula\u001b[0;34m(self, variable, population, period)\u001b[0m\n\u001b[1;32m   1003\u001b[0m     array \u001b[38;5;241m=\u001b[39m formula(population, period)\n\u001b[1;32m   1004\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1005\u001b[0m     array \u001b[38;5;241m=\u001b[39m \u001b[43mformula\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpopulation\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparameters_at\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1007\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m array\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/.venv/lib/python3.11/site-packages/policyengine_us/variables/gov/aca/ptc/aca_ptc.py:14\u001b[0m, in \u001b[0;36maca_ptc.formula\u001b[0;34m(tax_unit, period, parameters)\u001b[0m\n\u001b[1;32m     13\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mformula\u001b[39m(tax_unit, period, parameters):\n\u001b[0;32m---> 14\u001b[0m     plan_cost \u001b[38;5;241m=\u001b[39m \u001b[43mtax_unit\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mslcsp\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     15\u001b[0m     income \u001b[38;5;241m=\u001b[39m tax_unit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124maca_magi\u001b[39m\u001b[38;5;124m\"\u001b[39m, period)\n\u001b[1;32m     16\u001b[0m     applicable_figure \u001b[38;5;241m=\u001b[39m tax_unit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124maca_ptc_phase_out_rate\u001b[39m\u001b[38;5;124m\"\u001b[39m, period)\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/.venv/lib/python3.11/site-packages/policyengine_core/populations/group_population.py:38\u001b[0m, in \u001b[0;36mGroupPopulation.__call__\u001b[0;34m(self, variable_name, period, options)\u001b[0m\n\u001b[1;32m     36\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msum(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmembers(variable_name, period, options))\n\u001b[1;32m     37\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m---> 38\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__call__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mvariable_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/.venv/lib/python3.11/site-packages/policyengine_core/populations/population.py:137\u001b[0m, in \u001b[0;36mPopulation.__call__\u001b[0;34m(self, variable_name, period, options)\u001b[0m\n\u001b[1;32m    133\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msimulation\u001b[38;5;241m.\u001b[39mcalculate_divide(\n\u001b[1;32m    134\u001b[0m         variable_name, period, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mcalculate_kwargs\n\u001b[1;32m    135\u001b[0m     )\n\u001b[1;32m    136\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 137\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msimulation\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcalculate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    138\u001b[0m \u001b[43m        \u001b[49m\u001b[43mvariable_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mcalculate_kwargs\u001b[49m\n\u001b[1;32m    139\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/.venv/lib/python3.11/site-packages/policyengine_core/simulations/microsimulation.py:54\u001b[0m, in \u001b[0;36mMicrosimulation.calculate\u001b[0;34m(self, variable_name, period, map_to, use_weights, decode_enums)\u001b[0m\n\u001b[1;32m     52\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m period \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_calculation_period \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m     53\u001b[0m     period \u001b[38;5;241m=\u001b[39m get_period(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_calculation_period)\n\u001b[0;32m---> 54\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcalculate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariable_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmap_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdecode_enums\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     55\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m use_weights:\n\u001b[1;32m     56\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m values\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/.venv/lib/python3.11/site-packages/policyengine_core/simulations/simulation.py:485\u001b[0m, in \u001b[0;36mSimulation.calculate\u001b[0;34m(self, variable_name, period, map_to, decode_enums)\u001b[0m\n\u001b[1;32m    482\u001b[0m np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mseed(\u001b[38;5;28mhash\u001b[39m(variable_name \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mstr\u001b[39m(period)) \u001b[38;5;241m%\u001b[39m \u001b[38;5;241m1000000\u001b[39m)\n\u001b[1;32m    484\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 485\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_calculate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariable_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    486\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(result, EnumArray) \u001b[38;5;129;01mand\u001b[39;00m decode_enums:\n\u001b[1;32m    487\u001b[0m         result \u001b[38;5;241m=\u001b[39m result\u001b[38;5;241m.\u001b[39mdecode_to_str()\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/.venv/lib/python3.11/site-packages/policyengine_core/simulations/simulation.py:681\u001b[0m, in \u001b[0;36mSimulation._calculate\u001b[0;34m(self, variable_name, period)\u001b[0m\n\u001b[1;32m    679\u001b[0m         values \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_calculate(variable_name, contained_months[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m])\n\u001b[1;32m    680\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 681\u001b[0m         values \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcalculate_add\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariable_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    682\u001b[0m     alternate_period_handling \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m    683\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m variable\u001b[38;5;241m.\u001b[39mdefinition_period \u001b[38;5;241m==\u001b[39m YEAR \u001b[38;5;129;01mand\u001b[39;00m period\u001b[38;5;241m.\u001b[39munit \u001b[38;5;241m==\u001b[39m MONTH:\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/.venv/lib/python3.11/site-packages/policyengine_core/simulations/microsimulation.py:67\u001b[0m, in \u001b[0;36mMicrosimulation.calculate_add\u001b[0;34m(self, variable_name, period, map_to, use_weights)\u001b[0m\n\u001b[1;32m     60\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mcalculate_add\u001b[39m(\n\u001b[1;32m     61\u001b[0m     \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m     62\u001b[0m     variable_name: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     65\u001b[0m     use_weights: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m     66\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m MicroSeries:\n\u001b[0;32m---> 67\u001b[0m     values \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcalculate_add\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariable_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmap_to\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     68\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m use_weights:\n\u001b[1;32m     69\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m values\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/.venv/lib/python3.11/site-packages/policyengine_core/simulations/simulation.py:846\u001b[0m, in \u001b[0;36mSimulation.calculate_add\u001b[0;34m(self, variable_name, period, decode_enums)\u001b[0m\n\u001b[1;32m    835\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m variable\u001b[38;5;241m.\u001b[39mdefinition_period \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m [\n\u001b[1;32m    836\u001b[0m     periods\u001b[38;5;241m.\u001b[39mDAY,\n\u001b[1;32m    837\u001b[0m     periods\u001b[38;5;241m.\u001b[39mMONTH,\n\u001b[1;32m    838\u001b[0m     periods\u001b[38;5;241m.\u001b[39mYEAR,\n\u001b[1;32m    839\u001b[0m ]:\n\u001b[1;32m    840\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m    841\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnable to sum constant variable \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m over period \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m: only variables defined daily, monthly, or yearly can be summed over time.\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(\n\u001b[1;32m    842\u001b[0m             variable\u001b[38;5;241m.\u001b[39mname, period\n\u001b[1;32m    843\u001b[0m         )\n\u001b[1;32m    844\u001b[0m     )\n\u001b[0;32m--> 846\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28msum\u001b[39m(\n\u001b[1;32m    847\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcalculate(variable_name, sub_period)\n\u001b[1;32m    848\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m sub_period \u001b[38;5;129;01min\u001b[39;00m period\u001b[38;5;241m.\u001b[39mget_subperiods(variable\u001b[38;5;241m.\u001b[39mdefinition_period)\n\u001b[1;32m    849\u001b[0m )\n\u001b[1;32m    850\u001b[0m holder \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_holder(variable\u001b[38;5;241m.\u001b[39mname)\n\u001b[1;32m    851\u001b[0m holder\u001b[38;5;241m.\u001b[39mput_in_cache(result, period, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbranch_name)\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/.venv/lib/python3.11/site-packages/policyengine_core/simulations/simulation.py:847\u001b[0m, in \u001b[0;36m<genexpr>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m    835\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m variable\u001b[38;5;241m.\u001b[39mdefinition_period \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m [\n\u001b[1;32m    836\u001b[0m     periods\u001b[38;5;241m.\u001b[39mDAY,\n\u001b[1;32m    837\u001b[0m     periods\u001b[38;5;241m.\u001b[39mMONTH,\n\u001b[1;32m    838\u001b[0m     periods\u001b[38;5;241m.\u001b[39mYEAR,\n\u001b[1;32m    839\u001b[0m ]:\n\u001b[1;32m    840\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m    841\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnable to sum constant variable \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m over period \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m: only variables defined daily, monthly, or yearly can be summed over time.\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(\n\u001b[1;32m    842\u001b[0m             variable\u001b[38;5;241m.\u001b[39mname, period\n\u001b[1;32m    843\u001b[0m         )\n\u001b[1;32m    844\u001b[0m     )\n\u001b[1;32m    846\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28msum\u001b[39m(\n\u001b[0;32m--> 847\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcalculate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariable_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msub_period\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    848\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m sub_period \u001b[38;5;129;01min\u001b[39;00m period\u001b[38;5;241m.\u001b[39mget_subperiods(variable\u001b[38;5;241m.\u001b[39mdefinition_period)\n\u001b[1;32m    849\u001b[0m )\n\u001b[1;32m    850\u001b[0m holder \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_holder(variable\u001b[38;5;241m.\u001b[39mname)\n\u001b[1;32m    851\u001b[0m holder\u001b[38;5;241m.\u001b[39mput_in_cache(result, period, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbranch_name)\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/.venv/lib/python3.11/site-packages/policyengine_core/simulations/microsimulation.py:54\u001b[0m, in \u001b[0;36mMicrosimulation.calculate\u001b[0;34m(self, variable_name, period, map_to, use_weights, decode_enums)\u001b[0m\n\u001b[1;32m     52\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m period \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_calculation_period \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m     53\u001b[0m     period \u001b[38;5;241m=\u001b[39m get_period(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_calculation_period)\n\u001b[0;32m---> 54\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcalculate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariable_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmap_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdecode_enums\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     55\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m use_weights:\n\u001b[1;32m     56\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m values\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/.venv/lib/python3.11/site-packages/policyengine_core/simulations/simulation.py:485\u001b[0m, in \u001b[0;36mSimulation.calculate\u001b[0;34m(self, variable_name, period, map_to, decode_enums)\u001b[0m\n\u001b[1;32m    482\u001b[0m np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mseed(\u001b[38;5;28mhash\u001b[39m(variable_name \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mstr\u001b[39m(period)) \u001b[38;5;241m%\u001b[39m \u001b[38;5;241m1000000\u001b[39m)\n\u001b[1;32m    484\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 485\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_calculate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariable_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    486\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(result, EnumArray) \u001b[38;5;129;01mand\u001b[39;00m decode_enums:\n\u001b[1;32m    487\u001b[0m         result \u001b[38;5;241m=\u001b[39m result\u001b[38;5;241m.\u001b[39mdecode_to_str()\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/.venv/lib/python3.11/site-packages/policyengine_core/simulations/simulation.py:715\u001b[0m, in \u001b[0;36mSimulation._calculate\u001b[0;34m(self, variable_name, period)\u001b[0m\n\u001b[1;32m    713\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m    714\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_for_cycle(variable\u001b[38;5;241m.\u001b[39mname, period)\n\u001b[0;32m--> 715\u001b[0m     array \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_formula\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariable\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpopulation\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    717\u001b[0m     \u001b[38;5;66;03m# If no result, use the default value and cache it\u001b[39;00m\n\u001b[1;32m    718\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m array \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    719\u001b[0m         \u001b[38;5;66;03m# Check if the variable has a previously defined value\u001b[39;00m\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/.venv/lib/python3.11/site-packages/policyengine_core/simulations/simulation.py:940\u001b[0m, in \u001b[0;36mSimulation._run_formula\u001b[0;34m(self, variable, population, period)\u001b[0m\n\u001b[1;32m    938\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m added_variable \u001b[38;5;129;01min\u001b[39;00m adds_list:\n\u001b[1;32m    939\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m added_variable \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtax_benefit_system\u001b[38;5;241m.\u001b[39mvariables:\n\u001b[0;32m--> 940\u001b[0m         values \u001b[38;5;241m=\u001b[39m values \u001b[38;5;241m+\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcalculate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    941\u001b[0m \u001b[43m            \u001b[49m\u001b[43madded_variable\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmap_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mvariable\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mentity\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkey\u001b[49m\n\u001b[1;32m    942\u001b[0m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    943\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    944\u001b[0m         \u001b[38;5;28;01mtry\u001b[39;00m:\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/.venv/lib/python3.11/site-packages/policyengine_core/simulations/microsimulation.py:54\u001b[0m, in \u001b[0;36mMicrosimulation.calculate\u001b[0;34m(self, variable_name, period, map_to, use_weights, decode_enums)\u001b[0m\n\u001b[1;32m     52\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m period \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_calculation_period \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m     53\u001b[0m     period \u001b[38;5;241m=\u001b[39m get_period(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault_calculation_period)\n\u001b[0;32m---> 54\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcalculate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariable_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmap_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdecode_enums\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     55\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m use_weights:\n\u001b[1;32m     56\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m values\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/.venv/lib/python3.11/site-packages/policyengine_core/simulations/simulation.py:485\u001b[0m, in \u001b[0;36mSimulation.calculate\u001b[0;34m(self, variable_name, period, map_to, decode_enums)\u001b[0m\n\u001b[1;32m    482\u001b[0m np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mseed(\u001b[38;5;28mhash\u001b[39m(variable_name \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mstr\u001b[39m(period)) \u001b[38;5;241m%\u001b[39m \u001b[38;5;241m1000000\u001b[39m)\n\u001b[1;32m    484\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 485\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_calculate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariable_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    486\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(result, EnumArray) \u001b[38;5;129;01mand\u001b[39;00m decode_enums:\n\u001b[1;32m    487\u001b[0m         result \u001b[38;5;241m=\u001b[39m result\u001b[38;5;241m.\u001b[39mdecode_to_str()\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/.venv/lib/python3.11/site-packages/policyengine_core/simulations/simulation.py:715\u001b[0m, in \u001b[0;36mSimulation._calculate\u001b[0;34m(self, variable_name, period)\u001b[0m\n\u001b[1;32m    713\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m    714\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_for_cycle(variable\u001b[38;5;241m.\u001b[39mname, period)\n\u001b[0;32m--> 715\u001b[0m     array \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_formula\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvariable\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpopulation\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    717\u001b[0m     \u001b[38;5;66;03m# If no result, use the default value and cache it\u001b[39;00m\n\u001b[1;32m    718\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m array \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    719\u001b[0m         \u001b[38;5;66;03m# Check if the variable has a previously defined value\u001b[39;00m\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/.venv/lib/python3.11/site-packages/policyengine_core/simulations/simulation.py:1005\u001b[0m, in \u001b[0;36mSimulation._run_formula\u001b[0;34m(self, variable, population, period)\u001b[0m\n\u001b[1;32m   1003\u001b[0m     array \u001b[38;5;241m=\u001b[39m formula(population, period)\n\u001b[1;32m   1004\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1005\u001b[0m     array \u001b[38;5;241m=\u001b[39m \u001b[43mformula\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpopulation\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparameters_at\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1007\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m array\n",
+      "File \u001b[0;32m~/Desktop/PolicyEngine/policyengine-us-data/.venv/lib/python3.11/site-packages/policyengine_us/variables/gov/aca/slspc/slcsp_age_curve_amount_person.py:27\u001b[0m, in \u001b[0;36mslcsp_age_curve_amount_person.formula\u001b[0;34m(person, period, parameters)\u001b[0m\n\u001b[1;32m     19\u001b[0m p \u001b[38;5;241m=\u001b[39m parameters(period)\u001b[38;5;241m.\u001b[39mgov\u001b[38;5;241m.\u001b[39maca\u001b[38;5;241m.\u001b[39mage_curves\n\u001b[1;32m     21\u001b[0m \u001b[38;5;66;03m# Handle other states with regular bracket structures\u001b[39;00m\n\u001b[1;32m     22\u001b[0m multiplier \u001b[38;5;241m=\u001b[39m select(\n\u001b[1;32m     23\u001b[0m     [\n\u001b[1;32m     24\u001b[0m         state_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAL\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m     25\u001b[0m         state_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDC\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m     26\u001b[0m         state_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMA\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m---> 27\u001b[0m         \u001b[43mstate_code\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mMN\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m,\n\u001b[1;32m     28\u001b[0m         state_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMS\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m     29\u001b[0m         state_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOR\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m     30\u001b[0m         state_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUT\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m     31\u001b[0m     ],\n\u001b[1;32m     32\u001b[0m     [\n\u001b[1;32m     33\u001b[0m         p\u001b[38;5;241m.\u001b[39mal\u001b[38;5;241m.\u001b[39mcalc(age),\n\u001b[1;32m     34\u001b[0m         p\u001b[38;5;241m.\u001b[39mdc\u001b[38;5;241m.\u001b[39mcalc(age),\n\u001b[1;32m     35\u001b[0m         p\u001b[38;5;241m.\u001b[39mma\u001b[38;5;241m.\u001b[39mcalc(age),\n\u001b[1;32m     36\u001b[0m         p\u001b[38;5;241m.\u001b[39mmn\u001b[38;5;241m.\u001b[39mcalc(age),\n\u001b[1;32m     37\u001b[0m         p\u001b[38;5;241m.\u001b[39mms\u001b[38;5;241m.\u001b[39mcalc(age),\n\u001b[1;32m     38\u001b[0m         p[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mor\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mcalc(age),\n\u001b[1;32m     39\u001b[0m         p\u001b[38;5;241m.\u001b[39mut\u001b[38;5;241m.\u001b[39mcalc(age),\n\u001b[1;32m     40\u001b[0m     ],\n\u001b[1;32m     41\u001b[0m     default\u001b[38;5;241m=\u001b[39mp\u001b[38;5;241m.\u001b[39mdefault\u001b[38;5;241m.\u001b[39mcalc(age),\n\u001b[1;32m     42\u001b[0m )\n\u001b[1;32m     43\u001b[0m age_curve_applies \u001b[38;5;241m=\u001b[39m person\u001b[38;5;241m.\u001b[39mtax_unit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mslcsp_age_curve_applies\u001b[39m\u001b[38;5;124m\"\u001b[39m, period)\n\u001b[1;32m     44\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m base_cost \u001b[38;5;241m*\u001b[39m multiplier \u001b[38;5;241m*\u001b[39m age_curve_applies\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+     ]
+    }
+   ],
+   "source": [
+    "## For approaches external to the reweighting approach implemented in enhanced CPS dataset creation\n",
+    "\n",
+    "files = [\n",
+    "        STORAGE_FOLDER / \"enhanced_cps_2024.h5\",\n",
+    "    ]\n",
+    "\n",
+    "approaches = {\n",
+    "        \"random_sampling_minimization\": random_sampling_minimization,\n",
+    "        \"candidate_loss_contribution\": candidate_loss_contribution,\n",
+    "}\n",
+    "\n",
+    "optional_params = {\n",
+    "        \"random_sampling_minimization\": {\n",
+    "            \"target_fractions\": [0.7, 0.8, 0.9]#, 0.5, 0.6]],  # fractions of the dataset to keep\n",
+    "        },\n",
+    "        \"candidate_loss_contribution\": {\n",
+    "            \"loss_rel_change_max\": [0.00001, 0.000001, 0.0000001]#, 0.001, 0.0001]] # maximum relative change in loss\n",
+    "        }\n",
+    "}\n",
+    "\n",
+    "for approach, function in approaches.items():\n",
+    "    minimization_function = function\n",
+    "    # other minimization function approach is \"random_sampling_minimization\", for which you can specify the tolerance for loss relative change.\n",
+    "\n",
+    "    for params, values in optional_params[approach].items():\n",
+    "        for value in values:\n",
+    "            if params == \"target_fractions\":\n",
+    "                for file in files:\n",
+    "                    output_path = STORAGE_FOLDER / approach / f\"{value}_enhanced_cps_2024_minimised.h5\"\n",
+    "                    output_path.parent.mkdir(parents=True, exist_ok=True)\n",
+    "                    minimize_dataset(\n",
+    "                        file,\n",
+    "                        output_path,\n",
+    "                        minimization_function=minimization_function, \n",
+    "                        target_fractions=[value]\n",
+    "                    )\n",
+    "            elif params == \"loss_rel_change_max\":\n",
+    "                for file in files:\n",
+    "                    output_path = STORAGE_FOLDER / approach / f\"{value}_enhanced_cps_2024_minimised.h5\"\n",
+    "                    output_path.parent.mkdir(parents=True, exist_ok=True)\n",
+    "                    minimize_dataset(\n",
+    "                        file,\n",
+    "                        output_path,\n",
+    "                        minimization_function=minimization_function, \n",
+    "                        loss_rel_change_max=value\n",
+    "                    )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8568b5ca",
+   "metadata": {},
+   "source": [
+    "## Visualization of Results\n",
+    "Calibration logs can also be shown in María's Vercel dashboard"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f8b0fe2e",
+   "metadata": {},
+   "source": [
+    "### Data Scrape for Plotting"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "225debd8",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>strategy</th>\n",
+       "      <th>parameter</th>\n",
+       "      <th>dataset_size</th>\n",
+       "      <th>total_loss</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>none</td>\n",
+       "      <td>none</td>\n",
+       "      <td>41310</td>\n",
+       "      <td>0.0069</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  strategy parameter  dataset_size  total_loss\n",
+       "0     none      none         41310      0.0069"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\"\"\"\n",
+    "Creating scoring of loss\n",
+    "Creating dataframe to store regularization results\n",
+    "\"\"\"\n",
+    "\n",
+    "\n",
+    "def get_output_path(approach, file_name):\n",
+    "    output_path = STORAGE_FOLDER / approach / file_name\n",
+    "    output_path.parent.mkdir(parents=True, exist_ok=True)\n",
+    "    return output_path\n",
+    "\n",
+    "# Calculate quality categories\n",
+    "def loss_score(calibration_log):\n",
+    "    excellent_count = (\n",
+    "        calibration_log[\"rel_abs_error\"] < 0.05).sum()  # < 5% error\n",
+    "    good_count = (\n",
+    "        (calibration_log[\"rel_abs_error\"] >= 0.05)\n",
+    "        & (calibration_log[\"rel_abs_error\"] < 0.20)).sum()  # 5-20% error\n",
+    "    total_targets = len(calibration_log)\n",
+    "    # Calculate quality score\n",
+    "    quality_score = (excellent_count * 100 + good_count * 75) / total_targets\n",
+    "    return quality_score\n",
+    "\n",
+    "\n",
+    "\n",
+    "# Initial dataframe setup\n",
+    "reg_results_df = pd.DataFrame({\n",
+    "    'strategy': ['none'],\n",
+    "    'parameter': ['none'],\n",
+    "    'dataset_size': [41310],\n",
+    "    'total_loss': [6.9e-3]\n",
+    "})\n",
+    "\n",
+    "def add_result(df, strategy, parameter, dataset_size, total_loss):\n",
+    "    new_rows = pd.DataFrame({\n",
+    "        'strategy': strategy,        \n",
+    "        'parameter': parameter,      \n",
+    "        'dataset_size': [dataset_size],\n",
+    "        'total_loss': [total_loss]\n",
+    "    })\n",
+    "    return pd.concat([reg_results_df, new_rows], ignore_index=True)\n",
+    "\n",
+    "# Example usage\n",
+    "#reg_results_df = add_result(reg_results_df, ['L1', 'L2'], ['0.001','0.002'], [35000, 4000], [7.2e-3, 7.2e-3])\n",
+    "reg_results_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7bb3ef3c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>strategy</th>\n",
+       "      <th>parameter</th>\n",
+       "      <th>dataset_size</th>\n",
+       "      <th>total_loss</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>none</td>\n",
+       "      <td>none</td>\n",
+       "      <td>41310</td>\n",
+       "      <td>0.006900</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>candidate_loss_contribution</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>41310</td>\n",
+       "      <td>0.006900</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>random_sampling_minimization</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>20655</td>\n",
+       "      <td>80.882353</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>random_sampling_minimization</td>\n",
+       "      <td>0.6</td>\n",
+       "      <td>24786</td>\n",
+       "      <td>79.117647</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>random_sampling_minimization</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>41310</td>\n",
+       "      <td>0.006900</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>candidate_loss_contribution</td>\n",
+       "      <td>0.001</td>\n",
+       "      <td>41310</td>\n",
+       "      <td>77.647059</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>candidate_loss_contribution</td>\n",
+       "      <td>0.0001</td>\n",
+       "      <td>41310</td>\n",
+       "      <td>80.196078</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                       strategy parameter  dataset_size  total_loss\n",
+       "0                          none      none         41310    0.006900\n",
+       "1   candidate_loss_contribution       1.0         41310    0.006900\n",
+       "2  random_sampling_minimization       0.5         20655   80.882353\n",
+       "3  random_sampling_minimization       0.6         24786   79.117647\n",
+       "4  random_sampling_minimization       1.0         41310    0.006900\n",
+       "5   candidate_loss_contribution     0.001         41310   77.647059\n",
+       "6   candidate_loss_contribution    0.0001         41310   80.196078"
+      ]
+     },
+     "execution_count": 31,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\"\"\"\n",
+    "Scraping values from created calibration_log.csv and .h5 files to populate the plotting dataframe\n",
+    "\"\"\"\n",
+    "\n",
+    "og_size = 41310  # Original size of the dataset\n",
+    "og_loss = 6.9e-3  # Original loss from the baseline dataset\n",
+    "\n",
+    "approaches = [\"l0_sigmoid\", \"l0_log\", \"l0_exp\", \"l1\"]\n",
+    "penalty_weights = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5]\n",
+    "\n",
+    "for approach in approaches:\n",
+    "    strategy = approach\n",
+    "    reg_results_df = add_result(reg_results_df, strategy, 1.0, og_size, og_loss)\n",
+    "    for penalty_weight in penalty_weights:\n",
+    "        parameter = penalty_weight\n",
+    "\n",
+    "        # Pull length of .h5 file\n",
+    "        h5_name = f\"enhanced_cps_2024_{strategy}_{parameter}_minimised.h5\"\n",
+    "        h5_path = get_output_path(strategy, h5_name)\n",
+    "        dataset_size = len(h5py.File(h5_path, \"r\")['household_weight/2024'])\n",
+    "\n",
+    "        # Pull score of loss column\n",
+    "        cal_log_name = f\"calibration_log_{strategy}_{parameter}.csv\"\n",
+    "        cal_log_path = get_output_path(strategy, cal_log_name)\n",
+    "        calibration_log = pd.read_csv(cal_log_path)\n",
+    "        loss_value = loss_score(calibration_log)\n",
+    "        \n",
+    "        reg_results_df = add_result(reg_results_df, strategy, parameter, dataset_size, loss_value)\n",
+    "\n",
+    "approaches = {\n",
+    "    \"random_sampling_minimization\":[0.5, 0.6], #, 0.7, 0.8, 0.9], \n",
+    "    \"candidate_loss_contribution\": [0.001, 0.0001] #, 0.00001, 0.000001, 0.0000001],\n",
+    "}\n",
+    "\n",
+    "for approach, fractions in approaches.items():  # Use .items() to get key-value pairs\n",
+    "    reg_results_df = add_result(reg_results_df, strategy, 1.0, og_size, og_loss)\n",
+    "    for fraction in fractions:\n",
+    "        strategy = approach\n",
+    "        parameter = fraction\n",
+    "\n",
+    "        # Pull length of .h5 file\n",
+    "        h5_name = f\"{fraction}_enhanced_cps_2024_minimised.h5\"\n",
+    "        h5_path = STORAGE_FOLDER / strategy / h5_name\n",
+    "        dataset_size = len(h5py.File(h5_path, \"r\")['household_weight/2024'])\n",
+    "\n",
+    "        # Pull sum of loss column\n",
+    "        cal_log_name = f\"{fraction}_enhanced_cps_2024_minimised_calibration_log.csv\"\n",
+    "        cal_log_path = get_output_path(strategy, cal_log_name)\n",
+    "        calibration_log = pd.read_csv(cal_log_path)\n",
+    "        loss_value = loss_score(calibration_log)\n",
+    "\n",
+    "        reg_results_df = add_result(reg_results_df, strategy, parameter, dataset_size, loss_value)\n",
+    "\n",
+    "reg_results_df\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5b203ccd",
+   "metadata": {},
+   "source": [
+    "### Plotting"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "2dc0891c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.plotly.v1+json": {
+       "config": {
+        "plotlyServerURL": "https://plot.ly"
+       },
+       "data": [
+        {
+         "customdata": [
+          [
+           "candidate_loss_contribution"
+          ],
+          [
+           "candidate_loss_contribution"
+          ],
+          [
+           "candidate_loss_contribution"
+          ]
+         ],
+         "hovertemplate": "Strategy: %{customdata[0]}<br>Size: %{x}<br>Loss: %{y:.4f}<br> Param: %{text}",
+         "legendgroup": "candidate_loss_contribution",
+         "line": {
+          "color": "#636efa",
+          "dash": "solid"
+         },
+         "marker": {
+          "symbol": "circle"
+         },
+         "mode": "lines+markers+text",
+         "name": "candidate_loss_contribution",
+         "orientation": "v",
+         "showlegend": true,
+         "text": [
+          "1.0",
+          "0.001",
+          "0.0001"
+         ],
+         "textposition": "top center",
+         "type": "scatter",
+         "x": [
+          41310,
+          41310,
+          41310
+         ],
+         "xaxis": "x",
+         "y": [
+          0.0069,
+          77.6470588235294,
+          80.19607843137256
+         ],
+         "yaxis": "y"
+        },
+        {
+         "customdata": [
+          [
+           "random_sampling_minimization"
+          ],
+          [
+           "random_sampling_minimization"
+          ],
+          [
+           "random_sampling_minimization"
+          ]
+         ],
+         "hovertemplate": "Strategy: %{customdata[0]}<br>Size: %{x}<br>Loss: %{y:.4f}<br> Param: %{text}",
+         "legendgroup": "random_sampling_minimization",
+         "line": {
+          "color": "#EF553B",
+          "dash": "solid"
+         },
+         "marker": {
+          "symbol": "circle"
+         },
+         "mode": "lines+markers+text",
+         "name": "random_sampling_minimization",
+         "orientation": "v",
+         "showlegend": true,
+         "text": [
+          "1.0",
+          "0.6",
+          "0.5"
+         ],
+         "textposition": "top center",
+         "type": "scatter",
+         "x": [
+          41310,
+          24786,
+          20655
+         ],
+         "xaxis": "x",
+         "y": [
+          0.0069,
+          79.11764705882354,
+          80.88235294117646
+         ],
+         "yaxis": "y"
+        }
+       ],
+       "layout": {
+        "annotations": [
+         {
+          "arrowhead": 1,
+          "ax": 40,
+          "ay": -40,
+          "font": {
+           "color": "gray"
+          },
+          "showarrow": true,
+          "text": "Baseline",
+          "x": 41310,
+          "y": 0.0069
+         }
+        ],
+        "height": 600,
+        "hovermode": "closest",
+        "legend": {
+         "title": {
+          "text": "Strategy"
+         },
+         "tracegroupgap": 0
+        },
+        "shapes": [
+         {
+          "line": {
+           "color": "gray",
+           "dash": "dash"
+          },
+          "name": "Baseline Size",
+          "type": "line",
+          "x0": 41310,
+          "x1": 41310,
+          "y0": 0.0069,
+          "y1": 80.88235294117646
+         },
+         {
+          "line": {
+           "color": "gray",
+           "dash": "dash"
+          },
+          "name": "Baseline Loss",
+          "type": "line",
+          "x0": 20655,
+          "x1": 41310,
+          "y0": 0.0069,
+          "y1": 0.0069
+         }
+        ],
+        "template": {
+         "data": {
+          "bar": [
+           {
+            "error_x": {
+             "color": "#2a3f5f"
+            },
+            "error_y": {
+             "color": "#2a3f5f"
+            },
+            "marker": {
+             "line": {
+              "color": "#E5ECF6",
+              "width": 0.5
+             },
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "bar"
+           }
+          ],
+          "barpolar": [
+           {
+            "marker": {
+             "line": {
+              "color": "#E5ECF6",
+              "width": 0.5
+             },
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "barpolar"
+           }
+          ],
+          "carpet": [
+           {
+            "aaxis": {
+             "endlinecolor": "#2a3f5f",
+             "gridcolor": "white",
+             "linecolor": "white",
+             "minorgridcolor": "white",
+             "startlinecolor": "#2a3f5f"
+            },
+            "baxis": {
+             "endlinecolor": "#2a3f5f",
+             "gridcolor": "white",
+             "linecolor": "white",
+             "minorgridcolor": "white",
+             "startlinecolor": "#2a3f5f"
+            },
+            "type": "carpet"
+           }
+          ],
+          "choropleth": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "choropleth"
+           }
+          ],
+          "contour": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "contour"
+           }
+          ],
+          "contourcarpet": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "contourcarpet"
+           }
+          ],
+          "heatmap": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "heatmap"
+           }
+          ],
+          "heatmapgl": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "heatmapgl"
+           }
+          ],
+          "histogram": [
+           {
+            "marker": {
+             "pattern": {
+              "fillmode": "overlay",
+              "size": 10,
+              "solidity": 0.2
+             }
+            },
+            "type": "histogram"
+           }
+          ],
+          "histogram2d": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "histogram2d"
+           }
+          ],
+          "histogram2dcontour": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "histogram2dcontour"
+           }
+          ],
+          "mesh3d": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "type": "mesh3d"
+           }
+          ],
+          "parcoords": [
+           {
+            "line": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "parcoords"
+           }
+          ],
+          "pie": [
+           {
+            "automargin": true,
+            "type": "pie"
+           }
+          ],
+          "scatter": [
+           {
+            "fillpattern": {
+             "fillmode": "overlay",
+             "size": 10,
+             "solidity": 0.2
+            },
+            "type": "scatter"
+           }
+          ],
+          "scatter3d": [
+           {
+            "line": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatter3d"
+           }
+          ],
+          "scattercarpet": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattercarpet"
+           }
+          ],
+          "scattergeo": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattergeo"
+           }
+          ],
+          "scattergl": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattergl"
+           }
+          ],
+          "scattermapbox": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scattermapbox"
+           }
+          ],
+          "scatterpolar": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterpolar"
+           }
+          ],
+          "scatterpolargl": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterpolargl"
+           }
+          ],
+          "scatterternary": [
+           {
+            "marker": {
+             "colorbar": {
+              "outlinewidth": 0,
+              "ticks": ""
+             }
+            },
+            "type": "scatterternary"
+           }
+          ],
+          "surface": [
+           {
+            "colorbar": {
+             "outlinewidth": 0,
+             "ticks": ""
+            },
+            "colorscale": [
+             [
+              0,
+              "#0d0887"
+             ],
+             [
+              0.1111111111111111,
+              "#46039f"
+             ],
+             [
+              0.2222222222222222,
+              "#7201a8"
+             ],
+             [
+              0.3333333333333333,
+              "#9c179e"
+             ],
+             [
+              0.4444444444444444,
+              "#bd3786"
+             ],
+             [
+              0.5555555555555556,
+              "#d8576b"
+             ],
+             [
+              0.6666666666666666,
+              "#ed7953"
+             ],
+             [
+              0.7777777777777778,
+              "#fb9f3a"
+             ],
+             [
+              0.8888888888888888,
+              "#fdca26"
+             ],
+             [
+              1,
+              "#f0f921"
+             ]
+            ],
+            "type": "surface"
+           }
+          ],
+          "table": [
+           {
+            "cells": {
+             "fill": {
+              "color": "#EBF0F8"
+             },
+             "line": {
+              "color": "white"
+             }
+            },
+            "header": {
+             "fill": {
+              "color": "#C8D4E3"
+             },
+             "line": {
+              "color": "white"
+             }
+            },
+            "type": "table"
+           }
+          ]
+         },
+         "layout": {
+          "annotationdefaults": {
+           "arrowcolor": "#2a3f5f",
+           "arrowhead": 0,
+           "arrowwidth": 1
+          },
+          "autotypenumbers": "strict",
+          "coloraxis": {
+           "colorbar": {
+            "outlinewidth": 0,
+            "ticks": ""
+           }
+          },
+          "colorscale": {
+           "diverging": [
+            [
+             0,
+             "#8e0152"
+            ],
+            [
+             0.1,
+             "#c51b7d"
+            ],
+            [
+             0.2,
+             "#de77ae"
+            ],
+            [
+             0.3,
+             "#f1b6da"
+            ],
+            [
+             0.4,
+             "#fde0ef"
+            ],
+            [
+             0.5,
+             "#f7f7f7"
+            ],
+            [
+             0.6,
+             "#e6f5d0"
+            ],
+            [
+             0.7,
+             "#b8e186"
+            ],
+            [
+             0.8,
+             "#7fbc41"
+            ],
+            [
+             0.9,
+             "#4d9221"
+            ],
+            [
+             1,
+             "#276419"
+            ]
+           ],
+           "sequential": [
+            [
+             0,
+             "#0d0887"
+            ],
+            [
+             0.1111111111111111,
+             "#46039f"
+            ],
+            [
+             0.2222222222222222,
+             "#7201a8"
+            ],
+            [
+             0.3333333333333333,
+             "#9c179e"
+            ],
+            [
+             0.4444444444444444,
+             "#bd3786"
+            ],
+            [
+             0.5555555555555556,
+             "#d8576b"
+            ],
+            [
+             0.6666666666666666,
+             "#ed7953"
+            ],
+            [
+             0.7777777777777778,
+             "#fb9f3a"
+            ],
+            [
+             0.8888888888888888,
+             "#fdca26"
+            ],
+            [
+             1,
+             "#f0f921"
+            ]
+           ],
+           "sequentialminus": [
+            [
+             0,
+             "#0d0887"
+            ],
+            [
+             0.1111111111111111,
+             "#46039f"
+            ],
+            [
+             0.2222222222222222,
+             "#7201a8"
+            ],
+            [
+             0.3333333333333333,
+             "#9c179e"
+            ],
+            [
+             0.4444444444444444,
+             "#bd3786"
+            ],
+            [
+             0.5555555555555556,
+             "#d8576b"
+            ],
+            [
+             0.6666666666666666,
+             "#ed7953"
+            ],
+            [
+             0.7777777777777778,
+             "#fb9f3a"
+            ],
+            [
+             0.8888888888888888,
+             "#fdca26"
+            ],
+            [
+             1,
+             "#f0f921"
+            ]
+           ]
+          },
+          "colorway": [
+           "#636efa",
+           "#EF553B",
+           "#00cc96",
+           "#ab63fa",
+           "#FFA15A",
+           "#19d3f3",
+           "#FF6692",
+           "#B6E880",
+           "#FF97FF",
+           "#FECB52"
+          ],
+          "font": {
+           "color": "#2a3f5f"
+          },
+          "geo": {
+           "bgcolor": "white",
+           "lakecolor": "white",
+           "landcolor": "#E5ECF6",
+           "showlakes": true,
+           "showland": true,
+           "subunitcolor": "white"
+          },
+          "hoverlabel": {
+           "align": "left"
+          },
+          "hovermode": "closest",
+          "mapbox": {
+           "style": "light"
+          },
+          "paper_bgcolor": "white",
+          "plot_bgcolor": "#E5ECF6",
+          "polar": {
+           "angularaxis": {
+            "gridcolor": "white",
+            "linecolor": "white",
+            "ticks": ""
+           },
+           "bgcolor": "#E5ECF6",
+           "radialaxis": {
+            "gridcolor": "white",
+            "linecolor": "white",
+            "ticks": ""
+           }
+          },
+          "scene": {
+           "xaxis": {
+            "backgroundcolor": "#E5ECF6",
+            "gridcolor": "white",
+            "gridwidth": 2,
+            "linecolor": "white",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "white"
+           },
+           "yaxis": {
+            "backgroundcolor": "#E5ECF6",
+            "gridcolor": "white",
+            "gridwidth": 2,
+            "linecolor": "white",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "white"
+           },
+           "zaxis": {
+            "backgroundcolor": "#E5ECF6",
+            "gridcolor": "white",
+            "gridwidth": 2,
+            "linecolor": "white",
+            "showbackground": true,
+            "ticks": "",
+            "zerolinecolor": "white"
+           }
+          },
+          "shapedefaults": {
+           "line": {
+            "color": "#2a3f5f"
+           }
+          },
+          "ternary": {
+           "aaxis": {
+            "gridcolor": "white",
+            "linecolor": "white",
+            "ticks": ""
+           },
+           "baxis": {
+            "gridcolor": "white",
+            "linecolor": "white",
+            "ticks": ""
+           },
+           "bgcolor": "#E5ECF6",
+           "caxis": {
+            "gridcolor": "white",
+            "linecolor": "white",
+            "ticks": ""
+           }
+          },
+          "title": {
+           "x": 0.05
+          },
+          "xaxis": {
+           "automargin": true,
+           "gridcolor": "white",
+           "linecolor": "white",
+           "ticks": "",
+           "title": {
+            "standoff": 15
+           },
+           "zerolinecolor": "white",
+           "zerolinewidth": 2
+          },
+          "yaxis": {
+           "automargin": true,
+           "gridcolor": "white",
+           "linecolor": "white",
+           "ticks": "",
+           "title": {
+            "standoff": 15
+           },
+           "zerolinecolor": "white",
+           "zerolinewidth": 2
+          }
+         }
+        },
+        "title": {
+         "text": "ECPS Regularization Strategy Comparison"
+        },
+        "width": 900,
+        "xaxis": {
+         "anchor": "y",
+         "domain": [
+          0,
+          1
+         ],
+         "title": {
+          "text": "Number of Households"
+         }
+        },
+        "yaxis": {
+         "anchor": "x",
+         "domain": [
+          0,
+          1
+         ],
+         "title": {
+          "text": "Calibration Score"
+         }
+        }
+       }
+      }
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "\"\"\"\n",
+    "Creating a multi-line plot with plotly\n",
+    "\"\"\"\n",
+    "\n",
+    "# Filter out the baseline row\n",
+    "df_plot = reg_results_df[reg_results_df['strategy'] != 'none'].copy()\n",
+    "df_plot['parameter'] = df_plot['parameter'].astype(str)\n",
+    "df_plot = df_plot.sort_values(by=['strategy', 'dataset_size'], ascending=[True, False])\n",
+    "\n",
+    "# Create line plot\n",
+    "fig = px.line(\n",
+    "    df_plot,\n",
+    "    x=\"dataset_size\",\n",
+    "    y=\"total_loss\",\n",
+    "    color=\"strategy\",\n",
+    "    markers=True,\n",
+    "    text=\"parameter\",\n",
+    "    custom_data=[\"strategy\"],\n",
+    "    title=\"ECPS Regularization Strategy Comparison\",\n",
+    "    labels={\n",
+    "        \"dataset_size\": \"Number of Households\",\n",
+    "        \"total_loss\": \"Calibration Score\",\n",
+    "        \"strategy\": \"Regularization Approach\"\n",
+    "    }\n",
+    ")\n",
+    "\n",
+    "# Add text labels (parameter) on hover\n",
+    "fig.update_traces(\n",
+    "    textposition=\"top center\", \n",
+    "    hovertemplate=(\n",
+    "        \"Strategy: %{customdata[0]}<br>\"\n",
+    "        \"Size: %{x}<br>\"\n",
+    "        \"Loss: %{y:.4f}<br> \"\n",
+    "        \"Param: %{text}\"\n",
+    "    )\n",
+    ")\n",
+    "\n",
+    "# Add baseline lines\n",
+    "baseline = reg_results_df[reg_results_df['strategy'] == 'none'].iloc[0]\n",
+    "\n",
+    "fig.add_shape(\n",
+    "    type=\"line\",\n",
+    "    x0=baseline[\"dataset_size\"], x1=baseline[\"dataset_size\"],\n",
+    "    y0=df_plot[\"total_loss\"].min(), y1=df_plot[\"total_loss\"].max(),\n",
+    "    line=dict(color=\"gray\", dash=\"dash\"),\n",
+    "    name=\"Baseline Size\"\n",
+    ")\n",
+    "\n",
+    "fig.add_shape(\n",
+    "    type=\"line\",\n",
+    "    x0=df_plot[\"dataset_size\"].min(), x1=df_plot[\"dataset_size\"].max(),\n",
+    "    y0=baseline[\"total_loss\"], y1=baseline[\"total_loss\"],\n",
+    "    line=dict(color=\"gray\", dash=\"dash\"),\n",
+    "    name=\"Baseline Loss\"\n",
+    ")\n",
+    "\n",
+    "# Add annotation for the baseline\n",
+    "fig.add_annotation(\n",
+    "    x=baseline[\"dataset_size\"],\n",
+    "    y=baseline[\"total_loss\"],\n",
+    "    text=\"Baseline\",\n",
+    "    showarrow=True,\n",
+    "    arrowhead=1,\n",
+    "    ax=40,\n",
+    "    ay=-40,\n",
+    "    font=dict(color=\"gray\"),\n",
+    ")\n",
+    "\n",
+    "# Final layout adjustments\n",
+    "fig.update_layout(\n",
+    "    legend_title=\"Strategy\",\n",
+    "    hovermode=\"closest\",\n",
+    "    width=900,\n",
+    "    height=600\n",
+    ")\n",
+    "\n",
+    "fig.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "policyengine-us-data",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}