Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
- bump: minor
changes:
added:
- Error bars and grid lines to visualizations.
- Notebook benchmarking models on additional datasets.
16 changes: 12 additions & 4 deletions docs/imputation-benchmarking/cross-validation.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,28 @@ Returns a dictionary containing separate results for each metric type:
```python
{
"quantile_loss": {
"results": pd.DataFrame, # rows: ["train", "test"], cols: quantiles
"results": pd.DataFrame, # rows: ["train", "test"], cols: quantiles (mean across folds)
"results_std": pd.DataFrame, # rows: ["train", "test"], cols: quantiles (std across folds)
"mean_train": float,
"mean_test": float,
"variables": List[str] # numerical variables evaluated
"std_train": float,
"std_test": float,
"variables": List[str] # numerical variables evaluated
},
"log_loss": {
"results": pd.DataFrame, # rows: ["train", "test"], cols: quantiles
"results": pd.DataFrame, # rows: ["train", "test"], cols: quantiles
"results_std": pd.DataFrame, # rows: ["train", "test"], cols: quantiles (std across folds)
"mean_train": float,
"mean_test": float,
"variables": List[str] # categorical variables evaluated
"std_train": float,
"std_test": float,
"variables": List[str] # categorical variables evaluated
}
}
```

The `results_std` DataFrame gives, for each quantile, the standard deviation of the loss across cross-validation folds; `std_train`/`std_test` average those per-quantile standard deviations over all quantiles. These values can be used to visualize uncertainty via error bars.

If `tune_hyperparameters=True`, returns a tuple of `(results_dict, best_hyperparameters)`.

## Example usage
Expand Down
4 changes: 4 additions & 0 deletions docs/imputation-benchmarking/visualizations.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class MethodComparisonResults:
show_mean: bool = True,
figsize: Tuple[int, int] = (PLOT_CONFIG["width"], PLOT_CONFIG["height"]),
plot_type: str = "bar",
show_error_bars: bool = True,
) -> go.Figure

def summary(self, format: str = "wide") -> pd.DataFrame
Expand All @@ -53,6 +54,7 @@ class MethodComparisonResults:
| show_mean | bool | True | Show horizontal lines for mean loss |
| figsize | tuple | (width, height) | Figure dimensions in pixels |
| plot_type | str | "bar" | Plot type: "bar" for grouped bars, "stacked" for contribution analysis |
| show_error_bars | bool | True | Show error bars representing standard deviation across CV folds |

The `"stacked"` plot type shows rank-based contribution scores, useful for understanding how each variable contributes to overall model performance.

Expand Down Expand Up @@ -134,6 +136,7 @@ class PerformanceResults:
title: Optional[str] = None,
save_path: Optional[str] = None,
figsize: Tuple[int, int] = (PLOT_CONFIG["width"], PLOT_CONFIG["height"]),
show_error_bars: bool = True,
) -> go.Figure

def summary(self) -> pd.DataFrame
Expand All @@ -146,6 +149,7 @@ class PerformanceResults:
| title | str | None | Custom plot title |
| save_path | str | None | Path to save the plot |
| figsize | tuple | (width, height) | Figure dimensions in pixels |
| show_error_bars | bool | True | Show error bars representing standard deviation across CV folds |

For quantile loss, the plot shows train and test loss across quantiles as grouped bars. For log loss, the plot includes the loss bars and optionally confusion matrix and class probability distribution subplots. For combined metrics, both are shown in subplots.

Expand Down
5 changes: 5 additions & 0 deletions microimpute/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@
except ImportError:
pass

try:
from microimpute.models.mdn import MDN
except ImportError:
pass

# Import visualization modules
from microimpute.visualizations import (
MethodComparisonResults,
Expand Down
25 changes: 24 additions & 1 deletion microimpute/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,5 +84,28 @@
# Shared Plotly styling applied by the visualization modules.
PLOT_CONFIG: Dict[str, Any] = {
    "width": 750,  # default figure width in pixels
    "height": 600,  # default figure height in pixels
    "colors": {},  # NOTE(review): empty legacy mapping — presumably superseded by "color_palette"; confirm no caller still reads it
    # Plotly "Safe" palette (px.colors.qualitative.Safe) - colorblind-friendly
    "color_palette": [
        "#88CCEE",  # Cyan
        "#CC6677",  # Rose
        "#DDCC77",  # Sand
        "#117733",  # Green
        "#332288",  # Indigo
        "#AA4499",  # Purple
        "#44AA99",  # Teal
        "#999933",  # Olive
        "#882255",  # Wine
        "#661100",  # Brown
    ],
    # Background colors (same value for the plot area and the surrounding paper)
    "plot_bgcolor": "#FAFAFA",
    "paper_bgcolor": "#FAFAFA",
    # Grid styling — horizontal grid lines only (y-axis grid on, x-axis grid off)
    "gridcolor": "#E5E5E5",
    "gridwidth": 1,
    "showgrid_x": False,
    "showgrid_y": True,
    # Axis line styling
    "linecolor": "#CCCCCC",
    "showline": True,
}
40 changes: 37 additions & 3 deletions microimpute/evaluations/cross_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,15 +494,21 @@ def cross_validate_model(
Dictionary containing separate results for quantile_loss and log_loss:
{
"quantile_loss": {
"results": pd.DataFrame, # rows: ["train", "test"], cols: quantiles
"results": pd.DataFrame, # rows: ["train", "test"], cols: quantiles (mean across folds)
"results_std": pd.DataFrame, # rows: ["train", "test"], cols: quantiles (std across folds)
"mean_train": float,
"mean_test": float,
"std_train": float, # std of mean loss across folds
"std_test": float, # std of mean loss across folds
"variables": List[str]
},
"log_loss": {
"results": pd.DataFrame, # rows: ["train", "test"], cols: quantiles (constant values)
"results_std": pd.DataFrame, # rows: ["train", "test"], cols: quantiles (std across folds)
"mean_train": float,
"mean_test": float,
"std_train": float,
"std_test": float,
"variables": List[str]
}
}
Expand Down Expand Up @@ -676,26 +682,54 @@ def cross_validate_model(
index=["train", "test"],
)

# Calculate means
# Create std DataFrame for error bars
std_df = pd.DataFrame(
[
{
q: np.std(values) if len(values) > 1 else 0.0
for q, values in metric_results[metric_type][
"train"
].items()
},
{
q: np.std(values) if len(values) > 1 else 0.0
for q, values in metric_results[metric_type][
"test"
].items()
},
],
index=["train", "test"],
)

# Calculate means and stds across all quantiles
mean_test = combined_df.loc["test"].mean()
mean_train = combined_df.loc["train"].mean()
std_test = std_df.loc["test"].mean()
std_train = std_df.loc["train"].mean()

final_results[metric_type] = {
"results": combined_df, # Single DataFrame with train/test rows
"results_std": std_df, # Std across folds for each quantile
"mean_train": mean_train,
"mean_test": mean_test,
"std_train": std_train,
"std_test": std_test,
"variables": metric_results[metric_type]["variables"],
}

log.info(
f"{metric_type} - Mean Train: {mean_train:.6f}, Mean Test: {mean_test:.6f}"
f"{metric_type} - Mean Train: {mean_train:.6f} (±{std_train:.6f}), "
f"Mean Test: {mean_test:.6f} (±{std_test:.6f})"
)
else:
# No variables use this metric
final_results[metric_type] = {
"results": pd.DataFrame(), # Empty DataFrame
"results_std": pd.DataFrame(), # Empty DataFrame
"mean_train": np.nan,
"mean_test": np.nan,
"std_train": np.nan,
"std_test": np.nan,
"variables": [],
}

Expand Down
6 changes: 6 additions & 0 deletions microimpute/models/mdn.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,9 @@ def fit(
verbose=False,
suppress_lightning_logger=True,
)
# Disable Lightning's default CSVLogger to avoid
# "dict contains fields not in fieldnames" errors
self.model.logger = False

self.model.fit(train=train_data)

Expand Down Expand Up @@ -477,6 +480,9 @@ def fit(
verbose=False,
suppress_lightning_logger=True,
)
# Disable Lightning's default CSVLogger to avoid
# "dict contains fields not in fieldnames" errors
self.model.logger = False

self.model.fit(train=train_data)

Expand Down
4 changes: 4 additions & 0 deletions microimpute/models/quantreg.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def __init__(
quantiles_specified: bool = False,
boolean_targets: Optional[Dict[str, Dict]] = None,
constant_targets: Optional[Dict[str, Dict]] = None,
dummy_processor: Optional[Any] = None,
) -> None:
"""Initialize the QuantReg results.

Expand All @@ -46,6 +47,7 @@ def __init__(
names before dummy encoding.
quantiles_specified: Whether quantiles were explicitly specified during fit.
boolean_targets: Dictionary of boolean target info for conversion back to bool.
dummy_processor: Processor for handling dummy encoding in test data.
"""
super().__init__(
predictors,
Expand All @@ -59,6 +61,7 @@ def __init__(
self.quantiles_specified = quantiles_specified
self.boolean_targets = boolean_targets or {}
self.constant_targets = constant_targets or {}
self.dummy_processor = dummy_processor

@validate_call(config=VALIDATE_CONFIG)
def _predict(
Expand Down Expand Up @@ -414,6 +417,7 @@ def _fit(
quantiles_specified=(quantiles is not None),
boolean_targets=boolean_targets,
constant_targets=constant_targets,
dummy_processor=getattr(self, "dummy_processor", None),
)
except Exception as e:
self.logger.error(f"Error fitting QuantReg model: {str(e)}")
Expand Down
Loading