
Commit 0f69540

Fixes and updates for neurons and axons examples
1 parent 89937ca commit 0f69540

File tree: 9 files changed (+373, -142 lines)

connectomics/data/dataset/dataset_base.py

Lines changed: 18 additions & 0 deletions
@@ -214,6 +214,24 @@ def __init__(
         self.dataset_length = len(data_dicts)

     def __len__(self) -> int:
+        """
+        Return dataset length.
+
+        For CacheDataset with cache_rate < 1.0, we must return the actual
+        number of cached items, not the requested iter_num, to avoid IndexError.
+        """
+        # If using partial caching, return the actual cached data length
+        # CacheDataset stores cached indices in self._cache
+        if hasattr(self, '_cache') and len(self._cache) < len(self.data):
+            # Partial caching: return cached length for validation
+            # For training with iter_num, we still want to iterate iter_num times
+            if self.mode == 'train' and self.iter_num > 0:
+                return self.dataset_length
+            else:
+                # For validation/test, only iterate over cached items
+                return len(self._cache)
+
+        # Full caching or no caching: use dataset_length
         return self.dataset_length

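
As context for the __len__ change above: MONAI's CacheDataset keeps only cache_rate * len(data) items in its internal _cache list, which is what the override checks via len(self._cache) outside of training. A minimal sketch of that behaviour (assumes MONAI is installed; the toy data and transform are illustrative, not taken from this repository):

# Sketch: partial caching in MONAI CacheDataset (illustrative data/transform).
from monai.data import CacheDataset
from monai.transforms import Lambdad

data_dicts = [{"image": i} for i in range(10)]
ds = CacheDataset(
    data=data_dicts,
    transform=Lambdad(keys="image", func=lambda x: x * 2),
    cache_rate=0.5,  # pre-cache only half of the items
)

print(len(ds.data))    # 10 -> full data list
print(len(ds._cache))  # 5  -> items actually cached (internal attribute)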

connectomics/decoding/optuna_tuner.py

Lines changed: 75 additions & 35 deletions
@@ -125,12 +125,22 @@ def _load_data(self, data: np.ndarray | str | Path, name: str) -> np.ndarray:

     def _validate_data(self):
         """Validate data shapes and types."""
+        # Handle 2D data: (C, H, W) → (C, 1, H, W)
+        if self.predictions.ndim == 3:
+            print(f" 📐 2D data detected, expanding predictions: {self.predictions.shape}{self.predictions.shape[:1] + (1,) + self.predictions.shape[1:]}")
+            self.predictions = self.predictions[:, np.newaxis, :, :]
+
         # Predictions should be (C, D, H, W)
         if self.predictions.ndim != 4:
             raise ValueError(
                 f"Predictions should be 4D (C, D, H, W), got shape {self.predictions.shape}"
             )

+        # Handle 2D ground truth: (H, W) → (1, H, W)
+        if self.ground_truth.ndim == 2:
+            print(f" 📐 2D ground truth detected, expanding: {self.ground_truth.shape}{(1,) + self.ground_truth.shape}")
+            self.ground_truth = self.ground_truth[np.newaxis, :, :]
+
         # Ground truth should be (D, H, W)
         if self.ground_truth.ndim != 3:
             raise ValueError(
@@ -145,8 +155,12 @@ def _validate_data(self):
                 f"ground_truth {self.ground_truth.shape}"
             )

-        # Check mask if provided
+        # Handle 2D mask if provided
         if self.mask is not None:
+            if self.mask.ndim == 2:
+                print(f" 📐 2D mask detected, expanding: {self.mask.shape}{(1,) + self.mask.shape}")
+                self.mask = self.mask[np.newaxis, :, :]
+
             if self.mask.shape != self.ground_truth.shape:
                 raise ValueError(
                     f"Mask shape {self.mask.shape} doesn't match "
@@ -170,7 +184,7 @@ def optimize(self) -> optuna.Study:
         direction = self._get_optimization_direction()

         # Create storage directory if using SQLite
-        storage = self.tune_cfg.get("storage", None)
+        storage = getattr(self.tune_cfg, "storage", None)
         if storage and storage.startswith("sqlite:///"):
             # Extract database file path from SQLite URL
             db_path = storage.replace("sqlite:///", "")
@@ -204,7 +218,7 @@ def optimize(self) -> optuna.Study:
             self._objective,
             n_trials=n_trials,
             timeout=timeout,
-            show_progress_bar=self.tune_cfg.logging.get("show_progress_bar", True),
+            show_progress_bar=getattr(self.tune_cfg.logging, "show_progress_bar", True),
         )

         # Print results
@@ -219,7 +233,7 @@ def _create_sampler(self) -> optuna.samplers.BaseSampler:
         """Create Optuna sampler from config."""
         sampler_cfg = self.tune_cfg.sampler
         sampler_name = sampler_cfg["name"]
-        sampler_kwargs = sampler_cfg.get("kwargs", {})
+        sampler_kwargs = getattr(sampler_cfg, "kwargs", {})

         # Convert OmegaConf to dict
         if isinstance(sampler_kwargs, DictConfig):
@@ -236,13 +250,13 @@ def _create_sampler(self) -> optuna.samplers.BaseSampler:

     def _create_pruner(self) -> Optional[optuna.pruners.BasePruner]:
         """Create Optuna pruner from config."""
-        pruner_cfg = self.tune_cfg.get("pruner", None)
+        pruner_cfg = getattr(self.tune_cfg, "pruner", None)

-        if pruner_cfg is None or not pruner_cfg.get("enabled", False):
+        if pruner_cfg is None or not getattr(pruner_cfg, "enabled", False):
             return None

-        pruner_name = pruner_cfg.get("name", "Median")
-        pruner_kwargs = pruner_cfg.get("kwargs", {})
+        pruner_name = getattr(pruner_cfg, "name", "Median")
+        pruner_kwargs = getattr(pruner_cfg, "kwargs", {})

         # Convert OmegaConf to dict
         if isinstance(pruner_kwargs, DictConfig):
@@ -338,7 +352,7 @@ def _objective(self, trial: optuna.Trial) -> float:
         )

         # Print progress
-        if self.tune_cfg.logging.get("verbose", True):
+        if getattr(self.tune_cfg.logging, "verbose", True):
             print(f"Trial {self.trial_count:3d}: {metric_name}={metric_value:.4f}")

         return metric_value
@@ -546,7 +560,7 @@ def _print_results(self, study: optuna.Study):
         for key, value in best_decoding_params.items():
             print(f" {key}: {value}")

-        if self.param_space_cfg.get("postprocessing", {}).get("enabled", False):
+        if getattr(self.param_space_cfg, "postprocessing", None) and getattr(self.param_space_cfg.postprocessing, "enabled", False):
             best_postproc_params = self._reconstruct_postproc_params(study.best_params)
             if best_postproc_params:
                 print(f"\n Post-processing params:")
@@ -662,8 +676,13 @@ def run_tuning(model, trainer, cfg, checkpoint_path=None):
     print("\n[1/4] Running inference on tuning dataset...")

     # Get tune config sections (used later for loading predictions, ground truth, masks)
-    tune_data = cfg.tune.get("data", {})
-    tune_output = cfg.tune.get("output", {})
+    tune_data = getattr(cfg.tune, "data", None)
+    tune_output = getattr(cfg.tune, "output", None)
+
+    if tune_data is None:
+        raise ValueError("Missing tune.data in configuration")
+    if tune_output is None:
+        raise ValueError("Missing tune.output in configuration")

     # Create datamodule with tune mode (reads from cfg.tune.data)
     # Uses inference settings from cfg.inference (sliding window, TTA, save_predictions, etc.)
@@ -677,8 +696,8 @@ def run_tuning(model, trainer, cfg, checkpoint_path=None):

     # Step 2: Load predictions from saved files
     print("\n[2/4] Loading predictions from saved files...")
-    output_pred_dir = tune_output.get("output_pred", str(output_dir.parent / "results"))
-    cache_suffix = tune_output.get("cache_suffix", "_tta_prediction.h5")
+    output_pred_dir = getattr(tune_output, "output_pred", str(output_dir.parent / "results"))
+    cache_suffix = getattr(tune_output, "cache_suffix", "_tta_prediction.h5")
     predictions_dir = Path(output_pred_dir)

     # Find all prediction files using cache_suffix from config
@@ -711,13 +730,21 @@ def run_tuning(model, trainer, cfg, checkpoint_path=None):

     # Step 3: Load ground truth
     print("\n[3/4] Loading ground truth labels...")
-    tune_label_pattern = tune_data.get("tune_label", None)
+    tune_label_pattern = getattr(tune_data, "tune_label", None)

     if tune_label_pattern is None:
         raise ValueError("Missing tune.data.tune_label in configuration")

-    # Handle glob patterns (can match multiple files)
-    label_files = sorted(glob.glob(tune_label_pattern))
+    # Handle both string patterns and pre-resolved lists
+    if isinstance(tune_label_pattern, list):
+        # Already resolved to list of files
+        label_files = sorted(tune_label_pattern)
+    elif isinstance(tune_label_pattern, str):
+        # Glob pattern - expand it
+        label_files = sorted(glob.glob(tune_label_pattern))
+    else:
+        raise TypeError(f"tune_label must be string or list, got {type(tune_label_pattern)}")
+
     if not label_files:
         raise FileNotFoundError(f"No label files found matching pattern: {tune_label_pattern}")

@@ -740,10 +767,16 @@ def run_tuning(model, trainer, cfg, checkpoint_path=None):

     # Load mask if available
     mask = None
-    tune_mask_pattern = tune_data.get("tune_mask", None)
+    tune_mask_pattern = getattr(tune_data, "tune_mask", None)
     if tune_mask_pattern:
-        # Handle glob patterns
-        mask_files = sorted(glob.glob(tune_mask_pattern))
+        # Handle both string patterns and pre-resolved lists
+        if isinstance(tune_mask_pattern, list):
+            mask_files = sorted(tune_mask_pattern)
+        elif isinstance(tune_mask_pattern, str):
+            mask_files = sorted(glob.glob(tune_mask_pattern))
+        else:
+            raise TypeError(f"tune_mask must be string or list, got {type(tune_mask_pattern)}")
+
         if not mask_files:
             print(f" ⚠️ No mask files found matching pattern: {tune_mask_pattern}")
         else:
@@ -820,12 +853,11 @@ def load_and_apply_best_params(cfg):
     print(OmegaConf.to_yaml(best_params))

     # Apply to test.decoding config
-    # Note: test is Dict[str, Any], so we need to handle it carefully
     if cfg.test is None:
-        cfg.test = {}
+        cfg.test = OmegaConf.create({})

-    if "decoding" not in cfg.test:
-        cfg.test["decoding"] = []
+    if not hasattr(cfg.test, "decoding") or cfg.test.decoding is None:
+        cfg.test.decoding = []

     # Find the decoding function in test.decoding that matches the tuned function
     decoding_function = best_params.get("decoding_function", None)
@@ -836,24 +868,32 @@ def load_and_apply_best_params(cfg):
     else:
         # Find decoder with matching function name
         decoder_idx = None
-        for idx, decoder in enumerate(cfg.test["decoding"]):
-            if decoder.get("name") == decoding_function:
+        for idx, decoder in enumerate(cfg.test.decoding):
+            decoder_name = decoder.get("name") if isinstance(decoder, dict) else getattr(decoder, "name", None)
+            if decoder_name == decoding_function:
                 decoder_idx = idx
                 break

         if decoder_idx is None:
             # Create new decoder entry
-            decoder_idx = len(cfg.test["decoding"])
-            cfg.test["decoding"].append({"name": decoding_function, "kwargs": {}})
+            decoder_idx = len(cfg.test.decoding)
+            cfg.test.decoding.append({"name": decoding_function, "kwargs": {}})

         # Update parameters
-        if decoder_idx < len(cfg.test["decoding"]):
-            decoder = cfg.test["decoding"][decoder_idx]
-            if "kwargs" not in decoder:
-                decoder["kwargs"] = {}
-
-            # Apply best parameters
-            decoder["kwargs"].update(OmegaConf.to_container(best_params["parameters"]))
+        if decoder_idx < len(cfg.test.decoding):
+            decoder = cfg.test.decoding[decoder_idx]
+
+            # Handle both dict and config object
+            if isinstance(decoder, dict):
+                if "kwargs" not in decoder:
+                    decoder["kwargs"] = {}
+                decoder["kwargs"].update(OmegaConf.to_container(best_params["decoding_params"]))
+            else:
+                if not hasattr(decoder, "kwargs") or decoder.kwargs is None:
+                    decoder.kwargs = {}
+                # Update kwargs with best parameters
+                for key, value in best_params["decoding_params"].items():
+                    decoder.kwargs[key] = value

     print(f"✓ Applied best parameters to test.decoding[{decoder_idx}]")
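
The recurring change in this file replaces dict-style .get(...) with getattr(...), which reads a field the same way whether the config node is a dataclass-backed structured config or an OmegaConf DictConfig. A small sketch of the difference (the TuneConfig dataclass below is a hypothetical stand-in, not a class from this repository):

from dataclasses import dataclass
from typing import Optional

from omegaconf import OmegaConf

@dataclass
class TuneConfig:  # hypothetical stand-in for the structured tune config
    storage: Optional[str] = None

structured = TuneConfig(storage="sqlite:///study.db")
dict_cfg = OmegaConf.create({"storage": "sqlite:///study.db"})

print(getattr(structured, "storage", None))  # works: sqlite:///study.db
print(getattr(dict_cfg, "storage", None))    # works: sqlite:///study.db
print(dict_cfg.get("storage", None))         # .get() exists only on DictConfig
# structured.get("storage", None)            # AttributeError: dataclasses have no .get()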

connectomics/training/lit/config.py

Lines changed: 5 additions & 1 deletion
@@ -637,7 +637,11 @@ def setup(self, stage=None):
             )
         else:
             # Standard data module
-            use_cache = cfg.data.use_cache
+            # Disable caching for test/tune modes to avoid issues with partial cache returning 0 length
+            use_cache = cfg.data.use_cache and mode == "train"
+
+            if mode in ["test", "tune"] and cfg.data.use_cache:
+                print(" ⚠️ Caching disabled for test/tune mode (incompatible with partial cache)")

             # Note: transpose_axes is now handled in the transform builders (build_train/val/test_transforms)
             # which embed the transpose in LoadVolumed, so no need to pass it here

connectomics/training/lit/model.py

Lines changed: 53 additions & 13 deletions
@@ -214,9 +214,12 @@ def _invert_save_prediction_transform(self, data: np.ndarray) -> np.ndarray:
         data = data.astype(np.float32)

         # Invert the scaling if it was applied
-        if intensity_scale is not None and intensity_scale != 1.0:
+        # Note: intensity_scale < 0 means scaling was disabled, so no inversion needed
+        if intensity_scale is not None and intensity_scale > 0 and intensity_scale != 1.0:
             data = data / float(intensity_scale)
             print(f" 🔄 Inverted intensity scaling by {intensity_scale}")
+        elif intensity_scale is not None and intensity_scale < 0:
+            print(f" ℹ️ Intensity scaling was disabled (scale={intensity_scale}), no inversion needed")

         return data

@@ -296,14 +299,26 @@ def _compute_test_metrics(self, decoded_predictions: np.ndarray, labels: torch.T
         pred_tensor = torch.from_numpy(decoded_predictions).float().to(self.device)
         labels_tensor = labels.float()

+        # Remove batch and channel dimensions
         pred_tensor = pred_tensor.squeeze()
         labels_tensor = labels_tensor.squeeze()

-        if pred_tensor.ndim != labels_tensor.ndim:
-            if pred_tensor.ndim == labels_tensor.ndim - 1:
-                pred_tensor = pred_tensor.unsqueeze(0)
-            elif labels_tensor.ndim == pred_tensor.ndim - 1:
-                labels_tensor = labels_tensor.unsqueeze(0)
+        # Ensure both tensors have the same shape
+        if pred_tensor.shape != labels_tensor.shape:
+            print(f" ⚠️ Shape mismatch: pred={pred_tensor.shape}, labels={labels_tensor.shape}")
+
+            # Try to align dimensions
+            if pred_tensor.ndim != labels_tensor.ndim:
+                if pred_tensor.ndim == labels_tensor.ndim - 1:
+                    pred_tensor = pred_tensor.unsqueeze(0)
+                elif labels_tensor.ndim == pred_tensor.ndim - 1:
+                    labels_tensor = labels_tensor.unsqueeze(0)
+
+            # If still mismatched after dimension alignment, skip metrics
+            if pred_tensor.shape != labels_tensor.shape:
+                print(f" ❌ Cannot compute metrics: incompatible shapes after alignment")
+                print(f" pred={pred_tensor.shape}, labels={labels_tensor.shape}")
+                return

         if pred_tensor.max() <= 1.0:
             pred_binary = (pred_tensor > 0.5).long()
@@ -548,17 +563,42 @@ def configure_optimizers(self) -> Dict[str, Any]:
         """Configure optimizers and learning rate schedulers."""
         optimizer = build_optimizer(self.cfg, self.model)

-        # Build scheduler if configured
-        if hasattr(self.cfg, 'scheduler') and self.cfg.scheduler is not None:
+        # Build scheduler if configured (check both cfg.scheduler and cfg.optimization.scheduler)
+        has_scheduler = (
+            (hasattr(self.cfg, 'scheduler') and self.cfg.scheduler is not None) or
+            (hasattr(self.cfg, 'optimization') and hasattr(self.cfg.optimization, 'scheduler') and self.cfg.optimization.scheduler is not None)
+        )
+
+        if has_scheduler:
             scheduler = build_lr_scheduler(self.cfg, optimizer)

+            # Check if this is ReduceLROnPlateau (requires metric monitoring)
+            scheduler_config = {
+                'scheduler': scheduler,
+                'interval': 'epoch',
+                'frequency': 1,
+            }
+
+            # ReduceLROnPlateau requires the 'monitor' key to pass the metric value
+            if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
+                # Get monitor metric from scheduler config
+                monitor_metric = None
+                if hasattr(self.cfg, 'optimization') and hasattr(self.cfg.optimization, 'scheduler'):
+                    monitor_metric = getattr(self.cfg.optimization.scheduler, 'monitor', None)
+                elif hasattr(self.cfg, 'scheduler'):
+                    monitor_metric = getattr(self.cfg.scheduler, 'monitor', None)
+
+                if monitor_metric:
+                    scheduler_config['monitor'] = monitor_metric
+                    print(f" ✅ ReduceLROnPlateau will monitor: {monitor_metric}")
+                else:
+                    # Default to validation loss
+                    scheduler_config['monitor'] = 'val_loss_total'
+                    print(f" ⚠️ ReduceLROnPlateau will monitor: val_loss_total (default, no monitor specified in config)")
+
             return {
                 'optimizer': optimizer,
-                'lr_scheduler': {
-                    'scheduler': scheduler,
-                    'interval': 'epoch',
-                    'frequency': 1,
-                },
+                'lr_scheduler': scheduler_config,
             }
         else:
             return optimizer
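
For reference, the scheduler dictionary assembled above follows the standard PyTorch Lightning convention for configure_optimizers. A standalone sketch of that convention with placeholder model, optimizer, and metric name (not the project's actual classes; training and validation steps omitted for brevity):

import torch
import lightning.pytorch as pl  # or pytorch_lightning, depending on the install

class TinyModule(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(4, 1)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min")
        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": scheduler,
                "interval": "epoch",
                "frequency": 1,
                # ReduceLROnPlateau needs a logged metric to track
                "monitor": "val_loss_total",
            },
        }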
