v2: More validation

dweindl · dweindl · commit d54a003dbb68 · 2025-04-28T12:29:57.000+02:00
* Check priors
* Check observables
* Fix missing prior parameters after v1->v2 conversion of uniform priors
* Fix style
diff --git a/petab/v2/core.py b/petab/v2/core.py
@@ -890,6 +890,9 @@ def _validate_id(cls, v):
     @field_validator("prior_parameters", mode="before")
     @classmethod
     def _validate_prior_parameters(cls, v):
+        if isinstance(v, float) and np.isnan(v):
+            return []
+
         if isinstance(v, str):
             v = v.split(C.PARAMETER_SEPARATOR)
         elif not isinstance(v, Sequence):
diff --git a/petab/v2/lint.py b/petab/v2/lint.py
@@ -14,6 +14,9 @@
 import pandas as pd
 import sympy as sp
 
+from ..v1.visualize.lint import validate_visualization_df
+from ..v2.C import *
+from .core import PriorDistribution
 from .problem import Problem
 
 logger = logging.getLogger(__name__)
@@ -37,6 +40,8 @@
     "CheckUnusedExperiments",
     "CheckObservablesDoNotShadowModelEntities",
     "CheckUnusedConditions",
+    "CheckAllObservablesDefined",
+    "CheckPriorDistribution",
     "lint_problem",
     "default_validation_tasks",
 ]
@@ -77,8 +82,12 @@ def __post_init__(self):
     def __str__(self):
         return f"{self.level.name}: {self.message}"
 
-    def _get_task_name(self):
-        """Get the name of the ValidationTask that raised this error."""
+    @staticmethod
+    def _get_task_name() -> str | None:
+        """Get the name of the ValidationTask that raised this error.
+
+        Expected to be called from below a `ValidationTask.run`.
+        """
         import inspect
 
         # walk up the stack until we find the ValidationTask.run method
@@ -88,6 +97,7 @@ def _get_task_name(self):
                 task = frame.f_locals["self"]
                 if isinstance(task, ValidationTask):
                     return task.__class__.__name__
+        return None
 
 
 @dataclass
@@ -222,6 +232,8 @@ def run(self, problem: Problem) -> ValidationIssue | None:
                 f"Missing files: {', '.join(missing_files)}"
             )
 
+        return None
+
 
 class CheckModel(ValidationTask):
     """A task to validate the model of a PEtab problem."""
@@ -234,6 +246,8 @@ def run(self, problem: Problem) -> ValidationIssue | None:
             # TODO get actual model validation messages
             return ValidationError("Model is invalid.")
 
+        return None
+
 
 class CheckMeasuredObservablesDefined(ValidationTask):
     """A task to check that all observables referenced by the measurements
@@ -252,10 +266,13 @@ def run(self, problem: Problem) -> ValidationIssue | None:
                 "measurement table but not defined in observable table."
             )
 
+        return None
+
 
 class CheckOverridesMatchPlaceholders(ValidationTask):
     """A task to check that the number of observable/noise parameters
-    in the measurements match the number of placeholders in the observables."""
+    in the measurements matches the number of placeholders in the observables.
+    """
 
     def run(self, problem: Problem) -> ValidationIssue | None:
         observable_parameters_count = {
@@ -320,18 +337,20 @@ def run(self, problem: Problem) -> ValidationIssue | None:
         if messages:
             return ValidationError("\n".join(messages))
 
+        return None
+
 
 class CheckPosLogMeasurements(ValidationTask):
     """Check that measurements for observables with
     log-transformation are positive."""
 
     def run(self, problem: Problem) -> ValidationIssue | None:
-        from .core import NoiseDistribution as nd
+        from .core import NoiseDistribution as ND  # noqa: N813
 
         log_observables = {
             o.id
             for o in problem.observable_table.observables
-            if o.noise_distribution in [nd.LOG_NORMAL, nd.LOG_LAPLACE]
+            if o.noise_distribution in [ND.LOG_NORMAL, ND.LOG_LAPLACE]
         }
         if log_observables:
             for m in problem.measurement_table.measurements:
@@ -342,6 +361,8 @@ def run(self, problem: Problem) -> ValidationIssue | None:
                         f"positive, but {m.measurement} <= 0 for {m}"
                     )
 
+        return None
+
 
 class CheckMeasuredExperimentsDefined(ValidationTask):
     """A task to check that all experiments referenced by measurements
@@ -369,6 +390,8 @@ def run(self, problem: Problem) -> ValidationIssue | None:
                 + str(missing_experiments)
             )
 
+        return None
+
 
 class CheckValidConditionTargets(ValidationTask):
     """Check that all condition table targets are valid."""
@@ -418,6 +441,32 @@ def run(self, problem: Problem) -> ValidationIssue | None:
                             f"{invalid} at time {period.time}."
                         )
                     period_targets |= condition_targets
+        return None
+
+
+class CheckAllObservablesDefined(ValidationTask):
+    """A task to validate that all observables in the measurement table are
+    defined in the observable table."""
+
+    def run(self, problem: Problem) -> ValidationIssue | None:
+        if problem.measurement_df is None:
+            return None
+
+        measurement_df = problem.measurement_df
+        observable_df = problem.observable_df
+        used_observables = set(measurement_df[OBSERVABLE_ID].values)
+        defined_observables = (
+            set(observable_df.index.values)
+            if observable_df is not None
+            else set()
+        )
+        if undefined_observables := (used_observables - defined_observables):
+            return ValidationError(
+                f"Observables {undefined_observables} are used in the"
+                "measurements table but are not defined in observables table."
+            )
+
+        return None
 
 
 class CheckUniquePrimaryKeys(ValidationTask):
@@ -429,37 +478,39 @@ def run(self, problem: Problem) -> ValidationIssue | None:
 
         # check for uniqueness of all primary keys
         counter = Counter(c.id for c in problem.condition_table.conditions)
-        duplicates = {id for id, count in counter.items() if count > 1}
+        duplicates = {id_ for id_, count in counter.items() if count > 1}
 
         if duplicates:
             return ValidationError(
                 f"Condition table contains duplicate IDs: {duplicates}"
             )
 
         counter = Counter(o.id for o in problem.observable_table.observables)
-        duplicates = {id for id, count in counter.items() if count > 1}
+        duplicates = {id_ for id_, count in counter.items() if count > 1}
 
         if duplicates:
             return ValidationError(
                 f"Observable table contains duplicate IDs: {duplicates}"
             )
 
         counter = Counter(e.id for e in problem.experiment_table.experiments)
-        duplicates = {id for id, count in counter.items() if count > 1}
+        duplicates = {id_ for id_, count in counter.items() if count > 1}
 
         if duplicates:
             return ValidationError(
                 f"Experiment table contains duplicate IDs: {duplicates}"
             )
 
         counter = Counter(p.id for p in problem.parameter_table.parameters)
-        duplicates = {id for id, count in counter.items() if count > 1}
+        duplicates = {id_ for id_, count in counter.items() if count > 1}
 
         if duplicates:
             return ValidationError(
                 f"Parameter table contains duplicate IDs: {duplicates}"
             )
 
+        return None
+
 
 class CheckObservablesDoNotShadowModelEntities(ValidationTask):
     """A task to check that observable IDs do not shadow model entities."""
@@ -479,6 +530,8 @@ def run(self, problem: Problem) -> ValidationIssue | None:
                 f"Observable IDs {shadowed_entities} shadow model entities."
             )
 
+        return None
+
 
 class CheckExperimentTable(ValidationTask):
     """A task to validate the experiment table of a PEtab problem."""
@@ -498,6 +551,8 @@ def run(self, problem: Problem) -> ValidationIssue | None:
         if messages:
             return ValidationError("\n".join(messages))
 
+        return None
+
 
 class CheckExperimentConditionsExist(ValidationTask):
     """A task to validate that all conditions in the experiment table exist
@@ -526,6 +581,8 @@ def run(self, problem: Problem) -> ValidationIssue | None:
         if messages:
             return ValidationError("\n".join(messages))
 
+        return None
+
 
 class CheckAllParametersPresentInParameterTable(ValidationTask):
     """Ensure all required parameters are contained in the parameter table
@@ -573,6 +630,8 @@ def run(self, problem: Problem) -> ValidationIssue | None:
                 + str(extraneous)
             )
 
+        return None
+
 
 class CheckValidParameterInConditionOrParameterTable(ValidationTask):
     """A task to check that all required and only allowed model parameters are
@@ -646,9 +705,11 @@ def run(self, problem: Problem) -> ValidationIssue | None:
                 "the condition table and the parameter table."
             )
 
+        return None
+
 
 class CheckUnusedExperiments(ValidationTask):
-    """A task to check for experiments that are not used in the measurements
+    """A task to check for experiments that are not used in the measurement
     table."""
 
     def run(self, problem: Problem) -> ValidationIssue | None:
@@ -668,9 +729,11 @@ def run(self, problem: Problem) -> ValidationIssue | None:
                 "measurements table."
             )
 
+        return None
+
 
 class CheckUnusedConditions(ValidationTask):
-    """A task to check for conditions that are not used in the experiments
+    """A task to check for conditions that are not used in the experiment
     table."""
 
     def run(self, problem: Problem) -> ValidationIssue | None:
@@ -692,6 +755,8 @@ def run(self, problem: Problem) -> ValidationIssue | None:
                 "experiments table."
             )
 
+        return None
+
 
 class CheckVisualizationTable(ValidationTask):
     """A task to validate the visualization table of a PEtab problem."""
@@ -700,14 +765,64 @@ def run(self, problem: Problem) -> ValidationIssue | None:
         if problem.visualization_df is None:
             return None
 
-        from ..v1.visualize.lint import validate_visualization_df
-
         if validate_visualization_df(problem):
             return ValidationIssue(
                 level=ValidationIssueSeverity.ERROR,
                 message="Visualization table is invalid.",
             )
 
+        return None
+
+
+class CheckPriorDistribution(ValidationTask):
+    """A task to validate the prior distribution of a PEtab problem."""
+
+    _num_pars = {
+        PriorDistribution.CAUCHY: 2,
+        PriorDistribution.CHI_SQUARED: 1,
+        PriorDistribution.EXPONENTIAL: 1,
+        PriorDistribution.GAMMA: 2,
+        PriorDistribution.LAPLACE: 2,
+        PriorDistribution.LOG10_NORMAL: 2,
+        PriorDistribution.LOG_LAPLACE: 2,
+        PriorDistribution.LOG_NORMAL: 2,
+        PriorDistribution.LOG_UNIFORM: 2,
+        PriorDistribution.NORMAL: 2,
+        PriorDistribution.RAYLEIGH: 1,
+        PriorDistribution.UNIFORM: 2,
+    }
+
+    def run(self, problem: Problem) -> ValidationIssue | None:
+        messages = []
+        for parameter in problem.parameter_table.parameters:
+            if parameter.prior_distribution is None:
+                continue
+
+            if parameter.prior_distribution not in PRIOR_DISTRIBUTIONS:
+                messages.append(
+                    f"Prior distribution `{parameter.prior_distribution}' "
+                    f"for parameter `{parameter.id}' is not valid."
+                )
+                continue
+
+            if (
+                exp_num_par := self._num_pars[parameter.prior_distribution]
+            ) != len(parameter.prior_parameters):
+                messages.append(
+                    f"Prior distribution `{parameter.prior_distribution}' "
+                    f"for parameter `{parameter.id}' requires "
+                    f"{exp_num_par} parameters, but got "
+                    f"{len(parameter.prior_parameters)} "
+                    f"({parameter.prior_parameters})."
+                )
+
+            # TODO: check distribution parameter domains
+
+        if messages:
+            return ValidationError("\n".join(messages))
+
+        return None
+
 
 def get_valid_parameters_for_parameter_table(
     problem: Problem,
@@ -752,7 +867,7 @@ def get_valid_parameters_for_parameter_table(
         if mapping.model_id and mapping.model_id in parameter_ids.keys():
             parameter_ids[mapping.petab_id] = None
 
-    # add output parameters from observables table
+    # add output parameters from observable table
     output_parameters = get_output_parameters(problem)
     for p in output_parameters:
         if p not in invalid:
@@ -781,7 +896,7 @@ def get_required_parameters_for_parameter_table(
     problem: Problem,
 ) -> Set[str]:
     """
-    Get set of parameters which need to go into the parameter table
+    Get the set of parameters that need to go into the parameter table
 
     Arguments:
         problem: The PEtab problem
@@ -965,4 +1080,9 @@ def get_placeholders(
     # TODO: atomize checks, update to long condition table, re-enable
     # CheckVisualizationTable(),
     # TODO validate mapping table
+    CheckValidParameterInConditionOrParameterTable(),
+    CheckAllObservablesDefined(),
+    CheckAllParametersPresentInParameterTable(),
+    CheckValidConditionTargets(),
+    CheckPriorDistribution(),
 ]
diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py
@@ -455,4 +455,21 @@ def update_prior(row):
         errors="ignore",
     )
 
+    # if uniform, we need to explicitly set the parameters
+    def update_prior_pars(row):
+        prior_type = row.get(v2.C.PRIOR_DISTRIBUTION)
+        prior_pars = row.get(v2.C.PRIOR_PARAMETERS)
+
+        if prior_type not in (v2.C.UNIFORM, v2.C.LOG_UNIFORM) or not pd.isna(
+            prior_pars
+        ):
+            return prior_pars
+
+        return (
+            f"{row[v2.C.LOWER_BOUND]}{v2.C.PARAMETER_SEPARATOR}"
+            f"{row[v2.C.UPPER_BOUND]}"
+        )
+
+    df[v2.C.PRIOR_PARAMETERS] = df.apply(update_prior_pars, axis=1)
+
     return df