Added a method to compute data drift on a ME (#170)

fterrazzoni · web-flow · commit cebcd933183d · 2021-09-22T09:11:07.000+02:00
* Added a method to compute data drift on a ME

* Added a basic explanation to DSSModelEvaluation.compute_data_drift()

* Wording: input -> data (drift)

* Allow both IDs and object instances to be used as reference for drift computation

* Make full_model_like_id() public & available on both prediction & clustering models

* Clarify that compute_data_drift() accepts either a Python object or an ID

* full_model_like_id -> full_id

* Auto-wait for future results in compute_data_drift()

(by default)
diff --git a/dataikuapi/dss/ml.py b/dataikuapi/dss/ml.py
@@ -1804,6 +1804,10 @@ def get_raw(self):
         """
         return self.details
 
+    @property
+    def full_id(self):
+        return self.details["fullModelId"]
+
     def get_raw_snippet(self):
         """
         Gets the raw dictionary of trained model snippet. 
diff --git a/dataikuapi/dss/modelevaluationstore.py b/dataikuapi/dss/modelevaluationstore.py
@@ -3,6 +3,7 @@
 
 from dataikuapi.dss.metrics import ComputedMetrics
 from .discussion import DSSObjectDiscussions
+from .future import DSSFuture
 
 from requests import utils
 
@@ -289,6 +290,34 @@ def delete(self):
         self.client._perform_json(
                 "DELETE", "/projects/%s/modelevaluationstores/%s/runs/" % (self.project_key, self.mes_id), body=obj)
 
+    @property
+    def full_id(self):
+        return "ME-%s-%s-%s"%(self.project_key, self.mes_id, self.run_id)
+
+    def compute_data_drift(self, reference=None, data_drift_params=None, wait=True):
+        """
+        Compute data drift against a reference model or model evaluation. The reference is determined automatically unless specified.
+
+        :param reference: saved model version (full ID or DSSTrainedPredictionModelDetails)
+                or model evaluation (full ID or DSSModelEvaluation) to use as reference (optional)
+        :type reference: Union[str, DSSModelEvaluation, DSSTrainedPredictionModelDetails]
+        :param data_drift_params: data drift computation settings (optional)
+        :param wait: if `True` (default), wait for the computation to finish and return its result; if `False`, return the future handle immediately
+        :returns: a `dict` containing data drift analysis results if `wait` is `True`, or a :class:`~dataikuapi.dss.future.DSSFuture` handle otherwise
+        """
+
+        if hasattr(reference, 'full_id'):
+            reference = reference.full_id
+
+        future_response = self.client._perform_json(
+            "POST", "/projects/%s/modelevaluationstores/%s/runs/%s/computeDataDrift" % (self.project_key, self.mes_id, self.run_id),
+            body={
+                "referenceId": reference,
+                "dataDriftParams": data_drift_params
+            })
+        future = DSSFuture(self.client, future_response.get('jobId', None), future_response)
+        return future.wait_for_result() if wait else future
+
     def get_metrics(self):
         """
         Get the metrics for this model evaluation. Metrics must be understood here as Metrics in DSS Metrics & Checks